perf: streamline douyin browser sync handling
This commit is contained in:
@@ -12,6 +12,7 @@ from urllib.parse import quote, unquote
|
||||
import httpx
|
||||
from fastapi import Depends, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
from starlette.concurrency import run_in_threadpool
|
||||
|
||||
DEFAULT_CREATOR_CENTER_URLS = [
|
||||
"https://creator.douyin.com/creator-micro/home",
|
||||
@@ -37,6 +38,7 @@ class DouyinAccountSyncRequest(BaseModel):
|
||||
session_cookie: str = ""
|
||||
creator_center_urls: list[str] = Field(default_factory=lambda: list(DEFAULT_CREATOR_CENTER_URLS))
|
||||
allow_creator_center_profile_fallback: bool = False
|
||||
compact_response: bool = False
|
||||
manual_profile_payload: dict[str, Any] | None = None
|
||||
manual_creator_pages: list[ManualPageCapture] = Field(default_factory=list)
|
||||
manual_work_payloads: list[dict[str, Any]] = Field(default_factory=list)
|
||||
@@ -1051,11 +1053,12 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
|
||||
snapshot_type: str,
|
||||
source_url: str,
|
||||
payload: Any,
|
||||
summary: dict[str, Any]
|
||||
summary: dict[str, Any],
|
||||
index_fields: bool = True
|
||||
) -> str:
|
||||
snapshot_id = make_id("dysnap")
|
||||
collected_at = now()
|
||||
fields = _flatten_json(payload)
|
||||
fields = _flatten_json(payload) if index_fields else []
|
||||
legacy.db.execute(
|
||||
"""
|
||||
INSERT INTO douyin_account_snapshots (
|
||||
@@ -1107,7 +1110,8 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
|
||||
"video_count": len(public_data["videos"]),
|
||||
"nickname": public_data["profile"].get("nickname", ""),
|
||||
"tags": public_data["profile"].get("tags", [])
|
||||
}
|
||||
},
|
||||
index_fields=not sync_request.compact_response
|
||||
)
|
||||
|
||||
for page in creator_data["pages"]:
|
||||
@@ -1122,8 +1126,9 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
|
||||
payload,
|
||||
{
|
||||
"blob_count": len(page["blobs"]),
|
||||
"field_count": len(_flatten_json(payload))
|
||||
}
|
||||
"field_count": 0 if sync_request.compact_response else len(_flatten_json(payload))
|
||||
},
|
||||
index_fields=not sync_request.compact_response
|
||||
)
|
||||
|
||||
for manual_video in sync_request.manual_work_payloads:
|
||||
@@ -1221,6 +1226,69 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
|
||||
})
|
||||
return payloads
|
||||
|
||||
def _finalize_sync_workspace(
|
||||
owner: dict[str, Any],
|
||||
request: DouyinAccountSyncRequest,
|
||||
public_data: dict[str, Any],
|
||||
creator_data: dict[str, Any]
|
||||
) -> dict[str, Any]:
|
||||
creator_payloads = _extract_creator_payloads(creator_data)
|
||||
if creator_payloads:
|
||||
creator_profile = _pick_best_profile(
|
||||
[candidate for payload in creator_payloads for candidate in _extract_profile_candidates(payload)]
|
||||
)
|
||||
creator_videos = _extract_videos(creator_payloads)
|
||||
creator_identity_match = _profiles_appear_same(public_data["profile"], creator_profile)
|
||||
should_merge_creator = creator_identity_match or request.allow_creator_center_profile_fallback
|
||||
if should_merge_creator:
|
||||
if creator_profile.get("nickname"):
|
||||
public_data["profile"] = _merge_profile_payload(public_data["profile"], creator_profile)
|
||||
if not public_data["source_url"]:
|
||||
public_data["source_url"] = creator_profile.get("canonical_profile_url") or request.profile_url
|
||||
if request.allow_creator_center_profile_fallback and not creator_identity_match:
|
||||
public_data["errors"].append("creator_center_profile_fallback_used")
|
||||
elif public_data["profile"].get("nickname") != creator_profile.get("nickname"):
|
||||
public_data["errors"].append("creator_center_profile_merge_partial")
|
||||
public_data["videos"].extend(creator_videos)
|
||||
elif creator_profile.get("nickname") or creator_videos:
|
||||
public_data["errors"].append("creator_center_identity_mismatch_skipped")
|
||||
if not public_data["profile"].get("nickname") and not public_data["videos"]:
|
||||
message = "No Douyin profile or creator-center data could be extracted"
|
||||
if "creator_center_identity_mismatch_skipped" in public_data["errors"]:
|
||||
message = "Creator-center capture belongs to a different logged-in Douyin account; automatic merge was skipped"
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"message": message,
|
||||
"profile_url": request.profile_url,
|
||||
"resolved_profile_url": public_data["source_url"],
|
||||
"public_blob_count": len(public_data["raw_pages"]),
|
||||
"public_video_count": len(public_data["videos"]),
|
||||
"public_errors": public_data["errors"],
|
||||
"creator_page_count": len(creator_data["pages"]),
|
||||
"creator_errors": creator_data["errors"]
|
||||
}
|
||||
)
|
||||
account_row = _upsert_account(owner, public_data["profile"], request, public_data, creator_data)
|
||||
sync_errors = public_data["errors"] + creator_data["errors"]
|
||||
if request.compact_response:
|
||||
return {
|
||||
"account": {
|
||||
"id": account_row["id"],
|
||||
"nickname": account_row["nickname"],
|
||||
"profile_url": account_row["profile_url"],
|
||||
"douyin_id": account_row["douyin_id"],
|
||||
"sec_uid": account_row["sec_uid"],
|
||||
"sync_status": account_row["sync_status"]
|
||||
},
|
||||
"sync_errors": sync_errors,
|
||||
"public_video_count": len(public_data["videos"]),
|
||||
"creator_page_count": len(creator_data["pages"])
|
||||
}
|
||||
workspace = _build_workspace_payload(account_row)
|
||||
workspace["sync_errors"] = sync_errors
|
||||
return workspace
|
||||
|
||||
def _build_account_payload(account_row: dict[str, Any], include_recent_videos: int = 8) -> dict[str, Any]:
|
||||
videos = _list_videos(account_row["id"], limit=max(include_recent_videos, 12))
|
||||
tags = _safe_json_loads(account_row["tags_json"], [])
|
||||
@@ -1881,47 +1949,13 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
|
||||
request.session_cookie,
|
||||
request.manual_creator_pages
|
||||
)
|
||||
creator_payloads = _extract_creator_payloads(creator_data)
|
||||
if creator_payloads:
|
||||
creator_profile = _pick_best_profile(
|
||||
[candidate for payload in creator_payloads for candidate in _extract_profile_candidates(payload)]
|
||||
)
|
||||
creator_videos = _extract_videos(creator_payloads)
|
||||
creator_identity_match = _profiles_appear_same(public_data["profile"], creator_profile)
|
||||
should_merge_creator = creator_identity_match or request.allow_creator_center_profile_fallback
|
||||
if should_merge_creator:
|
||||
if creator_profile.get("nickname"):
|
||||
public_data["profile"] = _merge_profile_payload(public_data["profile"], creator_profile)
|
||||
if not public_data["source_url"]:
|
||||
public_data["source_url"] = creator_profile.get("canonical_profile_url") or request.profile_url
|
||||
if request.allow_creator_center_profile_fallback and not creator_identity_match:
|
||||
public_data["errors"].append("creator_center_profile_fallback_used")
|
||||
elif public_data["profile"].get("nickname") != creator_profile.get("nickname"):
|
||||
public_data["errors"].append("creator_center_profile_merge_partial")
|
||||
public_data["videos"].extend(creator_videos)
|
||||
elif creator_profile.get("nickname") or creator_videos:
|
||||
public_data["errors"].append("creator_center_identity_mismatch_skipped")
|
||||
if not public_data["profile"].get("nickname") and not public_data["videos"]:
|
||||
message = "No Douyin profile or creator-center data could be extracted"
|
||||
if "creator_center_identity_mismatch_skipped" in public_data["errors"]:
|
||||
message = "Creator-center capture belongs to a different logged-in Douyin account; automatic merge was skipped"
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"message": message,
|
||||
"profile_url": request.profile_url,
|
||||
"resolved_profile_url": public_data["source_url"],
|
||||
"public_blob_count": len(public_data["raw_pages"]),
|
||||
"public_video_count": len(public_data["videos"]),
|
||||
"public_errors": public_data["errors"],
|
||||
"creator_page_count": len(creator_data["pages"]),
|
||||
"creator_errors": creator_data["errors"]
|
||||
}
|
||||
)
|
||||
account_row = _upsert_account(account, public_data["profile"], request, public_data, creator_data)
|
||||
workspace = _build_workspace_payload(account_row)
|
||||
workspace["sync_errors"] = public_data["errors"] + creator_data["errors"]
|
||||
return workspace
|
||||
return await run_in_threadpool(
|
||||
_finalize_sync_workspace,
|
||||
account,
|
||||
request,
|
||||
public_data,
|
||||
creator_data
|
||||
)
|
||||
|
||||
@app.get("/v2/douyin/accounts/{account_id}")
|
||||
def get_douyin_account(
|
||||
|
||||
@@ -743,6 +743,20 @@ async function main() {
|
||||
};
|
||||
await saveJsonSafe(path.join(runDir, "summary.json"), summary);
|
||||
|
||||
let storyforgeToken = options.storyforgeToken;
|
||||
if (options.syncEnabled && !storyforgeToken) {
|
||||
const auth = await loginStoryForge(
|
||||
options.backendUrl,
|
||||
options.storyforgeUsername,
|
||||
options.storyforgePassword
|
||||
);
|
||||
storyforgeToken = auth.token;
|
||||
await saveJson(path.join(runDir, "storyforge-login.json"), {
|
||||
account: auth.account,
|
||||
default_external_base_url: auth.default_external_base_url
|
||||
});
|
||||
}
|
||||
|
||||
const context = await chromium.launchPersistentContext(options.stateDir, {
|
||||
headless: options.headless,
|
||||
viewport: { width: 1440, height: 1024 },
|
||||
@@ -772,6 +786,7 @@ async function main() {
|
||||
const syncBody = {
|
||||
profile_url: options.profileUrl,
|
||||
allow_creator_center_profile_fallback: options.allowCreatorCenterFallback,
|
||||
compact_response: true,
|
||||
manual_profile_payload: profileBundle,
|
||||
manual_creator_pages: creatorPages,
|
||||
manual_work_payloads: videoPages,
|
||||
@@ -785,20 +800,7 @@ async function main() {
|
||||
summary.captured_creator_pages = creatorPages.length;
|
||||
|
||||
if (options.syncEnabled) {
|
||||
let token = options.storyforgeToken;
|
||||
if (!token) {
|
||||
const auth = await loginStoryForge(
|
||||
options.backendUrl,
|
||||
options.storyforgeUsername,
|
||||
options.storyforgePassword
|
||||
);
|
||||
token = auth.token;
|
||||
await saveJson(path.join(runDir, "storyforge-login.json"), {
|
||||
account: auth.account,
|
||||
default_external_base_url: auth.default_external_base_url
|
||||
});
|
||||
}
|
||||
const workspace = await syncCapture(options.backendUrl, token, syncRequestPath);
|
||||
const workspace = await syncCapture(options.backendUrl, storyforgeToken, syncRequestPath);
|
||||
summary.sync_result = {
|
||||
account_id: workspace.account?.id || "",
|
||||
nickname: workspace.account?.nickname || "",
|
||||
|
||||
Reference in New Issue
Block a user