perf: streamline douyin browser sync handling

This commit is contained in:
kris
2026-03-20 19:41:31 +08:00
parent 4356c46b9e
commit 5c52476a45
2 changed files with 96 additions and 60 deletions

View File

@@ -12,6 +12,7 @@ from urllib.parse import quote, unquote
import httpx
from fastapi import Depends, HTTPException
from pydantic import BaseModel, Field
from starlette.concurrency import run_in_threadpool
DEFAULT_CREATOR_CENTER_URLS = [
"https://creator.douyin.com/creator-micro/home",
@@ -37,6 +38,7 @@ class DouyinAccountSyncRequest(BaseModel):
session_cookie: str = ""
creator_center_urls: list[str] = Field(default_factory=lambda: list(DEFAULT_CREATOR_CENTER_URLS))
allow_creator_center_profile_fallback: bool = False
compact_response: bool = False
manual_profile_payload: dict[str, Any] | None = None
manual_creator_pages: list[ManualPageCapture] = Field(default_factory=list)
manual_work_payloads: list[dict[str, Any]] = Field(default_factory=list)
@@ -1051,11 +1053,12 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
snapshot_type: str,
source_url: str,
payload: Any,
summary: dict[str, Any]
summary: dict[str, Any],
index_fields: bool = True
) -> str:
snapshot_id = make_id("dysnap")
collected_at = now()
fields = _flatten_json(payload)
fields = _flatten_json(payload) if index_fields else []
legacy.db.execute(
"""
INSERT INTO douyin_account_snapshots (
@@ -1107,7 +1110,8 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
"video_count": len(public_data["videos"]),
"nickname": public_data["profile"].get("nickname", ""),
"tags": public_data["profile"].get("tags", [])
}
},
index_fields=not sync_request.compact_response
)
for page in creator_data["pages"]:
@@ -1122,8 +1126,9 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
payload,
{
"blob_count": len(page["blobs"]),
"field_count": len(_flatten_json(payload))
}
"field_count": 0 if sync_request.compact_response else len(_flatten_json(payload))
},
index_fields=not sync_request.compact_response
)
for manual_video in sync_request.manual_work_payloads:
@@ -1221,6 +1226,69 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
})
return payloads
def _finalize_sync_workspace(
owner: dict[str, Any],
request: DouyinAccountSyncRequest,
public_data: dict[str, Any],
creator_data: dict[str, Any]
) -> dict[str, Any]:
creator_payloads = _extract_creator_payloads(creator_data)
if creator_payloads:
creator_profile = _pick_best_profile(
[candidate for payload in creator_payloads for candidate in _extract_profile_candidates(payload)]
)
creator_videos = _extract_videos(creator_payloads)
creator_identity_match = _profiles_appear_same(public_data["profile"], creator_profile)
should_merge_creator = creator_identity_match or request.allow_creator_center_profile_fallback
if should_merge_creator:
if creator_profile.get("nickname"):
public_data["profile"] = _merge_profile_payload(public_data["profile"], creator_profile)
if not public_data["source_url"]:
public_data["source_url"] = creator_profile.get("canonical_profile_url") or request.profile_url
if request.allow_creator_center_profile_fallback and not creator_identity_match:
public_data["errors"].append("creator_center_profile_fallback_used")
elif public_data["profile"].get("nickname") != creator_profile.get("nickname"):
public_data["errors"].append("creator_center_profile_merge_partial")
public_data["videos"].extend(creator_videos)
elif creator_profile.get("nickname") or creator_videos:
public_data["errors"].append("creator_center_identity_mismatch_skipped")
if not public_data["profile"].get("nickname") and not public_data["videos"]:
message = "No Douyin profile or creator-center data could be extracted"
if "creator_center_identity_mismatch_skipped" in public_data["errors"]:
message = "Creator-center capture belongs to a different logged-in Douyin account; automatic merge was skipped"
raise HTTPException(
status_code=400,
detail={
"message": message,
"profile_url": request.profile_url,
"resolved_profile_url": public_data["source_url"],
"public_blob_count": len(public_data["raw_pages"]),
"public_video_count": len(public_data["videos"]),
"public_errors": public_data["errors"],
"creator_page_count": len(creator_data["pages"]),
"creator_errors": creator_data["errors"]
}
)
account_row = _upsert_account(owner, public_data["profile"], request, public_data, creator_data)
sync_errors = public_data["errors"] + creator_data["errors"]
if request.compact_response:
return {
"account": {
"id": account_row["id"],
"nickname": account_row["nickname"],
"profile_url": account_row["profile_url"],
"douyin_id": account_row["douyin_id"],
"sec_uid": account_row["sec_uid"],
"sync_status": account_row["sync_status"]
},
"sync_errors": sync_errors,
"public_video_count": len(public_data["videos"]),
"creator_page_count": len(creator_data["pages"])
}
workspace = _build_workspace_payload(account_row)
workspace["sync_errors"] = sync_errors
return workspace
def _build_account_payload(account_row: dict[str, Any], include_recent_videos: int = 8) -> dict[str, Any]:
videos = _list_videos(account_row["id"], limit=max(include_recent_videos, 12))
tags = _safe_json_loads(account_row["tags_json"], [])
@@ -1881,47 +1949,13 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
request.session_cookie,
request.manual_creator_pages
)
creator_payloads = _extract_creator_payloads(creator_data)
if creator_payloads:
creator_profile = _pick_best_profile(
[candidate for payload in creator_payloads for candidate in _extract_profile_candidates(payload)]
)
creator_videos = _extract_videos(creator_payloads)
creator_identity_match = _profiles_appear_same(public_data["profile"], creator_profile)
should_merge_creator = creator_identity_match or request.allow_creator_center_profile_fallback
if should_merge_creator:
if creator_profile.get("nickname"):
public_data["profile"] = _merge_profile_payload(public_data["profile"], creator_profile)
if not public_data["source_url"]:
public_data["source_url"] = creator_profile.get("canonical_profile_url") or request.profile_url
if request.allow_creator_center_profile_fallback and not creator_identity_match:
public_data["errors"].append("creator_center_profile_fallback_used")
elif public_data["profile"].get("nickname") != creator_profile.get("nickname"):
public_data["errors"].append("creator_center_profile_merge_partial")
public_data["videos"].extend(creator_videos)
elif creator_profile.get("nickname") or creator_videos:
public_data["errors"].append("creator_center_identity_mismatch_skipped")
if not public_data["profile"].get("nickname") and not public_data["videos"]:
message = "No Douyin profile or creator-center data could be extracted"
if "creator_center_identity_mismatch_skipped" in public_data["errors"]:
message = "Creator-center capture belongs to a different logged-in Douyin account; automatic merge was skipped"
raise HTTPException(
status_code=400,
detail={
"message": message,
"profile_url": request.profile_url,
"resolved_profile_url": public_data["source_url"],
"public_blob_count": len(public_data["raw_pages"]),
"public_video_count": len(public_data["videos"]),
"public_errors": public_data["errors"],
"creator_page_count": len(creator_data["pages"]),
"creator_errors": creator_data["errors"]
}
)
account_row = _upsert_account(account, public_data["profile"], request, public_data, creator_data)
workspace = _build_workspace_payload(account_row)
workspace["sync_errors"] = public_data["errors"] + creator_data["errors"]
return workspace
return await run_in_threadpool(
_finalize_sync_workspace,
account,
request,
public_data,
creator_data
)
@app.get("/v2/douyin/accounts/{account_id}")
def get_douyin_account(

View File

@@ -743,6 +743,20 @@ async function main() {
};
await saveJsonSafe(path.join(runDir, "summary.json"), summary);
let storyforgeToken = options.storyforgeToken;
if (options.syncEnabled && !storyforgeToken) {
const auth = await loginStoryForge(
options.backendUrl,
options.storyforgeUsername,
options.storyforgePassword
);
storyforgeToken = auth.token;
await saveJson(path.join(runDir, "storyforge-login.json"), {
account: auth.account,
default_external_base_url: auth.default_external_base_url
});
}
const context = await chromium.launchPersistentContext(options.stateDir, {
headless: options.headless,
viewport: { width: 1440, height: 1024 },
@@ -772,6 +786,7 @@ async function main() {
const syncBody = {
profile_url: options.profileUrl,
allow_creator_center_profile_fallback: options.allowCreatorCenterFallback,
compact_response: true,
manual_profile_payload: profileBundle,
manual_creator_pages: creatorPages,
manual_work_payloads: videoPages,
@@ -785,20 +800,7 @@ async function main() {
summary.captured_creator_pages = creatorPages.length;
if (options.syncEnabled) {
let token = options.storyforgeToken;
if (!token) {
const auth = await loginStoryForge(
options.backendUrl,
options.storyforgeUsername,
options.storyforgePassword
);
token = auth.token;
await saveJson(path.join(runDir, "storyforge-login.json"), {
account: auth.account,
default_external_base_url: auth.default_external_base_url
});
}
const workspace = await syncCapture(options.backendUrl, token, syncRequestPath);
const workspace = await syncCapture(options.backendUrl, storyforgeToken, syncRequestPath);
summary.sync_result = {
account_id: workspace.account?.id || "",
nickname: workspace.account?.nickname || "",