fix: guard douyin creator-center identity merges

This commit is contained in:
kris
2026-03-20 19:31:29 +08:00
parent 10820595cf
commit 4356c46b9e
3 changed files with 227 additions and 22 deletions

View File

@@ -50,3 +50,4 @@ Each run writes:
- This is designed as a browser-assisted capture flow, not a fully headless anti-bot bypass.
- If Douyin shows a slider or challenge page, solve it manually in the opened browser window and then continue.
- Use `--no-sync` if you only want to save a local bundle for inspection.
- Creator-center pages belong to the currently logged-in Douyin account. StoryForge now treats them as supplemental evidence by default and will not let them overwrite the target profile unless you explicitly pass `--allow-creator-center-fallback`.

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env node
import fs from "node:fs/promises";
import { execFileSync } from "node:child_process";
import os from "node:os";
import path from "node:path";
import process from "node:process";
@@ -20,6 +21,39 @@ const JSON_CAPTURE_LIMIT = 1_500_000;
const SCRIPT_SCAN_LIMIT = 2_000_000;
const WAIT_AFTER_NAV_MS = 4_000;
const RESPONSE_READ_TIMEOUT_MS = 2_000;
const PYTHON_HTTP_BRIDGE = `
import json
import sys
import urllib.error
import urllib.request
url, method, headers_json, body_mode, body_value = sys.argv[1:6]
headers = json.loads(headers_json)
body = None
if body_mode == "text":
body = body_value.encode("utf-8")
elif body_mode == "path":
with open(body_value, "rb") as handle:
body = handle.read()
request = urllib.request.Request(url, data=body, headers=headers, method=method)
try:
with urllib.request.urlopen(request, timeout=120) as response:
raw = response.read().decode("utf-8", "replace")
try:
payload = json.loads(raw) if raw else None
except Exception:
payload = {"raw": raw}
print(json.dumps({"status": response.status, "data": payload}, ensure_ascii=False))
except urllib.error.HTTPError as error:
raw = error.read().decode("utf-8", "replace")
try:
payload = json.loads(raw) if raw else None
except Exception:
payload = {"raw": raw}
print(json.dumps({"status": error.code, "data": payload}, ensure_ascii=False))
except Exception as error:
print(json.dumps({"status": 599, "data": {"raw": str(error)}}, ensure_ascii=False))
`;
function printHelp() {
console.log(`StoryForge Douyin Browser Capture
@@ -46,6 +80,8 @@ Mode flags:
--skip-login-prompt Do not pause for manual login / captcha completion
--no-sync Capture only, do not import into StoryForge
--no-creator-center Skip creator-center page capture
--allow-creator-center-fallback
Allow creator-center identity to replace a missing public profile
--note <text> Discovery note saved into StoryForge
Examples:
@@ -71,6 +107,7 @@ function parseArgs(argv) {
manualPrompt: true,
syncEnabled: true,
creatorCenterEnabled: true,
allowCreatorCenterFallback: false,
creatorCenterUrls: [...DEFAULT_CREATOR_CENTER_URLS],
note: "",
profileUrl: "",
@@ -150,6 +187,9 @@ function parseArgs(argv) {
case "--no-creator-center":
options.creatorCenterEnabled = false;
break;
case "--allow-creator-center-fallback":
options.allowCreatorCenterFallback = true;
break;
default:
throw new Error(`Unknown argument: ${arg}`);
}
@@ -442,6 +482,78 @@ async function clickFirstVisible(page, selectors) {
return false;
}
function escapeRegExp(value) {
return String(value || "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
function decodeEscapedUrl(value) {
return String(value || "")
.replace(/\\u002F/g, "/")
.replace(/\\\//g, "/")
.replace(/&amp;/g, "&");
}
async function resolveCreatorPrefetchUrl(page) {
const current = new URL(page.url());
const html = await page.content();
const escapedPath = escapeRegExp(current.pathname);
const mapped = html.match(
new RegExp(`"${escapedPath}"\\s*:\\s*"(https://creator\\.douyin\\.com[^"]+prefetch\\.json)"`)
);
if (mapped?.[1]) {
return decodeEscapedUrl(mapped[1]);
}
const discovered = Array.from(
new Set(
[...html.matchAll(/https:\/\/creator\.douyin\.com\/goofy\/douyin_creator_pc\/mono\/prefetch\/[^"]+prefetch\.json/g)].map(
(match) => decodeEscapedUrl(match[0])
)
)
);
return (
discovered.find((candidate) => candidate.includes(current.pathname.replace(/^\/creator-micro\//, ""))) ||
discovered[0] ||
`https://creator.douyin.com/goofy/douyin_creator_pc/mono/prefetch${current.pathname}/prefetch.json`
);
}
async function collectCreatorPrefetchResults(page) {
const prefetchUrl = await resolveCreatorPrefetchUrl(page);
return page.evaluate(async ({ prefetchUrl }) => {
try {
const prefetchResp = await fetch(prefetchUrl, { credentials: "same-origin" });
const prefetchText = await prefetchResp.text();
const prefetch = JSON.parse(prefetchText);
const results = [];
for (const api of prefetch?.apis || []) {
const target = new URL(api.url, window.location.origin);
for (const [key, value] of Object.entries(api.params || {})) {
target.searchParams.set(key, String(value));
}
const resp = await fetch(target.toString(), {
credentials: api.credentials || "same-origin",
});
const payload = await resp.json().catch(() => null);
results.push({
url: target.toString(),
payload,
});
}
return {
prefetch_url: prefetchUrl,
prefetch,
results,
};
} catch (error) {
return {
prefetch_url: prefetchUrl,
error: String(error),
results: [],
};
}
}, { prefetchUrl });
}
async function prepareProfilePage(page, options) {
await clickFirstVisible(page, [
"text=作品",
@@ -512,32 +624,38 @@ async function saveJsonSafe(filePath, value) {
}
}
async function loginStoryForge(baseUrl, username, password) {
const response = await fetch(`${baseUrl.replace(/\/$/, "")}/v2/auth/login`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({ username, password })
});
if (!response.ok) {
throw new Error(`StoryForge login failed: ${response.status} ${await response.text()}`);
async function requestJson(urlString, { method = "GET", headers = {}, body = null, bodyPath = "" } = {}) {
const bodyMode = bodyPath ? "path" : body === null ? "none" : "text";
const bodyValue = bodyPath || (typeof body === "string" ? body : JSON.stringify(body));
const stdout = execFileSync(
"python3",
["-c", PYTHON_HTTP_BRIDGE, urlString, method, JSON.stringify(headers), bodyMode, bodyValue],
{ maxBuffer: 20 * 1024 * 1024, encoding: "utf8" }
);
const payload = JSON.parse(String(stdout || "").trim() || "{}");
if ((payload.status || 500) >= 400) {
throw new Error(`Request failed: ${payload.status} ${JSON.stringify(payload.data)}`);
}
return response.json();
return payload.data;
}
async function syncCapture(baseUrl, token, body) {
const response = await fetch(`${baseUrl.replace(/\/$/, "")}/v2/douyin/accounts/sync`, {
async function loginStoryForge(baseUrl, username, password) {
return requestJson(`${baseUrl.replace(/\/$/, "")}/v2/auth/login`, {
method: "POST",
headers: { "content-type": "application/json" },
body: { username, password }
});
}
async function syncCapture(baseUrl, token, bodyPath) {
return requestJson(`${baseUrl.replace(/\/$/, "")}/v2/douyin/accounts/sync`, {
method: "POST",
headers: {
"content-type": "application/json",
Authorization: `Bearer ${token}`
},
body: JSON.stringify(body)
bodyPath
});
const payload = await response.json().catch(async () => ({ raw: await response.text() }));
if (!response.ok) {
throw new Error(`StoryForge sync failed: ${response.status} ${JSON.stringify(payload)}`);
}
return payload;
}
async function captureCreatorPages(context, options, runDir) {
@@ -552,7 +670,10 @@ async function captureCreatorPages(context, options, runDir) {
try {
console.error(`Capturing creator-center page: ${url}`);
await navigateAndSettle(page, url, options.waitMs);
const bundle = await capturePageBundle(page, "creator_center", responseCapture);
const prefetchResults = await collectCreatorPrefetchResults(page);
const bundle = await capturePageBundle(page, "creator_center", responseCapture, {
creator_prefetch: prefetchResults
});
pages.push({
url: bundle.page_url,
title: bundle.page_title,
@@ -650,12 +771,14 @@ async function main() {
const syncBody = {
profile_url: options.profileUrl,
allow_creator_center_profile_fallback: options.allowCreatorCenterFallback,
manual_profile_payload: profileBundle,
manual_creator_pages: creatorPages,
manual_work_payloads: videoPages,
discovery_note: options.note || "browser-assisted capture"
};
await saveJson(path.join(runDir, "storyforge-sync-request.json"), syncBody);
const syncRequestPath = path.join(runDir, "storyforge-sync-request.json");
await saveJson(syncRequestPath, syncBody);
summary.video_link_count = videoLinks.length;
summary.captured_video_pages = videoPages.length;
@@ -675,7 +798,7 @@ async function main() {
default_external_base_url: auth.default_external_base_url
});
}
const workspace = await syncCapture(options.backendUrl, token, syncBody);
const workspace = await syncCapture(options.backendUrl, token, syncRequestPath);
summary.sync_result = {
account_id: workspace.account?.id || "",
nickname: workspace.account?.nickname || "",