fix: guard douyin creator-center identity merges
This commit is contained in:
@@ -50,3 +50,4 @@ Each run writes:
|
||||
- This is designed as a browser-assisted capture flow, not a fully headless anti-bot bypass.
|
||||
- If Douyin shows a slider or challenge page, solve it manually in the opened browser window and then continue.
|
||||
- Use `--no-sync` if you only want to save a local bundle for inspection.
|
||||
- Creator-center pages belong to the currently logged-in Douyin account. StoryForge now treats them as supplemental evidence by default and will not let them overwrite the target profile unless you explicitly pass `--allow-creator-center-fallback`.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import fs from "node:fs/promises";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
@@ -20,6 +21,39 @@ const JSON_CAPTURE_LIMIT = 1_500_000;
|
||||
const SCRIPT_SCAN_LIMIT = 2_000_000;
|
||||
const WAIT_AFTER_NAV_MS = 4_000;
|
||||
const RESPONSE_READ_TIMEOUT_MS = 2_000;
|
||||
/**
 * Inline Python program that performs a single HTTP request with urllib and
 * prints exactly one JSON line of the form `{"status": <int>, "data": <any>}`.
 *
 * Positional arguments (sys.argv[1:6]):
 *   1. url
 *   2. HTTP method
 *   3. request headers as a JSON object
 *   4. body mode: "text" (argument 5 is the body itself), "path" (argument 5 is
 *      a file whose bytes are the body); any other value sends no body
 *   5. body value (ignored unless the mode is "text" or "path")
 *
 * HTTP error responses are reported with their real status code; any other
 * failure is reported as status 599 with the exception text under `data.raw`.
 * A response body that is not valid JSON is returned as `{"raw": <text>}`.
 *
 * The Python source must keep its indentation: block structure in Python is
 * whitespace-significant, so do not reflow the lines of this literal.
 */
const PYTHON_HTTP_BRIDGE = `
import json
import sys
import urllib.error
import urllib.request

url, method, headers_json, body_mode, body_value = sys.argv[1:6]
headers = json.loads(headers_json)
body = None
if body_mode == "text":
    body = body_value.encode("utf-8")
elif body_mode == "path":
    with open(body_value, "rb") as handle:
        body = handle.read()
request = urllib.request.Request(url, data=body, headers=headers, method=method)
try:
    with urllib.request.urlopen(request, timeout=120) as response:
        raw = response.read().decode("utf-8", "replace")
        try:
            payload = json.loads(raw) if raw else None
        except Exception:
            payload = {"raw": raw}
        print(json.dumps({"status": response.status, "data": payload}, ensure_ascii=False))
except urllib.error.HTTPError as error:
    raw = error.read().decode("utf-8", "replace")
    try:
        payload = json.loads(raw) if raw else None
    except Exception:
        payload = {"raw": raw}
    print(json.dumps({"status": error.code, "data": payload}, ensure_ascii=False))
except Exception as error:
    print(json.dumps({"status": 599, "data": {"raw": str(error)}}, ensure_ascii=False))
`;
|
||||
|
||||
function printHelp() {
|
||||
console.log(`StoryForge Douyin Browser Capture
|
||||
@@ -46,6 +80,8 @@ Mode flags:
|
||||
--skip-login-prompt Do not pause for manual login / captcha completion
|
||||
--no-sync Capture only, do not import into StoryForge
|
||||
--no-creator-center Skip creator-center page capture
|
||||
--allow-creator-center-fallback
|
||||
Allow creator-center identity to replace a missing public profile
|
||||
--note <text> Discovery note saved into StoryForge
|
||||
|
||||
Examples:
|
||||
@@ -71,6 +107,7 @@ function parseArgs(argv) {
|
||||
manualPrompt: true,
|
||||
syncEnabled: true,
|
||||
creatorCenterEnabled: true,
|
||||
allowCreatorCenterFallback: false,
|
||||
creatorCenterUrls: [...DEFAULT_CREATOR_CENTER_URLS],
|
||||
note: "",
|
||||
profileUrl: "",
|
||||
@@ -150,6 +187,9 @@ function parseArgs(argv) {
|
||||
case "--no-creator-center":
|
||||
options.creatorCenterEnabled = false;
|
||||
break;
|
||||
case "--allow-creator-center-fallback":
|
||||
options.allowCreatorCenterFallback = true;
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unknown argument: ${arg}`);
|
||||
}
|
||||
@@ -442,6 +482,78 @@ async function clickFirstVisible(page, selectors) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Escape every regular-expression metacharacter in `value` so the result can
 * be embedded in a `RegExp` source and match the input literally.
 *
 * @param {unknown} value - Text to escape; falsy values are treated as "".
 * @returns {string} The input with each of `. * + ? ^ $ { } ( ) | [ ] \`
 *   prefixed by a backslash.
 */
function escapeRegExp(value) {
  // "$&" in the replacement re-inserts the matched character after the backslash.
  return String(value || "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
||||
|
||||
/**
 * Undo the escaping applied to URLs embedded in HTML / inline JSON so the
 * result is a plain, usable URL string.
 *
 * Handles three encodings:
 *   - `\u002F` (JSON unicode escape for "/", either letter case)
 *   - `\/`     (JSON escaped slash)
 *   - `&amp;`  (HTML entity for "&" inside query strings)
 *
 * @param {unknown} value - Escaped URL text; falsy values are treated as "".
 * @returns {string} The decoded URL text.
 */
function decodeEscapedUrl(value) {
  return String(value || "")
    .replace(/\\u002F/gi, "/")
    .replace(/\\\//g, "/")
    // Must target the entity, not a bare "&": replacing "&" with "&" is a no-op.
    .replace(/&amp;/g, "&");
}
|
||||
|
||||
/**
 * Work out which creator-center `prefetch.json` manifest belongs to the page
 * that is currently loaded.
 *
 * Resolution order:
 *   1. A `"<pathname>": "<https://creator.douyin.com…prefetch.json>"` mapping
 *      found in the page HTML for the current pathname.
 *   2. Any manifest URL under `/goofy/douyin_creator_pc/mono/prefetch/` found
 *      in the HTML, preferring one that contains the current route (pathname
 *      with a leading `/creator-micro/` removed), otherwise the first found.
 *   3. A URL constructed from the current pathname.
 *
 * @param {{ url(): string, content(): Promise<string> }} page - Browser page
 *   handle (presumably a Playwright `Page` — only `url()` and `content()` are used).
 * @returns {Promise<string>} The manifest URL to fetch.
 */
async function resolveCreatorPrefetchUrl(page) {
  const current = new URL(page.url());
  const html = await page.content();

  // 1. Explicit route -> manifest mapping embedded in the page.
  const escapedPath = escapeRegExp(current.pathname);
  const mapped = html.match(
    new RegExp(`"${escapedPath}"\\s*:\\s*"(https://creator\\.douyin\\.com[^"]+prefetch\\.json)"`)
  );
  if (mapped?.[1]) {
    return decodeEscapedUrl(mapped[1]);
  }

  // 2. Any manifest URL mentioned in the HTML, de-duplicated in document order.
  const discovered = Array.from(
    new Set(
      [...html.matchAll(/https:\/\/creator\.douyin\.com\/goofy\/douyin_creator_pc\/mono\/prefetch\/[^"]+prefetch\.json/g)].map(
        (match) => decodeEscapedUrl(match[0])
      )
    )
  );
  // Computed once instead of on every candidate inside the find() callback.
  const routeSuffix = current.pathname.replace(/^\/creator-micro\//, "");
  // 3. Drop trailing slashes so the constructed URL never contains "//prefetch.json".
  const fallbackPath = current.pathname.replace(/\/+$/, "");

  return (
    discovered.find((candidate) => candidate.includes(routeSuffix)) ||
    discovered[0] ||
    `https://creator.douyin.com/goofy/douyin_creator_pc/mono/prefetch${fallbackPath}/prefetch.json`
  );
}
|
||||
|
||||
/**
 * Fetch the creator-center prefetch manifest for the current page and replay
 * every API call it lists, from inside the page so the browser session's
 * cookies are used.
 *
 * Best-effort by design: this function never rejects because of a network or
 * parsing problem inside the page.
 *   - If the manifest itself cannot be fetched or parsed, the result is
 *     `{ prefetch_url, error, results: [] }`.
 *   - If a single API call fails, that entry is recorded as
 *     `{ url, payload: null, error }` and the remaining APIs are still tried,
 *     so one bad endpoint does not discard everything already collected.
 *
 * @param {{ evaluate(fn: Function, arg: unknown): Promise<unknown> }} page -
 *   Browser page handle; it is also passed to `resolveCreatorPrefetchUrl`.
 * @returns {Promise<object>} `{ prefetch_url, prefetch, results }` on success,
 *   or `{ prefetch_url, error, results: [] }` when the manifest is unavailable.
 */
async function collectCreatorPrefetchResults(page) {
  const prefetchUrl = await resolveCreatorPrefetchUrl(page);
  // The callback executes in the page context, so it can only use its argument
  // and browser globals (fetch, URL, window) — nothing from this module.
  return page.evaluate(async ({ prefetchUrl }) => {
    try {
      const prefetchResp = await fetch(prefetchUrl, { credentials: "same-origin" });
      const prefetchText = await prefetchResp.text();
      const prefetch = JSON.parse(prefetchText);
      const results = [];
      for (const api of prefetch?.apis || []) {
        // Keep a printable URL even if URL construction itself throws.
        let requestUrl = String(api?.url ?? "");
        try {
          const target = new URL(api.url, window.location.origin);
          for (const [key, value] of Object.entries(api.params || {})) {
            target.searchParams.set(key, String(value));
          }
          requestUrl = target.toString();
          const resp = await fetch(requestUrl, {
            credentials: api.credentials || "same-origin",
          });
          // A non-JSON body is recorded as a null payload rather than an error.
          const payload = await resp.json().catch(() => null);
          results.push({
            url: requestUrl,
            payload,
          });
        } catch (error) {
          results.push({
            url: requestUrl,
            payload: null,
            error: String(error),
          });
        }
      }
      return {
        prefetch_url: prefetchUrl,
        prefetch,
        results,
      };
    } catch (error) {
      return {
        prefetch_url: prefetchUrl,
        error: String(error),
        results: [],
      };
    }
  }, { prefetchUrl });
}
|
||||
|
||||
async function prepareProfilePage(page, options) {
|
||||
await clickFirstVisible(page, [
|
||||
"text=作品",
|
||||
@@ -512,32 +624,38 @@ async function saveJsonSafe(filePath, value) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Perform one HTTP request through the `python3` bridge script and return the
 * decoded response body.
 *
 * The request is executed synchronously in a child process; the bridge prints
 * a single JSON line `{ status, data }` which is parsed here.
 *
 * @param {string} urlString - Absolute request URL.
 * @param {object} [options]
 * @param {string} [options.method="GET"] - HTTP method.
 * @param {object} [options.headers={}] - Request headers.
 * @param {string|object|null} [options.body=null] - Inline body; non-string
 *   values are JSON-encoded. Ignored when `bodyPath` is set.
 * @param {string} [options.bodyPath=""] - Path of a file whose bytes are sent
 *   as the body (takes precedence over `body`).
 * @returns {Promise<any>} The `data` field reported by the bridge.
 * @throws {Error} When the reported status is >= 400, or is missing (treated
 *   as 500); also propagates failures from spawning `python3` or parsing its output.
 */
async function requestJson(urlString, { method = "GET", headers = {}, body = null, bodyPath = "" } = {}) {
  const bodyMode = bodyPath ? "path" : body === null ? "none" : "text";
  // NOTE(review): in "text" mode the body travels as a process argument, so it
  // is visible in the local process list — this includes the login password.
  const bodyValue = bodyPath || (typeof body === "string" ? body : JSON.stringify(body));
  const stdout = execFileSync(
    "python3",
    ["-c", PYTHON_HTTP_BRIDGE, urlString, method, JSON.stringify(headers), bodyMode, bodyValue],
    { maxBuffer: 20 * 1024 * 1024, encoding: "utf8" }
  );
  // Empty output parses to {} and is therefore rejected below as status 500.
  const payload = JSON.parse(String(stdout || "").trim() || "{}");
  if ((payload.status || 500) >= 400) {
    throw new Error(`Request failed: ${payload.status} ${JSON.stringify(payload.data)}`);
  }
  return payload.data;
}

/**
 * Log in to the StoryForge backend.
 *
 * @param {string} baseUrl - Backend base URL; one trailing slash is tolerated.
 * @param {string} username
 * @param {string} password
 * @returns {Promise<any>} The decoded login response.
 * @throws {Error} When the request fails (see `requestJson`).
 */
async function loginStoryForge(baseUrl, username, password) {
  return requestJson(`${baseUrl.replace(/\/$/, "")}/v2/auth/login`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: { username, password }
  });
}

/**
 * Upload a previously saved sync request to the StoryForge backend.
 *
 * The body is read from disk by the bridge instead of being passed inline.
 *
 * @param {string} baseUrl - Backend base URL; one trailing slash is tolerated.
 * @param {string} token - Bearer token sent in the Authorization header.
 * @param {string} bodyPath - Path of the JSON file containing the sync request.
 * @returns {Promise<any>} The decoded sync response.
 * @throws {Error} When the request fails (see `requestJson`).
 */
async function syncCapture(baseUrl, token, bodyPath) {
  return requestJson(`${baseUrl.replace(/\/$/, "")}/v2/douyin/accounts/sync`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
      Authorization: `Bearer ${token}`
    },
    bodyPath
  });
}
|
||||
|
||||
async function captureCreatorPages(context, options, runDir) {
|
||||
@@ -552,7 +670,10 @@ async function captureCreatorPages(context, options, runDir) {
|
||||
try {
|
||||
console.error(`Capturing creator-center page: ${url}`);
|
||||
await navigateAndSettle(page, url, options.waitMs);
|
||||
const bundle = await capturePageBundle(page, "creator_center", responseCapture);
|
||||
const prefetchResults = await collectCreatorPrefetchResults(page);
|
||||
const bundle = await capturePageBundle(page, "creator_center", responseCapture, {
|
||||
creator_prefetch: prefetchResults
|
||||
});
|
||||
pages.push({
|
||||
url: bundle.page_url,
|
||||
title: bundle.page_title,
|
||||
@@ -650,12 +771,14 @@ async function main() {
|
||||
|
||||
const syncBody = {
|
||||
profile_url: options.profileUrl,
|
||||
allow_creator_center_profile_fallback: options.allowCreatorCenterFallback,
|
||||
manual_profile_payload: profileBundle,
|
||||
manual_creator_pages: creatorPages,
|
||||
manual_work_payloads: videoPages,
|
||||
discovery_note: options.note || "browser-assisted capture"
|
||||
};
|
||||
await saveJson(path.join(runDir, "storyforge-sync-request.json"), syncBody);
|
||||
const syncRequestPath = path.join(runDir, "storyforge-sync-request.json");
|
||||
await saveJson(syncRequestPath, syncBody);
|
||||
|
||||
summary.video_link_count = videoLinks.length;
|
||||
summary.captured_video_pages = videoPages.length;
|
||||
@@ -675,7 +798,7 @@ async function main() {
|
||||
default_external_base_url: auth.default_external_base_url
|
||||
});
|
||||
}
|
||||
const workspace = await syncCapture(options.backendUrl, token, syncBody);
|
||||
const workspace = await syncCapture(options.backendUrl, token, syncRequestPath);
|
||||
summary.sync_result = {
|
||||
account_id: workspace.account?.id || "",
|
||||
nickname: workspace.account?.nickname || "",
|
||||
|
||||
Reference in New Issue
Block a user