Files
storyforge/scripts/douyin-browser-capture/control_panel.mjs
2026-03-20 22:27:54 +08:00

766 lines
25 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
import { spawn } from "node:child_process";
import fs from "node:fs/promises";
import http from "node:http";
import os from "node:os";
import path from "node:path";
import process from "node:process";
import { fileURLToPath } from "node:url";
// Directory that contains this script; sibling scripts are resolved relative to it.
const SCRIPT_DIR = path.dirname(fileURLToPath(import.meta.url));
// Capture script that every run spawns as a child Node.js process.
const CAPTURE_SCRIPT = path.join(SCRIPT_DIR, "capture_and_sync.mjs");
// Port the control panel listens on; falls back to 3618 when PORT is unset or empty.
const DEFAULT_PORT = Number.parseInt(process.env.PORT || "3618", 10);
// Backend URL used when a request does not provide one.
const DEFAULT_BACKEND_URL = "http://127.0.0.1:8081";
// Root directory under which each run gets its own sub-directory.
// NOTE(review): hard-coded absolute path that looks specific to one developer machine — confirm before running elsewhere.
const DEFAULT_OUTPUT_ROOT = "/Users/kris/code/StoryForge-gitea/output/playwright/douyin/control-panel";
// Default value handed to the capture script as --state-dir.
const DEFAULT_STATE_DIR = path.join(os.homedir(), ".storyforge", "douyin-playwright");
// Maximum number of log lines kept in memory per run.
const MAX_LOG_LINES = 240;
// Number of most recently modified run directories reported by listRecentRuns.
const MAX_RECENT_RUNS = 8;
// In-memory registry of runs started by this process, keyed by run id.
const runs = new Map();
/**
 * Returns the current instant as an ISO-8601 string (UTC, millisecond precision).
 *
 * @returns {string} Timestamp such as "2026-03-20T14:27:54.000Z".
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
/**
 * Builds a run identifier of the form `run-<time>-<random>`, where both
 * parts are base-36 encoded (current epoch milliseconds and up to six
 * characters taken from Math.random()).
 *
 * @returns {string} The new run id.
 */
function createRunId() {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 8);
  return ["run", timePart, randomPart].join("-");
}
/**
 * Creates `dir` together with any missing parent directories.
 * Resolves without error when the directory already exists.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
  const options = { recursive: true };
  await fs.mkdir(dir, options);
}
/**
 * Drops the oldest entries of `logs` in place so that at most
 * MAX_LOG_LINES entries remain.
 *
 * @param {string[]} logs - Log buffer to trim (mutated).
 */
function trimLogBuffer(logs) {
  const overflow = logs.length - MAX_LOG_LINES;
  if (overflow <= 0) {
    return;
  }
  logs.splice(0, overflow);
}
/**
 * Splits `chunk` into lines, strips trailing whitespace, skips empty lines,
 * and appends each remaining line to `run.logs` prefixed with a zh-CN
 * 24-hour wall-clock time and the `source` tag. The buffer is trimmed
 * afterwards.
 *
 * @param {{ logs: string[] }} run - Run whose log buffer is extended.
 * @param {string} source - Tag placed in brackets, e.g. "stdout" or "system".
 * @param {*} chunk - Text (or Buffer) to log; falsy values log nothing.
 */
function appendLog(run, source, chunk) {
  const text = String(chunk || "");
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trimEnd();
    if (!line) {
      continue;
    }
    const stamp = new Date().toLocaleTimeString("zh-CN", { hour12: false });
    run.logs.push(`[${stamp}] [${source}] ${line}`);
  }
  trimLogBuffer(run.logs);
}
/**
 * Reads the whole request body and parses it as JSON.
 *
 * @param {AsyncIterable<Buffer>} req - Incoming request stream.
 * @returns {Promise<*>} The parsed value, or `{}` when the body is empty
 *   or whitespace only.
 * @throws {Error} When the body exceeds 512 KiB.
 * @throws {SyntaxError} When the body is not valid JSON.
 */
async function readJsonBody(req) {
  const limit = 512 * 1024;
  const parts = [];
  let received = 0;
  for await (const part of req) {
    received += part.length;
    if (received > limit) {
      throw new Error("Request body too large");
    }
    parts.push(part);
  }
  const text = Buffer.concat(parts).toString("utf8").trim();
  if (!text) {
    return {};
  }
  return JSON.parse(text);
}
/**
 * Best-effort JSON file reader: returns the parsed content of `filePath`,
 * or `null` when the file cannot be read or does not contain valid JSON.
 *
 * @param {string} filePath - File to read as UTF-8 JSON.
 * @returns {Promise<*|null>}
 */
async function readJsonIfExists(filePath) {
  let parsed = null;
  try {
    const text = await fs.readFile(filePath, "utf8");
    parsed = JSON.parse(text);
  } catch {
    // Missing, unreadable, and malformed files are all treated as "no data".
    parsed = null;
  }
  return parsed;
}
/**
 * Finds the most recently modified direct sub-directory of `runBaseDir`.
 *
 * @param {string} runBaseDir - Directory whose children are inspected.
 * @returns {Promise<string>} Full path of the newest sub-directory, or ""
 *   when there is none or any filesystem call fails.
 */
async function findLatestCaptureDir(runBaseDir) {
  try {
    const children = await fs.readdir(runBaseDir, { withFileTypes: true });
    let newestPath = "";
    let newestMtime = -Infinity;
    for (const child of children) {
      if (!child.isDirectory()) {
        continue;
      }
      const candidate = path.join(runBaseDir, child.name);
      const { mtimeMs } = await fs.stat(candidate);
      // Strict comparison keeps the first directory seen when mtimes tie.
      if (mtimeMs > newestMtime) {
        newestMtime = mtimeMs;
        newestPath = candidate;
      }
    }
    return newestPath;
  } catch {
    return "";
  }
}
/**
 * Loads the JSON artifacts found in the newest capture directory under
 * `runBaseDir`. The four files are read concurrently; each field is `null`
 * when its file is missing or unparsable.
 *
 * @param {string} runBaseDir - Base directory of one run.
 * @returns {Promise<null | { outputDir: string, summary: *, syncResponse: *, syncError: *, login: * }>}
 *   `null` when no capture directory exists yet.
 */
async function loadArtifacts(runBaseDir) {
  const outputDir = await findLatestCaptureDir(runBaseDir);
  if (!outputDir) {
    return null;
  }
  const fileNames = [
    "summary.json",
    "storyforge-sync-response.json",
    "storyforge-sync-error.json",
    "storyforge-login.json"
  ];
  const pending = fileNames.map((fileName) => readJsonIfExists(path.join(outputDir, fileName)));
  const [summary, syncResponse, syncError, login] = await Promise.all(pending);
  return { outputDir, summary, syncResponse, syncError, login };
}
/**
 * Re-reads the on-disk artifacts for `run` and copies them onto the run
 * object. Leaves `run` untouched when no capture directory exists yet.
 *
 * @param {{ runBaseDir: string }} run - Run record to update in place.
 * @returns {Promise<void>}
 */
async function refreshRunArtifacts(run) {
  const latest = await loadArtifacts(run.runBaseDir);
  if (latest) {
    const { outputDir, summary, syncResponse, syncError, login } = latest;
    Object.assign(run, { outputDir, summary, syncResponse, syncError, login });
  }
}
/**
 * Produces the JSON-safe view of a run that is sent to the browser.
 * Internal fields (child process handle, ready file, login artifact) are
 * omitted and only the last 80 log lines are included.
 *
 * @param {object|null|undefined} run - Run record, or a falsy value.
 * @returns {object|null} Serializable snapshot, or `null` for a falsy `run`.
 */
function serializeRun(run) {
  if (!run) {
    return null;
  }
  const { id, status, profileUrl, backendUrl, syncEnabled, headless, startedAt, exitCode } = run;
  const recentLogs = run.logs.slice(-80);
  return {
    id,
    status,
    profileUrl,
    backendUrl,
    syncEnabled,
    headless,
    startedAt,
    continuedAt: run.continuedAt || "",
    finishedAt: run.finishedAt || "",
    outputDir: run.outputDir || "",
    exitCode,
    signal: run.signal || "",
    summary: run.summary || null,
    syncResponse: run.syncResponse || null,
    syncError: run.syncError || null,
    logs: recentLogs
  };
}
/**
 * Returns the first registered run (in insertion order) whose status is not
 * one of "completed", "failed" or "terminated".
 *
 * @returns {object|null} The active run, or `null` when every run has ended.
 */
function getActiveRun() {
  const finishedStatuses = ["completed", "failed", "terminated"];
  for (const run of runs.values()) {
    if (!finishedStatuses.includes(run.status)) {
      return run;
    }
  }
  return null;
}
/**
 * Translates a start request into the argv passed to the capture script.
 *
 * The token and note are trimmed BEFORE deciding whether they are present,
 * so a whitespace-only token falls back to username/password (matching the
 * validation in startRun) and a whitespace-only note is omitted instead of
 * being forwarded as an empty string.
 *
 * @param {object} payload - Request options (profileUrl, backendUrl,
 *   stateDir, maxVideos, waitMs, note, headless, syncEnabled,
 *   skipCreatorCenter, allowCreatorCenterFallback, token, username, password).
 * @param {string} runBaseDir - Value for --output-dir.
 * @param {string} readyFile - Value for --ready-file.
 * @returns {string[]} Arguments, starting with the capture script path.
 */
function buildCaptureArgs(payload, runBaseDir, readyFile) {
  const parsedMaxVideos = Number.parseInt(String(payload.maxVideos ?? "4"), 10);
  const parsedWaitMs = Number.parseInt(String(payload.waitMs ?? "4000"), 10);
  const token = String(payload.token || "").trim();
  const note = String(payload.note || "").trim();
  const args = [
    CAPTURE_SCRIPT,
    "--profile-url",
    String(payload.profileUrl || "").trim(),
    "--backend-url",
    String(payload.backendUrl || DEFAULT_BACKEND_URL).trim(),
    "--output-dir",
    runBaseDir,
    "--state-dir",
    String(payload.stateDir || DEFAULT_STATE_DIR).trim(),
    "--max-videos",
    // Unparsable input falls back to 4; negative values are clamped to 0.
    String(Number.isFinite(parsedMaxVideos) ? Math.max(0, parsedMaxVideos) : 4),
    "--wait-ms",
    // Unparsable input falls back to 4000; values below 800 ms are raised to 800.
    String(Number.isFinite(parsedWaitMs) ? Math.max(800, parsedWaitMs) : 4000),
    "--ready-file",
    readyFile
  ];
  if (note) {
    args.push("--note", note);
  }
  if (payload.headless) {
    args.push("--headless");
  }
  if (!payload.syncEnabled) {
    args.push("--no-sync");
  }
  if (payload.skipCreatorCenter) {
    args.push("--no-creator-center");
  }
  if (payload.allowCreatorCenterFallback) {
    args.push("--allow-creator-center-fallback");
  }
  if (token) {
    args.push("--storyforge-token", token);
  } else if (payload.syncEnabled) {
    // The password is forwarded untrimmed on purpose: whitespace may be significant.
    args.push("--storyforge-username", String(payload.username || "").trim());
    args.push("--storyforge-password", String(payload.password || ""));
  }
  return args;
}
/**
 * Validates a start request, spawns the capture script as a child Node.js
 * process, and registers the resulting run in the in-memory registry.
 *
 * The logged command line masks the values that follow BOTH the password
 * and the token flag, because the log buffer is returned to the browser.
 * An 'error' listener is attached to the child so that a failure to spawn
 * marks the run as failed instead of raising an uncaught exception.
 *
 * @param {object} payload - Request body; see buildCaptureArgs for the
 *   recognised fields. `syncEnabled` defaults to true unless it is
 *   exactly `false`.
 * @returns {Promise<object>} The newly registered run record.
 * @throws {Error} When the profile URL is missing, when sync is enabled
 *   without a token or username/password pair, or when another run is
 *   still active.
 */
async function startRun(payload) {
  const profileUrl = String(payload.profileUrl || "").trim();
  if (!profileUrl) {
    throw new Error("请先填写抖音主页链接");
  }
  const syncEnabled = payload.syncEnabled !== false;
  if (syncEnabled && !String(payload.token || "").trim()) {
    if (!String(payload.username || "").trim() || !String(payload.password || "")) {
      throw new Error("导入 StoryForge 时需要账号密码,或者直接提供 Token");
    }
  }
  if (getActiveRun()) {
    throw new Error("当前已有进行中的采集任务,请先完成或等待结束");
  }
  const id = createRunId();
  const runBaseDir = path.join(DEFAULT_OUTPUT_ROOT, id);
  const readyFile = path.join(runBaseDir, "manual-ready.signal");
  await ensureDir(runBaseDir);
  const args = buildCaptureArgs(
    {
      ...payload,
      profileUrl,
      syncEnabled
    },
    runBaseDir,
    readyFile
  );
  const child = spawn(process.execPath, args, {
    cwd: SCRIPT_DIR,
    env: process.env,
    stdio: ["ignore", "pipe", "pipe"]
  });
  const run = {
    id,
    status: "awaiting_continue",
    profileUrl,
    backendUrl: String(payload.backendUrl || DEFAULT_BACKEND_URL).trim(),
    syncEnabled,
    headless: Boolean(payload.headless),
    startedAt: nowIso(),
    continuedAt: "",
    finishedAt: "",
    runBaseDir,
    readyFile,
    child,
    logs: [],
    outputDir: "",
    summary: null,
    syncResponse: null,
    syncError: null,
    exitCode: null,
    signal: ""
  };
  // Values following these flags are credentials and must never be logged.
  const secretFlags = new Set(["--storyforge-password", "--storyforge-token"]);
  const visibleArgs = args.map((arg, index) => (secretFlags.has(args[index - 1]) ? "******" : arg));
  appendLog(run, "system", `Started ${process.execPath} ${visibleArgs.join(" ")}`);
  child.stdout.on("data", (chunk) => appendLog(run, "stdout", chunk));
  child.stderr.on("data", (chunk) => appendLog(run, "stderr", chunk));
  child.on("error", (error) => {
    appendLog(run, "system", `Capture process error: ${error?.message || String(error)}`);
    // 'exit' may never fire after a failed spawn, so close the run here;
    // a later 'exit' event simply overwrites these fields.
    if (!run.finishedAt) {
      run.finishedAt = nowIso();
      run.status = "failed";
    }
  });
  child.on("exit", async (code, signal) => {
    run.exitCode = code;
    run.signal = signal || "";
    run.finishedAt = nowIso();
    await refreshRunArtifacts(run);
    run.status = signal ? "terminated" : code === 0 ? "completed" : "failed";
    appendLog(run, "system", `Process exited with status ${run.status}${code !== null ? ` (${code})` : ""}`);
  });
  runs.set(id, run);
  return run;
}
/**
 * Signals a waiting capture process to proceed by writing the run's ready
 * file, then marks the run as "capturing".
 *
 * @param {string} runId - Id of a run in the in-memory registry.
 * @returns {Promise<object>} The updated run record.
 * @throws {Error} When the run is unknown or has already ended.
 */
async function continueRun(runId) {
  const run = runs.get(runId);
  if (!run) {
    throw new Error("采集任务不存在");
  }
  const hasEnded = run.status === "completed" || run.status === "failed" || run.status === "terminated";
  if (hasEnded) {
    throw new Error("这个采集任务已经结束了");
  }
  const signalDir = path.dirname(run.readyFile);
  await ensureDir(signalDir);
  const signalContent = `${nowIso()}\n`;
  await fs.writeFile(run.readyFile, signalContent, "utf8");
  run.continuedAt = nowIso();
  run.status = "capturing";
  appendLog(run, "system", "Manual ready signal sent");
  return run;
}
/**
 * Lists the most recently modified run directories under
 * DEFAULT_OUTPUT_ROOT (at most MAX_RECENT_RUNS, newest first) together
 * with the artifacts found in each.
 *
 * Directories are stat-ed and their artifacts loaded concurrently. A
 * directory that disappears between the listing and its stat call is
 * skipped, so one vanished directory does not fail the whole status
 * request; any other stat error is still propagated.
 *
 * @returns {Promise<Array<{ id: string, outputDir: string, summary: *, syncResponse: *, syncError: * }>>}
 */
async function listRecentRuns() {
  await ensureDir(DEFAULT_OUTPUT_ROOT);
  const entries = await fs.readdir(DEFAULT_OUTPUT_ROOT, { withFileTypes: true });
  const statted = await Promise.all(
    entries
      .filter((entry) => entry.isDirectory())
      .map(async (entry) => {
        const runBaseDir = path.join(DEFAULT_OUTPUT_ROOT, entry.name);
        try {
          const stat = await fs.stat(runBaseDir);
          return { id: entry.name, runBaseDir, mtimeMs: stat.mtimeMs };
        } catch (error) {
          if (error?.code === "ENOENT") {
            // Removed after readdir returned; treat it as not present.
            return null;
          }
          throw error;
        }
      })
  );
  const newestFirst = statted
    .filter((item) => item !== null)
    .sort((left, right) => right.mtimeMs - left.mtimeMs);
  return Promise.all(
    newestFirst.slice(0, MAX_RECENT_RUNS).map(async (item) => {
      const artifacts = await loadArtifacts(item.runBaseDir);
      return {
        id: item.id,
        outputDir: artifacts?.outputDir || "",
        summary: artifacts?.summary || null,
        syncResponse: artifacts?.syncResponse || null,
        syncError: artifacts?.syncError || null
      };
    })
  );
}
/**
 * Sends `payload` as a pretty-printed (2-space) JSON response with caching
 * disabled.
 *
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {number} statusCode - HTTP status code.
 * @param {*} payload - Value to serialize.
 */
function sendJson(res, statusCode, payload) {
  // Serialize first so a serialization failure happens before any header is written.
  const serialized = JSON.stringify(payload, null, 2);
  const headers = {
    "content-type": "application/json; charset=utf-8",
    "cache-control": "no-store"
  };
  res.writeHead(statusCode, headers);
  res.end(serialized);
}
/**
 * Sends `html` as a 200 text/html response with caching disabled.
 *
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {string} html - Document markup.
 */
function sendHtml(res, html) {
  const headers = {
    "content-type": "text/html; charset=utf-8",
    "cache-control": "no-store"
  };
  res.writeHead(200, headers);
  res.end(html);
}
/**
 * Renders the complete control-panel page: markup, styles and the inline
 * client script that polls /api/status and posts to /api/start and
 * /api/runs/<id>/continue.
 *
 * The whole page is one template literal, so its content is emitted
 * verbatim. DEFAULT_BACKEND_URL is interpolated as the backend field's
 * initial value, and `\\n` in this source becomes the two-character escape
 * `\n` inside the served script.
 *
 * The client-side escapeHtml helper uses `??` rather than `||`, so numeric
 * zero (for example a video link count of 0) is rendered as "0" instead of
 * an empty string; only null and undefined become "".
 *
 * @returns {string} The HTML document.
 */
function renderPage() {
  return `<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>StoryForge Douyin Browser Assist</title>
<style>
:root {
color-scheme: light;
--bg: #f4efe6;
--ink: #16313d;
--muted: #577182;
--accent: #1f6e5f;
--accent-2: #b97524;
--card: rgba(255, 255, 255, 0.86);
--border: rgba(22, 49, 61, 0.12);
}
* { box-sizing: border-box; }
body {
margin: 0;
font-family: "PingFang SC", "Noto Sans SC", sans-serif;
background:
radial-gradient(circle at top left, rgba(31, 110, 95, 0.16), transparent 30%),
linear-gradient(135deg, #f6efe3, #eff7f4 55%, #fdf8ef);
color: var(--ink);
}
main {
max-width: 1080px;
margin: 0 auto;
padding: 32px 20px 48px;
}
h1, h2, h3 { margin: 0; }
.hero {
background: linear-gradient(135deg, #0b3c5d, #1f6e5f 58%, #b97524);
color: white;
border-radius: 28px;
padding: 28px;
box-shadow: 0 18px 42px rgba(11, 60, 93, 0.18);
}
.hero p {
margin: 12px 0 0;
max-width: 760px;
line-height: 1.6;
color: rgba(255, 255, 255, 0.88);
}
.grid {
display: grid;
grid-template-columns: 1.1fr 0.9fr;
gap: 18px;
margin-top: 20px;
}
.card {
background: var(--card);
border: 1px solid var(--border);
border-radius: 22px;
padding: 20px;
box-shadow: 0 12px 30px rgba(22, 49, 61, 0.08);
backdrop-filter: blur(8px);
}
.stack { display: grid; gap: 12px; }
label { display: grid; gap: 6px; font-size: 14px; color: var(--muted); }
input, textarea {
width: 100%;
border-radius: 14px;
border: 1px solid rgba(22, 49, 61, 0.12);
padding: 12px 14px;
font: inherit;
background: rgba(255, 255, 255, 0.96);
color: var(--ink);
}
textarea { min-height: 88px; resize: vertical; }
.row { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
.checks {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 10px;
}
.check {
display: flex;
align-items: center;
gap: 8px;
border: 1px solid rgba(22, 49, 61, 0.1);
border-radius: 14px;
padding: 10px 12px;
background: rgba(255, 255, 255, 0.75);
color: var(--ink);
}
button {
border: 0;
border-radius: 999px;
padding: 12px 18px;
font: inherit;
cursor: pointer;
transition: transform 0.15s ease, opacity 0.15s ease;
}
button:hover { transform: translateY(-1px); }
.primary { background: var(--accent); color: white; }
.secondary { background: rgba(22, 49, 61, 0.08); color: var(--ink); }
.warning { background: var(--accent-2); color: white; }
.actions { display: flex; gap: 10px; flex-wrap: wrap; }
.steps {
display: grid;
gap: 10px;
margin-top: 16px;
}
.step {
border-left: 3px solid var(--accent);
padding-left: 12px;
color: var(--muted);
line-height: 1.55;
}
.pill {
display: inline-flex;
align-items: center;
padding: 6px 10px;
border-radius: 999px;
background: rgba(31, 110, 95, 0.1);
color: var(--accent);
font-size: 13px;
font-weight: 600;
}
.status-box {
display: grid;
gap: 12px;
margin-top: 14px;
}
.status-line {
display: flex;
justify-content: space-between;
gap: 12px;
font-size: 14px;
color: var(--muted);
}
pre {
margin: 0;
padding: 14px;
border-radius: 18px;
background: #12222c;
color: #d7efe8;
min-height: 220px;
max-height: 380px;
overflow: auto;
font-size: 12px;
line-height: 1.55;
white-space: pre-wrap;
word-break: break-word;
}
.recent-list { display: grid; gap: 12px; }
.recent-item {
border: 1px solid rgba(22, 49, 61, 0.1);
border-radius: 16px;
padding: 14px;
background: rgba(255, 255, 255, 0.72);
}
.meta { color: var(--muted); font-size: 13px; line-height: 1.55; }
.path {
font-family: "SF Mono", ui-monospace, monospace;
font-size: 12px;
color: var(--muted);
word-break: break-all;
}
.hint {
color: var(--muted);
font-size: 13px;
line-height: 1.55;
}
@media (max-width: 900px) {
.grid, .row, .checks { grid-template-columns: 1fr; }
}
</style>
</head>
<body>
<main>
<section class="hero">
<span class="pill">StoryForge / Douyin Browser Assist</span>
<h1 style="margin-top: 14px;">用网页点按钮,驱动真实浏览器采集抖音账号</h1>
<p>这不是无头绕反爬,而是一个可控的半自动流程。你点击“开始采集”后,脚本会打开真实 Chromium会话沿用同一份登录态。你在浏览器里登录或过滑块后回到这里点“已完成登录继续采集”系统就会继续抓取主页、creator-center并按安全规则同步进 StoryForge。</p>
</section>
<div class="grid">
<section class="card stack">
<div>
<h2>开始新采集</h2>
<p class="hint">默认会导入 StoryForge如果只是想先抓本地 bundle可以勾选“仅采集不导入”。</p>
</div>
<form id="capture-form" class="stack">
<label>
抖音主页链接
<input id="profile-url" name="profileUrl" placeholder="https://www.douyin.com/user/..." required />
</label>
<div class="row">
<label>
StoryForge 地址
<input id="backend-url" name="backendUrl" value="${DEFAULT_BACKEND_URL}" />
</label>
<label>
备注
<input id="note" name="note" placeholder="例如:浏览器辅助采集" />
</label>
</div>
<div class="row">
<label>
StoryForge 用户名
<input id="username" name="username" placeholder="kris" />
</label>
<label>
StoryForge 密码
<input id="password" name="password" type="password" placeholder="用于自动导入时登录" />
</label>
</div>
<div class="row">
<label>
已有 Token可选
<input id="token" name="token" placeholder="Bearer token可替代账号密码" />
</label>
<label>
最大作品页抓取数
<input id="max-videos" name="maxVideos" type="number" min="0" max="10" value="4" />
</label>
</div>
<div class="checks">
<label class="check"><input id="sync-enabled" type="checkbox" checked /> 导入 StoryForge</label>
<label class="check"><input id="headless" type="checkbox" /> Headless</label>
<label class="check"><input id="skip-creator-center" type="checkbox" /> 跳过 creator-center</label>
<label class="check"><input id="allow-fallback" type="checkbox" /> 允许 creator-center 兜底</label>
</div>
<div class="actions">
<button class="primary" type="submit">开始采集</button>
<button class="warning" id="continue-button" type="button" disabled>已完成登录,继续采集</button>
<button class="secondary" id="refresh-button" type="button">刷新状态</button>
</div>
</form>
<div class="steps">
<div class="step">1. 点击“开始采集”,脚本会在本机打开 Chromium。</div>
<div class="step">2. 在打开的浏览器里完成登录、滑块或验证码,并确认已进入目标主页。</div>
<div class="step">3. 回到这里点击“已完成登录,继续采集”。</div>
<div class="step">4. 等待脚本自动抓取、写出 <code>summary.json</code>,并可选同步到 StoryForge。</div>
</div>
</section>
<section class="card">
<h2>当前任务</h2>
<div id="active-status" class="status-box">
<p class="hint">当前没有进行中的采集任务。</p>
</div>
<h3 style="margin-top: 18px;">实时日志</h3>
<pre id="logs">等待任务启动…</pre>
</section>
</div>
<section class="card" style="margin-top: 18px;">
<div style="display: flex; justify-content: space-between; gap: 12px; align-items: center;">
<div>
<h2>最近运行</h2>
<p class="hint">这里展示的是控制台模式启动过的采集任务。</p>
</div>
</div>
<div id="recent-runs" class="recent-list" style="margin-top: 14px;"></div>
</section>
</main>
<script>
const activeStatusEl = document.getElementById("active-status");
const logsEl = document.getElementById("logs");
const recentRunsEl = document.getElementById("recent-runs");
const continueButton = document.getElementById("continue-button");
const refreshButton = document.getElementById("refresh-button");
const form = document.getElementById("capture-form");
let activeRunId = "";
function escapeHtml(value) {
return String(value ?? "")
.replaceAll("&", "&amp;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;");
}
function renderActiveRun(run) {
activeRunId = run?.id || "";
continueButton.disabled = !run || run.status !== "awaiting_continue";
if (!run) {
activeStatusEl.innerHTML = '<p class="hint">当前没有进行中的采集任务。</p>';
logsEl.textContent = "等待任务启动…";
return;
}
const summary = run.summary || {};
const syncErrors = (summary.sync_result?.sync_errors || run.syncResponse?.sync_errors || []).join("、");
activeStatusEl.innerHTML = [
'<div class="status-line"><strong>状态</strong><span>' + escapeHtml(run.status) + '</span></div>',
'<div class="status-line"><strong>主页</strong><span>' + escapeHtml(run.profileUrl) + '</span></div>',
'<div class="status-line"><strong>开始时间</strong><span>' + escapeHtml(run.startedAt) + '</span></div>',
run.outputDir ? '<div class="status-line"><strong>输出目录</strong><span class="path">' + escapeHtml(run.outputDir) + '</span></div>' : '',
summary.status ? '<div class="status-line"><strong>采集结果</strong><span>' + escapeHtml(summary.status) + '</span></div>' : '',
summary.video_link_count !== undefined ? '<div class="status-line"><strong>作品链接数</strong><span>' + escapeHtml(summary.video_link_count) + '</span></div>' : '',
summary.captured_creator_pages !== undefined ? '<div class="status-line"><strong>creator 页面数</strong><span>' + escapeHtml(summary.captured_creator_pages) + '</span></div>' : '',
syncErrors ? '<div class="status-line"><strong>同步提示</strong><span>' + escapeHtml(syncErrors) + '</span></div>' : ''
].filter(Boolean).join("");
logsEl.textContent = (run.logs || []).join("\\n") || "任务已启动,等待日志…";
}
function renderRecentRuns(items) {
if (!items.length) {
recentRunsEl.innerHTML = '<p class="hint">还没有控制台模式的历史运行记录。</p>';
return;
}
recentRunsEl.innerHTML = items.map((item) => {
const summary = item.summary || {};
const syncResult = summary.sync_result || {};
return [
'<article class="recent-item">',
'<div style="display:flex;justify-content:space-between;gap:12px;align-items:center;">',
'<strong>' + escapeHtml(summary.profile_url || item.id) + '</strong>',
'<span class="pill">' + escapeHtml(summary.status || "unknown") + '</span>',
'</div>',
'<p class="meta" style="margin:10px 0 0;">作品链接 ' + escapeHtml(summary.video_link_count ?? "-") + 'creator 页面 ' + escapeHtml(summary.captured_creator_pages ?? "-") + '</p>',
syncResult.nickname ? '<p class="meta" style="margin:8px 0 0;">同步账号:' + escapeHtml(syncResult.nickname) + '</p>' : '',
item.outputDir ? '<div class="path" style="margin-top:8px;">' + escapeHtml(item.outputDir) + '</div>' : '',
'</article>'
].join("");
}).join("");
}
async function refreshStatus() {
const response = await fetch("/api/status", { cache: "no-store" });
const payload = await response.json();
renderActiveRun(payload.activeRun);
renderRecentRuns(payload.recentRuns || []);
}
form.addEventListener("submit", async (event) => {
event.preventDefault();
const payload = {
profileUrl: document.getElementById("profile-url").value.trim(),
backendUrl: document.getElementById("backend-url").value.trim(),
username: document.getElementById("username").value.trim(),
password: document.getElementById("password").value,
token: document.getElementById("token").value.trim(),
note: document.getElementById("note").value.trim(),
maxVideos: document.getElementById("max-videos").value,
syncEnabled: document.getElementById("sync-enabled").checked,
headless: document.getElementById("headless").checked,
skipCreatorCenter: document.getElementById("skip-creator-center").checked,
allowCreatorCenterFallback: document.getElementById("allow-fallback").checked
};
const response = await fetch("/api/start", {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify(payload)
});
const result = await response.json();
if (!response.ok) {
alert(result.error || "启动失败");
return;
}
await refreshStatus();
});
continueButton.addEventListener("click", async () => {
if (!activeRunId) {
return;
}
const response = await fetch("/api/runs/" + encodeURIComponent(activeRunId) + "/continue", {
method: "POST"
});
const result = await response.json();
if (!response.ok) {
alert(result.error || "继续失败");
return;
}
await refreshStatus();
});
refreshButton.addEventListener("click", refreshStatus);
refreshStatus();
setInterval(refreshStatus, 1500);
</script>
</body>
</html>`;
}
/**
 * HTTP server for the control panel.
 *
 * Routes:
 *   GET  /                        - the HTML page
 *   GET  /api/status              - active run (artifacts refreshed) plus recent runs
 *   POST /api/start               - start a run from the JSON body
 *   POST /api/runs/<id>/continue  - send the manual "ready" signal to a run
 * Anything else is answered with 404.
 *
 * The request target is parsed INSIDE the try block: a target such as "//"
 * makes `new URL` throw, and because the listener is async an exception
 * outside the try would become an unhandled promise rejection instead of
 * an HTTP response. Unparsable targets and malformed percent-encoding in
 * the run id are answered with 400; every other failure is reported as
 * 500 with the error message.
 */
const server = http.createServer(async (req, res) => {
  try {
    let url;
    try {
      url = new URL(req.url || "/", "http://127.0.0.1");
    } catch {
      sendJson(res, 400, { error: "Bad request" });
      return;
    }
    if (req.method === "GET" && url.pathname === "/") {
      sendHtml(res, renderPage());
      return;
    }
    if (req.method === "GET" && url.pathname === "/api/status") {
      const activeRun = getActiveRun();
      if (activeRun) {
        await refreshRunArtifacts(activeRun);
      }
      sendJson(res, 200, {
        activeRun: serializeRun(activeRun),
        recentRuns: await listRecentRuns()
      });
      return;
    }
    if (req.method === "POST" && url.pathname === "/api/start") {
      const payload = await readJsonBody(req);
      const run = await startRun(payload);
      sendJson(res, 200, { run: serializeRun(run) });
      return;
    }
    if (req.method === "POST" && /^\/api\/runs\/[^/]+\/continue$/.test(url.pathname)) {
      let runId;
      try {
        runId = decodeURIComponent(url.pathname.split("/")[3] || "");
      } catch {
        // decodeURIComponent throws URIError on sequences such as "%zz".
        sendJson(res, 400, { error: "Bad request" });
        return;
      }
      const run = await continueRun(runId);
      sendJson(res, 200, { run: serializeRun(run) });
      return;
    }
    sendJson(res, 404, { error: "Not found" });
  } catch (error) {
    if (res.headersSent) {
      // A response was already started; writing headers again would throw.
      res.end();
      return;
    }
    sendJson(res, 500, { error: error?.message || String(error) });
  }
});
// Startup: make sure the output root exists, then bind the panel to the
// loopback interface only. A failure in either step is printed and turned
// into a non-zero exit code.
try {
  await ensureDir(DEFAULT_OUTPUT_ROOT);
  server.listen(DEFAULT_PORT, "127.0.0.1", () => {
    console.log(`StoryForge Douyin control panel: http://127.0.0.1:${DEFAULT_PORT}`);
  });
} catch (error) {
  console.error(error?.stack || String(error));
  process.exitCode = 1;
}