From 1fa1b586f718b87d5bb071f9ef482a0445830c4c Mon Sep 17 00:00:00 2001 From: kris Date: Fri, 20 Mar 2026 14:51:22 +0800 Subject: [PATCH] feat: add browser-assisted douyin capture flow --- .gitignore | 2 + README.md | 18 + docs/AUDIT_2026-03-18.md | 5 +- docs/LAN_E2E_GUIDE_2026-03-18.md | 20 + docs/MVP_STATUS_2026-03-18.md | 3 + scripts/douyin-browser-capture/README.md | 52 ++ .../capture_and_sync.mjs | 683 ++++++++++++++++++ .../douyin-browser-capture/package-lock.json | 59 ++ scripts/douyin-browser-capture/package.json | 14 + 9 files changed, 855 insertions(+), 1 deletion(-) create mode 100644 scripts/douyin-browser-capture/README.md create mode 100644 scripts/douyin-browser-capture/capture_and_sync.mjs create mode 100644 scripts/douyin-browser-capture/package-lock.json create mode 100644 scripts/douyin-browser-capture/package.json diff --git a/.gitignore b/.gitignore index c53c85f..72a6bbf 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ build/ .kotlin/ **/.gradle/ **/.kotlin/ +node_modules/ +**/node_modules/ # Runtime data and artifacts data/ diff --git a/README.md b/README.md index f8976a0..88d08f0 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,24 @@ cd /Users/kris/code/StoryForge-gitea/android-app ./gradlew assembleDebug ``` +## Douyin Browser Capture + +```bash +cd /Users/kris/code/StoryForge-gitea/scripts/douyin-browser-capture +npm install +npx playwright install chromium +npm run capture -- \ + --profile-url https://www.douyin.com/user/your_account \ + --storyforge-username kris \ + --storyforge-password '<your-storyforge-password>' 
+``` + +说明: + +- 这是“真实浏览器 + 人工登录/过挑战 + 自动提取 + 回写 StoryForge”的辅助采集工具 +- 默认输出到 `output/playwright/douyin/` +- 详细说明见 `scripts/douyin-browser-capture/README.md` + ## Collector Service ```bash diff --git a/docs/AUDIT_2026-03-18.md b/docs/AUDIT_2026-03-18.md index 8f46b3b..241527a 100644 --- a/docs/AUDIT_2026-03-18.md +++ b/docs/AUDIT_2026-03-18.md @@ -116,6 +116,9 @@ - public 页面命中抖音反爬挑战时的显式诊断返回 - 真实 smoke 结果表明,纯 public 主页抓取会落到 `byted_acrawler` 挑战页,而不是正常 profile 数据页 - 同时,`manual_profile_payload + manual_work_payloads` 已验证可完成账号入库、分析报告生成、相似账号搜索和对标关系写入 +- 现已新增浏览器辅助采集工具 `/Users/kris/code/StoryForge-gitea/scripts/douyin-browser-capture/capture_and_sync.mjs` +- 该工具使用真实 Playwright Chromium 会话打开抖音页面,允许人工登录 / 过滑块后继续自动提取 `