diff --git a/README.md b/README.md index 0f25036..3c99925 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,13 @@ N8N_BASE_URL=http://127.0.0.1:5670 - `cli-proxy-api`:`http://127.0.0.1:8317` - 公网入口:`https://storyforge.hyzq.net/` +公网维护常用脚本: + +```bash +./scripts/smoke_public_storyforge.sh +./scripts/deploy_public_storyforge.sh +``` + 首次启动时,如果数据库里还没有 `super_admin`,`collector-service` 会按 `BOOTSTRAP_SUPERADMIN_USERNAME / BOOTSTRAP_SUPERADMIN_PASSWORD / BOOTSTRAP_SUPERADMIN_DISPLAY_NAME` 创建最高权限账号。未配置时不会再自动写入默认口令账号。 diff --git a/collector-service/app/bilibili_features.py b/collector-service/app/bilibili_features.py deleted file mode 100644 index 062e03e..0000000 --- a/collector-service/app/bilibili_features.py +++ /dev/null @@ -1,545 +0,0 @@ -from __future__ import annotations - -import json -from typing import Any - -from fastapi import Depends, HTTPException, Query -from pydantic import BaseModel, Field - - -def _safe_json_dumps(value: Any) -> str: - return json.dumps(value, ensure_ascii=False, separators=(",", ":")) - - -def _first_non_empty(*values: Any) -> str: - for value in values: - if value is None: - continue - if isinstance(value, str): - stripped = value.strip() - if stripped: - return stripped - elif value not in ("", [], {}, ()): - return str(value) - return "" - - -class BilibiliContentSourceCreateRequest(BaseModel): - project_id: str = "" - source_kind: str = "creator_account" - platform: str = "" - handle: str = "" - source_url: str = "" - title: str = "" - local_path: str = "" - metadata: dict[str, Any] = Field(default_factory=dict) - - -class BilibiliContentSourceSyncRequest(BaseModel): - project_id: str = "" - knowledge_base_id: str = "" - assistant_id: str = "" - content_source_id: str = "" - platform: str = "" - handle: str = "" - source_url: str = "" - title: str = "" - analysis_model_profile_id: str = "" - language: str = "auto" - max_items: int = Field(default=5, ge=1, le=20) - skip_existing: bool = True - auto_trigger_analysis: bool = True - - -class BilibiliReviewCreateRequest(BaseModel): - project_id: str = "" - source_job_id: str = "" - assistant_id: str = "" - title: str = "" - platform: str = "bilibili" - content_type: str = "video" - publish_url: str = "" - published_at: str = "" - metrics: dict[str, Any] = Field(default_factory=dict) - verdict: str = "" - highlights: str = "" - next_actions: str = "" - notes: str = "" - - -class BilibiliReviewUpdateRequest(BaseModel): - title: str | None = None - platform: str | None = None - content_type: str | None = None - publish_url: str | None = None - published_at: str | None = None - metrics: dict[str, Any] | None = None - verdict: str | None = None - highlights: str | None = None - next_actions: str | None = None - notes: str | None = None - assistant_id: str | None = None - - -def _is_youtube_url(source_url: str) -> bool: - lowered = source_url.strip().lower() - return "youtube.com" in lowered or "youtu.be" in lowered - - -def _resolve_bilibili_platform(legacy: Any, platform: str, source_url: str = "") -> str: - if _is_youtube_url(source_url): - raise HTTPException(status_code=400, detail="YouTube sources are not supported in the bilibili routes") - - inferred = legacy.infer_platform_from_url(source_url) if source_url.strip() else "" - normalized = legacy.normalize_platform_slug(platform, allow_blank=True) - if not normalized: - normalized = inferred or "bilibili" - - if normalized == "youtube": - raise HTTPException(status_code=400, detail="YouTube sources are not supported in the bilibili routes") - if inferred and inferred not in {"bilibili", "youtube"} and not platform.strip(): - raise HTTPException( - status_code=400, - detail=f"Bilibili routes only accept bilibili sources, not {inferred}", - ) - if normalized != "bilibili": - raise HTTPException( - status_code=400, - detail=f"Bilibili routes only accept bilibili sources, not {normalized}", - ) - return "bilibili" - - -def _content_source_query(legacy: Any, account_id: str, project_id: str | None = None) -> tuple[str, tuple[Any, ...]]: - clauses = ["user_id = ?", "platform = 'bilibili'"] - params: list[Any] = [account_id] - if project_id is not None: - normalized_project = project_id.strip() - if normalized_project: - clauses.append("project_id = ?") - params.append(normalized_project) - else: - clauses.append("(project_id IS NULL OR project_id = '')") - sql = f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY created_at DESC" - return sql, tuple(params) - - -def _job_query( - source_id: str | None = None, - project_id: str | None = None, - limit: int = 50, -) -> tuple[str, tuple[Any, ...]]: - clauses = ["j.user_id = ?", "cs.platform = 'bilibili'"] - params: list[Any] = [] - if source_id: - clauses.append("j.content_source_id = ?") - params.append(source_id) - if project_id is not None: - normalized_project = project_id.strip() - if normalized_project: - clauses.append("j.project_id = ?") - params.append(normalized_project) - else: - clauses.append("(j.project_id IS NULL OR j.project_id = '')") - sql = ( - "SELECT j.* " - "FROM jobs j " - "JOIN content_sources cs ON cs.id = j.content_source_id " - f"WHERE {' AND '.join(clauses)} " - "ORDER BY j.created_at DESC " - "LIMIT ?" - ) - params = [*params] - return sql, tuple([*params, limit]) - - -def _review_query(project_id: str | None = None, limit: int = 50) -> tuple[str, tuple[Any, ...]]: - clauses = ["r.user_id = ?", "r.platform = 'bilibili'"] - params: list[Any] = [] - if project_id is not None: - normalized_project = project_id.strip() - if normalized_project: - clauses.append("r.project_id = ?") - params.append(normalized_project) - else: - clauses.append("(r.project_id IS NULL OR r.project_id = '')") - sql = ( - "SELECT r.* " - "FROM publish_reviews r " - f"WHERE {' AND '.join(clauses)} " - "ORDER BY COALESCE(NULLIF(r.published_at, ''), r.created_at) DESC, r.created_at DESC " - "LIMIT ?" - ) - return sql, tuple([*params, limit]) - - -def _build_sync_result(legacy: Any, row: dict[str, Any], content_source: dict[str, Any]) -> dict[str, Any]: - payload = legacy.job_payload(row) - payload["content_source"] = legacy.content_source_payload(content_source) - return payload - - -def register_bilibili_routes(app: Any, legacy: Any) -> None: - def now() -> str: - return legacy.utc_now() - - def make_id(prefix: str) -> str: - return legacy.make_id(prefix) - - def resolve_project(account: dict[str, Any], project_id: str) -> dict[str, Any]: - return legacy.resolve_target_project(account["id"], project_id or None, username=account["username"]) - - def resolve_kb(account: dict[str, Any], kb_id: str, project_id: str) -> dict[str, Any]: - return legacy.resolve_target_kb(account["id"], kb_id or None, project_id, username=account["username"]) - - def resolve_assistant(account: dict[str, Any], assistant_id: str, project_id: str) -> dict[str, Any] | None: - return legacy.resolve_target_assistant(account["id"], assistant_id or None, project_id) - - def create_or_update_source( - *, - account: dict[str, Any], - request: BilibiliContentSourceCreateRequest, - sync_request: BilibiliContentSourceSyncRequest | None = None, - ) -> dict[str, Any]: - source_url = _first_non_empty(request.source_url, sync_request.source_url if sync_request else "") - _resolve_bilibili_platform(legacy, request.platform or (sync_request.platform if sync_request else ""), source_url) - - project = resolve_project(account, request.project_id or (sync_request.project_id if sync_request else "")) - title = _first_non_empty(request.title, sync_request.title if sync_request else "", request.handle, source_url) - metadata: dict[str, Any] = dict(request.metadata) - metadata.setdefault("platform", "bilibili") - if sync_request: - metadata.update( - { - "sync_mode": "recent_uploads", - "max_items": sync_request.max_items, - "analysis_model_profile_id": sync_request.analysis_model_profile_id, - } - ) - - return legacy.create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind=(request.source_kind or "creator_account").strip(), - platform="bilibili", - handle=request.handle.strip(), - source_url=source_url.strip(), - title=title.strip(), - local_path=request.local_path.strip(), - metadata=metadata, - ) - - async def sync_source( - *, - account: dict[str, Any], - request: BilibiliContentSourceSyncRequest, - content_source: dict[str, Any] | None = None, - ) -> dict[str, Any]: - source_row = content_source - if request.content_source_id.strip(): - source_row = legacy.load_owned_content_source(request.content_source_id.strip(), account["id"]) - - source_url = _first_non_empty( - request.source_url, - (source_row or {}).get("source_url", ""), - ) - _resolve_bilibili_platform( - legacy, - request.platform or (source_row or {}).get("platform", ""), - source_url, - ) - - project_id = request.project_id or (source_row or {}).get("project_id", "") - project = resolve_project(account, project_id) - kb = resolve_kb(account, request.knowledge_base_id, project["id"]) - assistant = resolve_assistant(account, request.assistant_id, project["id"]) - source_title = _first_non_empty( - request.title, - (source_row or {}).get("title", ""), - request.handle, - source_url, - ) - - if source_row and source_row.get("project_id") and source_row["project_id"] != project["id"]: - raise HTTPException(status_code=400, detail="Content source does not belong to the target project") - - if not source_row: - source_row = create_or_update_source( - account=account, - request=BilibiliContentSourceCreateRequest( - project_id=project["id"], - source_kind="creator_account", - platform="bilibili", - handle=request.handle.strip(), - source_url=source_url, - title=source_title, - local_path="", - metadata={ - "sync_mode": "recent_uploads", - "max_items": request.max_items, - "analysis_model_profile_id": request.analysis_model_profile_id, - }, - ), - sync_request=request, - ) - - job_row = legacy.create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="content_source_sync", - line_type="content_source_sync", - workflow_key="content_source_sync_pipeline", - title=f"{source_title} 内容源同步", - language=request.language, - source_url=source_url, - assistant_id=(assistant or {}).get("id"), - content_source_id=source_row["id"], - artifacts={ - "platform": "bilibili", - "source_kind": source_row.get("source_kind", "creator_account"), - "source_title": source_title, - "source_url": source_url, - "max_items": request.max_items, - "skip_existing": request.skip_existing, - "auto_trigger_analysis": request.auto_trigger_analysis, - "analysis_model_profile_id": request.analysis_model_profile_id, - }, - analysis_model_profile_id=request.analysis_model_profile_id, - ) - legacy.update_content_source_metadata( - source_row["id"], - { - "platform": "bilibili", - "last_sync_job_id": job_row["id"], - "last_sync_requested_at": now(), - "max_items": request.max_items, - "analysis_model_profile_id": request.analysis_model_profile_id, - }, - ) - return _build_sync_result(legacy, await legacy.trigger_orchestrated_job(job_row), source_row) - - @app.get("/v2/bilibili/content-sources") - def list_bilibili_content_sources( - project_id: str | None = Query(default=None), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - sql, params = _content_source_query(legacy, account["id"], project_id) - return [legacy.content_source_payload(row) for row in legacy.db.fetch_all(sql, params)] - - @app.post("/v2/bilibili/content-sources") - def create_bilibili_content_source( - request: BilibiliContentSourceCreateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - row = create_or_update_source(account=account, request=request) - return legacy.content_source_payload(row) - - @app.get("/v2/bilibili/content-sources/{source_id}") - def get_bilibili_content_source( - source_id: str, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - row = legacy.load_owned_content_source(source_id, account["id"]) - if row.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili content source not found") - return legacy.content_source_payload(row) - - @app.post("/v2/bilibili/content-sources/{source_id}/sync") - async def sync_bilibili_content_source( - source_id: str, - request: BilibiliContentSourceSyncRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - row = legacy.load_owned_content_source(source_id, account["id"]) - if row.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili content source not found") - return await sync_source(account=account, request=request, content_source=row) - - @app.post("/v2/bilibili/pipelines/content-source-sync") - async def create_bilibili_content_source_sync_job( - request: BilibiliContentSourceSyncRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - return await sync_source(account=account, request=request) - - @app.get("/v2/bilibili/content-sources/{source_id}/jobs") - def list_bilibili_content_source_jobs( - source_id: str, - limit: int = Query(default=50, ge=1, le=200), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - row = legacy.load_owned_content_source(source_id, account["id"]) - if row.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili content source not found") - sql, params = _job_query(source_id=source_id, limit=limit) - rows = legacy.db.fetch_all(sql, (account["id"], *params)) - return [legacy.job_payload(item) for item in rows] - - @app.get("/v2/bilibili/jobs") - def list_bilibili_jobs( - project_id: str | None = Query(default=None), - content_source_id: str | None = Query(default=None), - limit: int = Query(default=50, ge=1, le=200), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - if content_source_id: - row = legacy.load_owned_content_source(content_source_id.strip(), account["id"]) - if row.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili content source not found") - sql, params = _job_query(source_id=content_source_id.strip() if content_source_id else None, project_id=project_id, limit=limit) - rows = legacy.db.fetch_all(sql, (account["id"], *params)) - return [legacy.job_payload(item) for item in rows] - - @app.get("/v2/bilibili/jobs/{job_id}") - def get_bilibili_job( - job_id: str, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - row = legacy.load_owned_job(job_id, account["id"]) - if row.get("content_source_id"): - source = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ? AND user_id = ?", (row["content_source_id"], account["id"])) - if not source or source.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili job not found") - return legacy.job_context_payload(row) - - @app.get("/v2/bilibili/reviews") - def list_bilibili_reviews( - project_id: str | None = Query(default=None), - limit: int = Query(default=50, ge=1, le=200), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - sql, params = _review_query(project_id=project_id, limit=limit) - rows = legacy.db.fetch_all(sql, (account["id"], *params)) - return [legacy.review_payload(item) for item in rows] - - @app.get("/v2/bilibili/reviews/{review_id}") - def get_bilibili_review( - review_id: str, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - row = legacy.load_owned_review(review_id, account["id"]) - if row.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili review not found") - return legacy.review_payload(row) - - @app.post("/v2/bilibili/reviews") - def create_bilibili_review( - request: BilibiliReviewCreateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_job = None - if request.source_job_id.strip(): - source_job = legacy.load_owned_job(request.source_job_id.strip(), account["id"]) - if source_job.get("content_source_id"): - source = legacy.db.fetch_one( - "SELECT * FROM content_sources WHERE id = ? AND user_id = ?", - (source_job["content_source_id"], account["id"]), - ) - if not source or source.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili source job not found") - normalized_platform = _resolve_bilibili_platform(legacy, request.platform, source_job.get("source_url", "") if source_job else "") - requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "") - project = resolve_project(account, requested_project_id) - assistant = resolve_assistant(account, request.assistant_id, project["id"]) - review_id = make_id("review") - title = _first_non_empty(request.title, source_job.get("title", "") if source_job else "", f"{project['name']} 复盘") - timestamp = now() - legacy.db.execute( - """ - INSERT INTO publish_reviews ( - id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, - publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at - ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - review_id, - account["id"], - project["id"], - source_job["id"] if source_job else None, - (assistant or {}).get("id") or None, - title, - normalized_platform, - request.content_type.strip() or "video", - request.publish_url.strip(), - request.published_at.strip(), - _safe_json_dumps(request.metrics), - request.verdict.strip(), - request.highlights.strip(), - request.next_actions.strip(), - request.notes.strip(), - timestamp, - timestamp, - ), - ) - row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) - return legacy.review_payload(row) - - @app.patch("/v2/bilibili/reviews/{review_id}") - def update_bilibili_review( - review_id: str, - request: BilibiliReviewUpdateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - current = legacy.load_owned_review(review_id, account["id"]) - if current.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili review not found") - assistant_id = current.get("assistant_id") or None - if request.assistant_id is not None: - assistant = resolve_assistant(account, request.assistant_id or "", current.get("project_id", "")) - assistant_id = (assistant or {}).get("id") or None - if request.platform is not None: - _resolve_bilibili_platform(legacy, request.platform, current.get("publish_url", "")) - legacy.db.execute( - """ - UPDATE publish_reviews - SET title = ?, platform = ?, content_type = ?, publish_url = ?, published_at = ?, - metrics_json = ?, verdict = ?, highlights = ?, next_actions = ?, notes = ?, - assistant_id = ?, updated_at = ? - WHERE id = ? AND user_id = ? - """, - ( - request.title if request.title is not None else current.get("title", ""), - "bilibili", - request.content_type if request.content_type is not None else current.get("content_type", "video"), - request.publish_url if request.publish_url is not None else current.get("publish_url", ""), - request.published_at if request.published_at is not None else current.get("published_at", ""), - _safe_json_dumps(request.metrics if request.metrics is not None else legacy.parse_json_object(current.get("metrics_json") or "{}")), - request.verdict if request.verdict is not None else current.get("verdict", ""), - request.highlights if request.highlights is not None else current.get("highlights", ""), - request.next_actions if request.next_actions is not None else current.get("next_actions", ""), - request.notes if request.notes is not None else current.get("notes", ""), - assistant_id, - now(), - review_id, - account["id"], - ), - ) - row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) - return legacy.review_payload(row) - - @app.get("/v2/bilibili/content-sources/{source_id}/reviews") - def list_bilibili_content_source_reviews( - source_id: str, - limit: int = Query(default=50, ge=1, le=200), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - row = legacy.load_owned_content_source(source_id, account["id"]) - if row.get("platform") != "bilibili": - raise HTTPException(status_code=404, detail="Bilibili content source not found") - rows = legacy.db.fetch_all( - """ - SELECT r.* - FROM publish_reviews r - JOIN jobs j ON j.id = r.source_job_id - WHERE r.user_id = ? AND r.platform = 'bilibili' AND j.content_source_id = ? - ORDER BY COALESCE(NULLIF(r.published_at, ''), r.created_at) DESC, r.created_at DESC - LIMIT ? - """, - (account["id"], source_id, limit), - ) - return [legacy.review_payload(item) for item in rows] - - -__all__ = ["register_bilibili_routes"] diff --git a/collector-service/app/domestic_platform_features.py b/collector-service/app/domestic_platform_features.py index ed858e9..c197500 100644 --- a/collector-service/app/domestic_platform_features.py +++ b/collector-service/app/domestic_platform_features.py @@ -349,6 +349,15 @@ def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, l "created_at": row["created_at"], } + def _model_profile_payload(row: dict[str, Any]) -> dict[str, Any]: + return { + "id": row["id"], + "name": row["name"], + "model_name": row["model_name"], + "base_url": row["base_url"], + "is_default": bool(row.get("is_default", 0)), + } + def _report_payload(row: dict[str, Any]) -> dict[str, Any]: suggestions = [ { @@ -368,6 +377,8 @@ def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, l return { "id": row["id"], "focus_text": row.get("focus_text", ""), + "model_profile_ids": _parse_json(row.get("model_profile_ids_json") or "[]", []), + "linked_account_ids": _parse_json(row.get("linked_account_ids_json") or "[]", []), "suggestions": suggestions, "created_at": row["created_at"], } @@ -381,10 +392,34 @@ def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, l f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? ORDER BY created_at DESC", (account_row["id"],), ) + recent_searches = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_similarity_searches WHERE source_account_id = ? ORDER BY created_at DESC LIMIT 5", + (account_row["id"],), + ) return { "account": _account_payload(account_row), + "latest_public_snapshot": None, + "latest_creator_snapshot": None, "recent_reports": [_report_payload(row) for row in reports], "linked_accounts": [_relation_payload(row) for row in relations], + "recent_similarity_searches": [ + { + "id": row["id"], + "prompt_text": row.get("prompt_text", ""), + "context": _parse_json(row.get("context_json") or "{}", {}), + "created_at": row["created_at"], + } + for row in recent_searches + ], + "available_model_profiles": [_model_profile_payload(row) for row in legacy.db.fetch_all( + """ + SELECT * + FROM model_profiles + WHERE owner_account_id IS NULL OR owner_account_id = ? + ORDER BY is_default DESC, created_at ASC + """, + (account_row["user_id"],), + )], } async def _call_reasoning_model(user_id: str, prompt: str, *, system_prompt: str, model_profile_id: str = "", temperature: float = 0.3) -> tuple[str, dict[str, Any]]: @@ -510,6 +545,8 @@ def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, l items.append(_tracking_digest_item(tracked, video)) items.sort(key=lambda item: item.get("created_at", ""), reverse=True) return { + "generated_at": now(), + "since": threshold, "items": items[:limit], "tracked_accounts": tracked_items, "cursor_last_seen_at": (cursor or {}).get("last_seen_at", ""), @@ -524,6 +561,21 @@ def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, l account_row = _require_account(account_id, account["id"]) return _workspace_payload(account_row) + @app.get(f"/v2/{platform}/accounts/{{account_id}}/analysis-reports") + def list_platform_analysis_reports(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]: + account_row = _require_account(account_id, account["id"]) + return _workspace_payload(account_row)["recent_reports"] + + @app.get(f"/v2/{platform}/accounts/{{account_id}}/snapshots") + def list_platform_snapshots(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]: + _require_account(account_id, account["id"]) + return [] + + @app.get(f"/v2/{platform}/accounts/{{account_id}}/creator-fields") + def get_platform_creator_fields(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + _require_account(account_id, account["id"]) + raise HTTPException(status_code=404, detail="No creator-center snapshot found") + @app.get(f"/v2/{platform}/accounts/{{account_id}}/videos") def list_platform_account_videos( account_id: str, @@ -604,8 +656,10 @@ def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, l return { "report_id": report_id, "account_id": account_row["id"], + "created_at": now(), "suggestions": report_payload["suggestions"], "context": context, + "top_video_analyses": [], } @app.post(f"/v2/{platform}/accounts/{{account_id}}/videos/analyze-top") diff --git a/collector-service/app/douyin_features.py b/collector-service/app/douyin_features.py index 8cf30f1..729dcca 100644 --- a/collector-service/app/douyin_features.py +++ b/collector-service/app/douyin_features.py @@ -1385,14 +1385,19 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: high_value = int(stats.get("like") or 0) >= 100 or int(stats.get("play") or 0) >= 5000 or bool(borrowing_points) return { "tracking_id": tracked_item["id"], + "platform": "douyin", "tracked_account_id": tracked_item["tracked_account_id"], + "tracked_account_name": tracked_item["account"]["nickname"], "assistant_id": tracked_item["assistant_id"], "assistant_name": tracked_item["assistant_name"], + "note": tracked_item.get("note", ""), "account": tracked_item["account"], "video": video, "summary": _compact_text(summary, 160), + "summary_text": _compact_text(summary, 160), "borrowing_points": borrowing_points, "is_high_value": high_value, + "created_at": video.get("published_at") or now(), } def _build_tracking_digest(user_id: str, since_value: str = "", limit: int = 24) -> dict[str, Any]: @@ -1418,8 +1423,8 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: return { "generated_at": now(), "since": since_dt.isoformat(), - "cursor_last_seen_at": (cursor or {}).get("last_seen_at", ""), "tracked_accounts": tracked_accounts, + "cursor_last_seen_at": (cursor or {}).get("last_seen_at", ""), "items": items[: max(1, min(limit, 100))] } diff --git a/collector-service/app/kuaishou_features.py b/collector-service/app/kuaishou_features.py deleted file mode 100644 index be081cd..0000000 --- a/collector-service/app/kuaishou_features.py +++ /dev/null @@ -1,381 +0,0 @@ -from __future__ import annotations - -import json -from typing import Any - -from fastapi import Depends, HTTPException, Query -from pydantic import BaseModel, Field - -from .core_main import ( - content_source_payload, - create_content_source, - create_job_record, - job_payload, - load_owned_content_source, - load_owned_job, - make_id, - parse_json_object, - resolve_target_assistant, - resolve_target_kb, - resolve_target_project, - review_payload, - trigger_orchestrated_job, - utc_now, - model_profile_for_account, - db, -) - -KUAISHOU_PLATFORM = "kuaishou" -KUAISHOU_URL_HINTS = ( - "kuaishou.com", - "v.kuaishou.com", - "chenzhongtech.com", -) -YOUTUBE_URL_HINTS = ( - "youtube.com", - "youtu.be", - "m.youtube.com", - "music.youtube.com", -) - - -class KuaishouContentSourceCreateRequest(BaseModel): - project_id: str = "" - source_kind: str = "creator_account" - handle: str = "" - source_url: str = "" - title: str = "" - local_path: str = "" - metadata: dict[str, Any] = Field(default_factory=dict) - - -class KuaishouContentSourceSyncRequest(BaseModel): - project_id: str = "" - knowledge_base_id: str = "" - assistant_id: str = "" - content_source_id: str = "" - handle: str = "" - source_url: str = "" - title: str = "" - analysis_model_profile_id: str = "" - language: str = "auto" - max_items: int = Field(default=5, ge=1, le=20) - skip_existing: bool = True - auto_trigger_analysis: bool = True - - -class KuaishouReviewCreateRequest(BaseModel): - project_id: str = "" - source_job_id: str = "" - assistant_id: str = "" - title: str = "" - content_type: str = "video" - publish_url: str = "" - published_at: str = "" - metrics: dict[str, Any] = Field(default_factory=dict) - verdict: str = "" - highlights: str = "" - next_actions: str = "" - notes: str = "" - - -def _normalize_text(value: str | None) -> str: - return str(value or "").strip() - - -def _is_youtube_url(value: str) -> bool: - normalized = _normalize_text(value).lower() - return any(hint in normalized for hint in YOUTUBE_URL_HINTS) - - -def _is_kuaishou_url(value: str) -> bool: - normalized = _normalize_text(value).lower() - return any(hint in normalized for hint in KUAISHOU_URL_HINTS) - - -def _ensure_kuaishou_url(value: str) -> str: - normalized = _normalize_text(value) - if not normalized: - return "" - if _is_youtube_url(normalized): - raise HTTPException(status_code=400, detail="YouTube URLs are not supported in the Kuaishou routes") - return normalized - - -def _content_source_is_kuaishou(row: dict[str, Any]) -> bool: - if _normalize_text(row.get("platform")).lower() == KUAISHOU_PLATFORM: - return True - return _is_kuaishou_url(row.get("source_url", "")) - - -def _job_is_kuaishou(row: dict[str, Any]) -> bool: - artifacts = parse_json_object(row.get("artifacts_json") or "{}") - source_url = _normalize_text(row.get("source_url")) - if source_url and _is_youtube_url(source_url): - return False - if source_url and _is_kuaishou_url(source_url): - return True - if _normalize_text(artifacts.get("platform")).lower() == KUAISHOU_PLATFORM: - return True - content_source_id = _normalize_text(row.get("content_source_id")) - if content_source_id: - source_row = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (content_source_id,)) - return bool(source_row and _content_source_is_kuaishou(source_row)) - return False - - -def _require_owned_kuaishou_source(source_id: str, account_id: str) -> dict[str, Any]: - row = load_owned_content_source(source_id, account_id) - if not _content_source_is_kuaishou(row): - raise HTTPException(status_code=400, detail="Content source does not belong to the Kuaishou route") - return row - - -def _list_kuaishou_jobs(account_id: str, project_id: str | None = None, limit: int = 50) -> list[dict[str, Any]]: - rows = db.fetch_all( - "SELECT * FROM jobs WHERE user_id = ? ORDER BY created_at DESC LIMIT ?", - (account_id, max(limit, 1) * 10), - ) - items: list[dict[str, Any]] = [] - for row in rows: - if project_id and _normalize_text(row.get("project_id")) != project_id: - continue - if _job_is_kuaishou(row): - items.append(job_payload(row)) - if len(items) >= limit: - break - return items - - -def _list_kuaishou_reviews(account_id: str, project_id: str | None = None, limit: int = 50) -> list[dict[str, Any]]: - clauses = ["user_id = ?", "platform = ?"] - params: list[Any] = [account_id, KUAISHOU_PLATFORM] - if project_id is not None: - normalized = project_id.strip() - if normalized: - clauses.append("project_id = ?") - params.append(normalized) - else: - clauses.append("(project_id IS NULL OR project_id = '')") - sql = f""" - SELECT * FROM publish_reviews - WHERE {' AND '.join(clauses)} - ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC - LIMIT ? - """ - params.append(limit) - return [review_payload(row) for row in db.fetch_all(sql, tuple(params))] - - -def register_kuaishou_routes(app: Any, legacy: Any) -> None: - """Register a small Kuaishou route set on top of the shared collector tables.""" - - @app.get("/v2/kuaishou/content-sources") - def list_kuaishou_content_sources( - project_id: str | None = Query(default=None), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - clauses = ["user_id = ?", "platform = ?"] - params: list[Any] = [account["id"], KUAISHOU_PLATFORM] - if project_id: - resolve_target_project(account["id"], project_id, username=account["username"]) - clauses.append("project_id = ?") - params.append(project_id) - rows = legacy.db.fetch_all( - f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY created_at DESC", - tuple(params), - ) - return [content_source_payload(row) for row in rows] - - @app.post("/v2/kuaishou/content-sources") - def create_kuaishou_content_source_api( - request: KuaishouContentSourceCreateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) - source_url = _ensure_kuaishou_url(request.source_url) - if source_url and _is_youtube_url(source_url): - raise HTTPException(status_code=400, detail="YouTube URLs are not supported in the Kuaishou routes") - row = create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind=_normalize_text(request.source_kind) or "creator_account", - platform=KUAISHOU_PLATFORM, - handle=_normalize_text(request.handle), - source_url=source_url, - title=_normalize_text(request.title) or _normalize_text(request.handle) or source_url, - local_path=_normalize_text(request.local_path), - metadata=request.metadata, - ) - return content_source_payload(row) - - @app.post("/v2/kuaishou/pipelines/content-source-sync") - async def create_kuaishou_content_source_sync_job( - request: KuaishouContentSourceSyncRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_row = None - if request.content_source_id.strip(): - source_row = _require_owned_kuaishou_source(request.content_source_id.strip(), account["id"]) - - requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "") - project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - kb = resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"]) - assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) - profile = model_profile_for_account(account["id"], request.analysis_model_profile_id or None) - - source_url = _ensure_kuaishou_url( - request.source_url or (source_row or {}).get("source_url", "") - ) - if not source_url: - raise HTTPException(status_code=400, detail="source_url or content_source_id with a Kuaishou URL is required") - - handle = _normalize_text(request.handle or (source_row or {}).get("handle", "")) - source_title = ( - _normalize_text(request.title) - or (source_row or {}).get("title", "").strip() - or handle - or source_url - ) - - if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]: - raise HTTPException(status_code=400, detail="Content source does not belong to target project") - - if not source_row: - source_row = create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind="creator_account", - platform=KUAISHOU_PLATFORM, - handle=handle, - source_url=source_url, - title=source_title, - metadata={ - "sync_mode": "recent_uploads", - "max_items": request.max_items, - "analysis_model_profile_id": profile["id"], - }, - ) - - job_row = create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="content_source_sync", - line_type="content_source_sync", - workflow_key="content_source_sync_pipeline", - title=f"{source_title} 内容源同步", - language=request.language, - source_url=source_url, - assistant_id=(assistant or {}).get("id"), - content_source_id=source_row["id"], - artifacts={ - "platform": KUAISHOU_PLATFORM, - "handle": handle, - "source_account_url": source_url, - "source_title": source_title, - "max_items": request.max_items, - "skip_existing": request.skip_existing, - "auto_trigger_analysis": request.auto_trigger_analysis, - }, - analysis_model_profile_id=profile["id"], - ) - legacy.update_content_source_metadata( - source_row["id"], - { - "sync_mode": "recent_uploads", - "max_items": request.max_items, - "analysis_model_profile_id": profile["id"], - "last_sync_job_id": job_row["id"], - "last_sync_requested_at": utc_now(), - }, - ) - return job_payload(await trigger_orchestrated_job(job_row)) - - @app.get("/v2/kuaishou/jobs") - def list_kuaishou_jobs_api( - project_id: str | None = Query(default=None), - limit: int = Query(default=20, ge=1, le=100), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - return _list_kuaishou_jobs(account["id"], project_id=project_id, limit=limit) - - @app.get("/v2/kuaishou/workspace") - def get_kuaishou_workspace( - project_id: str | None = Query(default=None), - limit: int = Query(default=10, ge=1, le=50), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - content_sources = list_kuaishou_content_sources(project_id=project_id, account=account) - reviews = _list_kuaishou_reviews(account["id"], project_id=project_id, limit=limit) - jobs = _list_kuaishou_jobs(account["id"], project_id=project_id, limit=limit) - return { - "platform": KUAISHOU_PLATFORM, - "project_id": project_id or "", - "content_sources": content_sources, - "recent_jobs": jobs, - "recent_reviews": reviews, - "counts": { - "content_sources": len(content_sources), - "jobs": len(jobs), - "reviews": len(reviews), - }, - } - - @app.get("/v2/kuaishou/reviews") - def list_kuaishou_reviews_api( - project_id: str | None = Query(default=None), - limit: int = Query(default=50, ge=1, le=200), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - return _list_kuaishou_reviews(account["id"], project_id=project_id, limit=limit) - - @app.post("/v2/kuaishou/reviews") - def create_kuaishou_review( - request: KuaishouReviewCreateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_job = None - if request.source_job_id.strip(): - source_job = load_owned_job(request.source_job_id.strip(), account["id"]) - if not _job_is_kuaishou(source_job): - raise HTTPException(status_code=400, detail="Source job does not belong to the Kuaishou route") - - requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "") - project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) - review_id = make_id("review") - title = request.title.strip() or (source_job.get("title", "") if source_job else "") - if not title: - title = f"{project['name']} 快手复盘" - timestamp = utc_now() - db.execute( - """ - INSERT INTO publish_reviews ( - id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, - publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at - ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - review_id, - account["id"], - project["id"], - source_job["id"] if source_job else None, - (assistant or {}).get("id") or None, - title, - KUAISHOU_PLATFORM, - request.content_type or "video", - _normalize_text(request.publish_url), - _normalize_text(request.published_at), - json.dumps(request.metrics, ensure_ascii=False), - _normalize_text(request.verdict), - _normalize_text(request.highlights), - _normalize_text(request.next_actions), - _normalize_text(request.notes), - timestamp, - timestamp, - ), - ) - row = db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) - return review_payload(row) diff --git a/collector-service/app/legacy_runtime.py b/collector-service/app/legacy_runtime.py deleted file mode 100644 index dcb1e84..0000000 --- a/collector-service/app/legacy_runtime.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import annotations - -import importlib.machinery -import importlib.util -import sys -import types -from pathlib import Path -from typing import Any - -BASE_DIR = Path(__file__).resolve().parent -PYCACHE_DIR = BASE_DIR / "__pycache__" -LEGACY_PYC_DIR = BASE_DIR / "_legacy_pyc" -SUPPORTED_PYTHON = (3, 11) - -_LEGACY_MODULE: Any | None = None - - -def _ensure_supported_runtime() -> None: - if sys.version_info[:2] != SUPPORTED_PYTHON: - version = ".".join(map(str, sys.version_info[:3])) - required = ".".join(map(str, SUPPORTED_PYTHON)) - raise RuntimeError( - f"Legacy collector bytecode requires Python {required}. Current runtime: {version}." - ) - - -def _ensure_package() -> None: - package = sys.modules.get("app") - if package is None: - package = types.ModuleType("app") - package.__path__ = [str(BASE_DIR)] - sys.modules["app"] = package - - -def _load_sourceless_module(module_name: str, pyc_path: Path) -> Any: - loader = importlib.machinery.SourcelessFileLoader(module_name, str(pyc_path)) - spec = importlib.util.spec_from_loader(module_name, loader) - if spec is None: - raise RuntimeError(f"Unable to create spec for {module_name}") - module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module - loader.exec_module(module) - return module - - -def load_legacy_main() -> Any: - global _LEGACY_MODULE - if _LEGACY_MODULE is not None: - return _LEGACY_MODULE - - _ensure_supported_runtime() - _ensure_package() - - for name in ("database", "fastgpt", "openai_compat"): - full_name = f"app.{name}" - if full_name not in sys.modules: - pyc_dir = LEGACY_PYC_DIR if (LEGACY_PYC_DIR / f"{name}.cpython-311.pyc").exists() else PYCACHE_DIR - _load_sourceless_module(full_name, pyc_dir / f"{name}.cpython-311.pyc") - - legacy_name = "app.main_legacy" - if legacy_name in sys.modules: - _LEGACY_MODULE = sys.modules[legacy_name] - return _LEGACY_MODULE - - main_pyc_dir = LEGACY_PYC_DIR if (LEGACY_PYC_DIR / "main.cpython-311.pyc").exists() else PYCACHE_DIR - _LEGACY_MODULE = _load_sourceless_module(legacy_name, main_pyc_dir / "main.cpython-311.pyc") - _LEGACY_MODULE.__package__ = "app" - return _LEGACY_MODULE diff --git a/collector-service/app/main.py b/collector-service/app/main.py index b686fe8..300b434 100644 --- a/collector-service/app/main.py +++ b/collector-service/app/main.py @@ -4,14 +4,7 @@ from .domestic_platform_features import register_domestic_platform_routes from .douyin_features import register_douyin_routes from .oneliner_features import register_oneliner_routes -try: - from . import core_main as core -except Exception: - # Keep a bytecode-backed fallback so the app can still boot if the - # recovered source baseline is incomplete in this workspace. - from .legacy_runtime import load_legacy_main - - core = load_legacy_main() +from . import core_main as core app = core.app diff --git a/collector-service/app/wechat_video_features.py b/collector-service/app/wechat_video_features.py deleted file mode 100644 index c00c061..0000000 --- a/collector-service/app/wechat_video_features.py +++ /dev/null @@ -1,531 +0,0 @@ -from __future__ import annotations - -import json -from collections import Counter -from typing import Any - -from fastapi import Depends, HTTPException, Query -from pydantic import BaseModel, Field - -# This module is intentionally self-contained because the task only allows -# writes to a new file. To activate it, import `register_wechat_video_routes` -# from `app.main` and call it with `(app, core)`. - -WECHAT_VIDEO_PLATFORM = "wechat_video" -ACCOUNT_SOURCE_KIND = "creator_account" -YOUTUBE_HOST_MARKERS = ("youtube.com", "youtu.be") - - -class WechatVideoAccountSyncRequest(BaseModel): - project_id: str = "" - knowledge_base_id: str = "" - assistant_id: str = "" - content_source_id: str = "" - profile_url: str = "" - handle: str = "" - title: str = "" - analysis_model_profile_id: str = "" - language: str = "auto" - max_items: int = Field(default=5, ge=1, le=20) - skip_existing: bool = True - auto_trigger_analysis: bool = True - - -class WechatVideoReviewCreateRequest(BaseModel): - project_id: str = "" - source_job_id: str = "" - assistant_id: str = "" - title: str = "" - content_type: str = "video" - publish_url: str = "" - published_at: str = "" - metrics: dict[str, Any] = Field(default_factory=dict) - verdict: str = "" - highlights: str = "" - next_actions: str = "" - notes: str = "" - - -def register_wechat_video_routes(app: Any, legacy: Any) -> None: - if getattr(app.state, "wechat_video_routes_registered", False): - return - app.state.wechat_video_routes_registered = True - - def _account_not_found() -> HTTPException: - return HTTPException(status_code=404, detail="WeChat Video account not found") - - def _normalize_wechat_source_url(source_url: str) -> str: - normalized = source_url.strip() - if not normalized: - return "" - lowered = normalized.lower() - if any(marker in lowered for marker in YOUTUBE_HOST_MARKERS): - raise HTTPException(status_code=400, detail="YouTube is not supported by wechat_video routes") - inferred = legacy.infer_platform_from_url(normalized) - if inferred != WECHAT_VIDEO_PLATFORM: - raise HTTPException( - status_code=400, - detail="wechat_video routes only accept channels.weixin.qq.com or mp.weixin.qq.com/s URLs", - ) - return normalized - - def _require_owned_account(source_id: str, user_id: str) -> dict[str, Any]: - row = legacy.load_owned_content_source(source_id, user_id) - if row.get("platform") != WECHAT_VIDEO_PLATFORM or row.get("source_kind") != ACCOUNT_SOURCE_KIND: - raise _account_not_found() - return row - - def _list_sync_job_rows(source_row: dict[str, Any], *, limit: int = 50) -> list[dict[str, Any]]: - return legacy.db.fetch_all( - """ - SELECT * - FROM jobs - WHERE user_id = ? AND content_source_id = ? AND source_type = 'content_source_sync' - ORDER BY created_at DESC - LIMIT ? - """, - (source_row["user_id"], source_row["id"], max(1, limit)), - ) - - def _list_video_job_rows(source_row: dict[str, Any], *, limit: int = 200) -> list[dict[str, Any]]: - sync_rows = _list_sync_job_rows(source_row, limit=max(1, limit)) - if not sync_rows: - return [] - parent_job_ids = [row["id"] for row in sync_rows] - placeholders = ",".join("?" for _ in parent_job_ids) - query = f""" - SELECT * - FROM jobs - WHERE user_id = ? AND source_type = 'video_link' AND parent_job_id IN ({placeholders}) - ORDER BY created_at DESC - """ - params: tuple[Any, ...] = (source_row["user_id"], *parent_job_ids) - return legacy.db.fetch_all(query, params)[: max(1, limit)] - - def _dedupe_latest_video_jobs(rows: list[dict[str, Any]]) -> list[dict[str, Any]]: - deduped: list[dict[str, Any]] = [] - seen_urls: set[str] = set() - for row in rows: - source_url = str(row.get("source_url") or "").strip() - if not source_url or source_url in seen_urls: - continue - seen_urls.add(source_url) - deduped.append(row) - return deduped - - def _fetch_content_source(source_id: str) -> dict[str, Any] | None: - if not source_id: - return None - return legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,)) - - def _load_related_reviews(source_row: dict[str, Any], video_rows: list[dict[str, Any]], *, limit: int = 50) -> list[dict[str, Any]]: - candidate_rows = legacy.db.fetch_all( - """ - SELECT * - FROM publish_reviews - WHERE user_id = ? AND platform = ? - ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC - LIMIT 400 - """, - (source_row["user_id"], WECHAT_VIDEO_PLATFORM), - ) - job_ids = {row["id"] for row in video_rows} - video_urls = {str(row.get("source_url") or "").strip() for row in video_rows if row.get("source_url")} - results: list[dict[str, Any]] = [] - for row in candidate_rows: - source_job_id = str(row.get("source_job_id") or "").strip() - publish_url = str(row.get("publish_url") or "").strip() - if source_job_id and source_job_id in job_ids: - results.append(row) - continue - if publish_url and publish_url in video_urls: - results.append(row) - return results[: max(1, limit)] - - def _load_related_documents(video_rows: list[dict[str, Any]], *, limit: int = 30) -> list[dict[str, Any]]: - kb_ids = {str(row.get("knowledge_base_id") or "").strip() for row in video_rows if row.get("knowledge_base_id")} - video_urls = {str(row.get("source_url") or "").strip() for row in video_rows if row.get("source_url")} - documents: list[dict[str, Any]] = [] - seen_document_ids: set[str] = set() - for kb_id in kb_ids: - for row in legacy.db.fetch_all( - """ - SELECT * - FROM knowledge_documents - WHERE knowledge_base_id = ? - ORDER BY created_at DESC - LIMIT 200 - """, - (kb_id,), - ): - if row["id"] in seen_document_ids: - continue - if str(row.get("source_url") or "").strip() not in video_urls: - continue - seen_document_ids.add(row["id"]) - documents.append(row) - if len(documents) >= limit: - return documents - return documents - - def _build_review_maps(review_rows: list[dict[str, Any]]) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, Any]]]: - by_job_id: dict[str, dict[str, Any]] = {} - by_url: dict[str, dict[str, Any]] = {} - for row in review_rows: - source_job_id = str(row.get("source_job_id") or "").strip() - publish_url = str(row.get("publish_url") or "").strip() - if source_job_id and source_job_id not in by_job_id: - by_job_id[source_job_id] = row - if publish_url and publish_url not in by_url: - by_url[publish_url] = row - return by_job_id, by_url - - def _build_document_map(document_rows: list[dict[str, Any]]) -> dict[str, dict[str, Any]]: - by_url: dict[str, dict[str, Any]] = {} - for row in document_rows: - source_url = str(row.get("source_url") or "").strip() - if source_url and source_url not in by_url: - by_url[source_url] = row - return by_url - - def _build_account_payload(source_row: dict[str, Any]) -> dict[str, Any]: - payload = legacy.content_source_payload(source_row) - metadata = payload.get("metadata") or {} - latest_sync_job = None - last_sync_job_id = str(metadata.get("last_sync_job_id") or "") - if last_sync_job_id: - latest_sync_job = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ?", (last_sync_job_id,)) - payload["platform_label"] = legacy.platform_label(WECHAT_VIDEO_PLATFORM) - payload["last_sync_job_id"] = last_sync_job_id - payload["last_sync_completed_at"] = str(metadata.get("last_sync_completed_at") or "") - payload["last_sync_error"] = str(metadata.get("last_sync_error") or "") - payload["last_sync_status"] = str((latest_sync_job or {}).get("status") or "") - payload["sync_mode"] = str(metadata.get("sync_mode") or "recent_uploads") - return payload - - def _build_video_item( - job_row: dict[str, Any], - review_by_job_id: dict[str, dict[str, Any]], - review_by_url: dict[str, dict[str, Any]], - document_by_url: dict[str, dict[str, Any]], - ) -> dict[str, Any]: - source_url = str(job_row.get("source_url") or "").strip() - content_source = _fetch_content_source(str(job_row.get("content_source_id") or "").strip()) - review_row = review_by_job_id.get(job_row["id"]) or review_by_url.get(source_url) - document_row = document_by_url.get(source_url) - artifacts = legacy.parse_job_artifacts(job_row) - return { - "id": job_row["id"], - "title": job_row.get("title", ""), - "status": job_row.get("status", ""), - "source_url": source_url, - "external_id": str(artifacts.get("external_id") or ""), - "origin_sync_job_id": str(artifacts.get("origin_sync_job_id") or ""), - "job": legacy.job_payload(job_row), - "content_source": legacy.content_source_payload(content_source) if content_source else None, - "latest_review": legacy.review_payload(review_row) if review_row else None, - "document": legacy.document_payload(document_row) if document_row else None, - } - - def _build_workspace_payload(source_row: dict[str, Any]) -> dict[str, Any]: - sync_rows = _list_sync_job_rows(source_row, limit=20) - video_rows = _dedupe_latest_video_jobs(_list_video_job_rows(source_row, limit=200)) - review_rows = _load_related_reviews(source_row, video_rows, limit=20) - document_rows = _load_related_documents(video_rows, limit=12) - review_by_job_id, review_by_url = _build_review_maps(review_rows) - document_by_url = _build_document_map(document_rows) - status_counts = Counter(str(row.get("status") or "").strip() or "unknown" for row in video_rows) - latest_sync = legacy.job_context_payload(sync_rows[0]) if sync_rows else None - return { - "account": _build_account_payload(source_row), - "latest_sync_job": latest_sync, - "sync_jobs": [legacy.job_payload(row) for row in sync_rows[:10]], - "videos": { - "total": len(video_rows), - "status_counts": dict(status_counts), - "items": [ - _build_video_item(row, review_by_job_id, review_by_url, document_by_url) - for row in video_rows[:20] - ], - }, - "reviews": [legacy.review_payload(row) for row in review_rows], - "recent_documents": [legacy.document_payload(row) for row in document_rows], - "stats": { - "sync_job_count": len(sync_rows), - "video_job_count": len(video_rows), - "completed_video_count": status_counts.get("completed", 0), - "failed_video_count": status_counts.get("failed", 0), - "review_count": len(review_rows), - "document_count": len(document_rows), - }, - } - - def _update_account_source( - source_row: dict[str, Any], - *, - source_url: str, - title: str, - handle: str, - metadata_updates: dict[str, Any], - ) -> dict[str, Any]: - merged_metadata = legacy.merge_json_field(source_row.get("metadata_json") or "{}", metadata_updates) - legacy.db.execute( - """ - UPDATE content_sources - SET handle = ?, source_url = ?, title = ?, platform = ?, metadata_json = ?, updated_at = ? - WHERE id = ? AND user_id = ? - """, - ( - handle, - source_url, - title, - WECHAT_VIDEO_PLATFORM, - merged_metadata, - legacy.utc_now(), - source_row["id"], - source_row["user_id"], - ), - ) - return legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_row["id"],)) - - def _job_belongs_to_account(job_row: dict[str, Any], source_row: dict[str, Any]) -> bool: - if str(job_row.get("content_source_id") or "").strip(): - content_source = _fetch_content_source(str(job_row.get("content_source_id") or "").strip()) - metadata = (legacy.content_source_payload(content_source).get("metadata") or {}) if content_source else {} - if content_source and str(metadata.get("origin_content_source_id") or "") == source_row["id"]: - return True - if str(job_row.get("parent_job_id") or "").strip(): - parent_row = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_row["parent_job_id"],)) - if parent_row and str(parent_row.get("content_source_id") or "") == source_row["id"]: - return True - return False - - @app.get("/v2/wechat-video/accounts") - def list_wechat_video_accounts( - project_id: str | None = Query(default=None), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - clauses = ["user_id = ?", "platform = ?", "source_kind = ?"] - params: list[Any] = [account["id"], WECHAT_VIDEO_PLATFORM, ACCOUNT_SOURCE_KIND] - if project_id: - project = legacy.resolve_target_project(account["id"], project_id, username=account["username"]) - clauses.append("project_id = ?") - params.append(project["id"]) - rows = legacy.db.fetch_all( - f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY updated_at DESC", - tuple(params), - ) - return [_build_account_payload(row) for row in rows] - - @app.post("/v2/wechat-video/accounts/sync") - async def sync_wechat_video_account( - request: WechatVideoAccountSyncRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_row = None - if request.content_source_id.strip(): - source_row = _require_owned_account(request.content_source_id.strip(), account["id"]) - - source_url = _normalize_wechat_source_url(request.profile_url or (source_row or {}).get("source_url", "")) - if not source_url: - raise HTTPException(status_code=400, detail="profile_url or content_source_id is required") - - requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "") - project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]: - raise HTTPException(status_code=400, detail="Content source does not belong to target project") - - kb = legacy.resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"]) - assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) - profile = legacy.model_profile_for_account(account["id"], request.analysis_model_profile_id or None) - handle = request.handle.strip() or (source_row or {}).get("handle", "").strip() - title = request.title.strip() or (source_row or {}).get("title", "").strip() or handle or source_url - metadata_updates = { - "account_type": WECHAT_VIDEO_PLATFORM, - "sync_mode": "recent_uploads", - "max_items": request.max_items, - "analysis_model_profile_id": profile["id"], - "last_sync_error": "", - } - - if not source_row: - source_row = legacy.create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind=ACCOUNT_SOURCE_KIND, - platform=WECHAT_VIDEO_PLATFORM, - handle=handle, - source_url=source_url, - title=title, - metadata=metadata_updates, - ) - else: - source_row = _update_account_source( - source_row, - source_url=source_url, - title=title, - handle=handle, - metadata_updates=metadata_updates, - ) - - job_row = legacy.create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="content_source_sync", - line_type="content_source_sync", - workflow_key="content_source_sync_pipeline", - title=f"{title} 内容源同步", - language=request.language, - source_url=source_url, - assistant_id=(assistant or {}).get("id"), - content_source_id=source_row["id"], - artifacts={ - "platform": WECHAT_VIDEO_PLATFORM, - "handle": handle, - "source_account_url": source_url, - "source_title": title, - "max_items": request.max_items, - "skip_existing": request.skip_existing, - "auto_trigger_analysis": request.auto_trigger_analysis, - }, - analysis_model_profile_id=profile["id"], - ) - legacy.update_content_source_metadata( - source_row["id"], - { - "sync_mode": "recent_uploads", - "max_items": request.max_items, - "analysis_model_profile_id": profile["id"], - "last_sync_job_id": job_row["id"], - "last_sync_requested_at": legacy.utc_now(), - "last_sync_error": "", - }, - ) - queued_row = await legacy.trigger_orchestrated_job(job_row) - source_row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_row["id"],)) - workspace = _build_workspace_payload(source_row) - workspace["sync_job"] = legacy.job_payload(queued_row) - return workspace - - @app.get("/v2/wechat-video/accounts/{account_id}") - def get_wechat_video_account( - account_id: str, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_row = _require_owned_account(account_id, account["id"]) - return _build_workspace_payload(source_row) - - @app.get("/v2/wechat-video/accounts/{account_id}/workspace") - def get_wechat_video_account_workspace( - account_id: str, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_row = _require_owned_account(account_id, account["id"]) - return _build_workspace_payload(source_row) - - @app.get("/v2/wechat-video/accounts/{account_id}/videos") - def list_wechat_video_account_videos( - account_id: str, - limit: int = Query(default=50, ge=1, le=200), - status: str = Query(default=""), - q: str = Query(default=""), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_row = _require_owned_account(account_id, account["id"]) - video_rows = _dedupe_latest_video_jobs(_list_video_job_rows(source_row, limit=max(limit * 4, 200))) - normalized_status = status.strip().lower() - normalized_query = q.strip().lower() - if normalized_status: - video_rows = [row for row in video_rows if str(row.get("status") or "").lower() == normalized_status] - if normalized_query: - video_rows = [ - row - for row in video_rows - if normalized_query in str(row.get("title") or "").lower() - or normalized_query in str(row.get("source_url") or "").lower() - ] - selected_rows = video_rows[:limit] - review_rows = _load_related_reviews(source_row, selected_rows, limit=max(limit, 20)) - document_rows = _load_related_documents(selected_rows, limit=max(limit, 20)) - review_by_job_id, review_by_url = _build_review_maps(review_rows) - document_by_url = _build_document_map(document_rows) - return { - "account": _build_account_payload(source_row), - "total": len(video_rows), - "status_counts": dict(Counter(str(row.get("status") or "").strip() or "unknown" for row in video_rows)), - "items": [ - _build_video_item(row, review_by_job_id, review_by_url, document_by_url) - for row in selected_rows - ], - } - - @app.get("/v2/wechat-video/accounts/{account_id}/reviews") - def list_wechat_video_account_reviews( - account_id: str, - limit: int = Query(default=50, ge=1, le=200), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - source_row = _require_owned_account(account_id, account["id"]) - video_rows = _dedupe_latest_video_jobs(_list_video_job_rows(source_row, limit=200)) - review_rows = _load_related_reviews(source_row, video_rows, limit=limit) - return [legacy.review_payload(row) for row in review_rows] - - @app.post("/v2/wechat-video/accounts/{account_id}/reviews") - def create_wechat_video_review( - account_id: str, - request: WechatVideoReviewCreateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_row = _require_owned_account(account_id, account["id"]) - source_job = None - if request.source_job_id.strip(): - source_job = legacy.load_owned_job(request.source_job_id.strip(), account["id"]) - if not _job_belongs_to_account(source_job, source_row): - raise HTTPException(status_code=400, detail="source_job_id does not belong to the target WeChat Video account") - - requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else source_row.get("project_id", "")) - project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - if source_row.get("project_id") and source_row.get("project_id") != project["id"]: - raise HTTPException(status_code=400, detail="WeChat Video account does not belong to target project") - - assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) - publish_url = request.publish_url.strip() or (source_job.get("source_url", "") if source_job else "") - if publish_url: - _normalize_wechat_source_url(publish_url) - title = request.title.strip() or (source_job.get("title", "") if source_job else "") or f"{source_row.get('title', '')} 复盘".strip() - if not title: - title = "微信视频号复盘" - - review_id = legacy.make_id("review") - timestamp = legacy.utc_now() - legacy.db.execute( - """ - INSERT INTO publish_reviews ( - id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, - publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at - ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - review_id, - account["id"], - project["id"], - source_job["id"] if source_job else None, - (assistant or {}).get("id") or None, - title, - WECHAT_VIDEO_PLATFORM, - request.content_type or "video", - publish_url, - request.published_at.strip(), - json.dumps(request.metrics, ensure_ascii=False), - request.verdict.strip(), - request.highlights.strip(), - request.next_actions.strip(), - request.notes.strip(), - timestamp, - timestamp, - ), - ) - row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) - return legacy.review_payload(row) diff --git a/collector-service/app/xiaohongshu_features.py b/collector-service/app/xiaohongshu_features.py deleted file mode 100644 index 5502653..0000000 --- a/collector-service/app/xiaohongshu_features.py +++ /dev/null @@ -1,765 +0,0 @@ -from __future__ import annotations - -import json -import re -from datetime import datetime, timezone -from html import unescape -from typing import Any, Iterable -from urllib.parse import unquote - -import httpx -from fastapi import Depends, HTTPException, Query -from pydantic import BaseModel, Field - -DEFAULT_TIMEOUT = 20.0 -MAX_HTML_SEARCH_BYTES = 2_000_000 -DEFAULT_USER_AGENT = ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" -) -XHS_PLATFORM = "xiaohongshu" - - -class XHSManualPageCapture(BaseModel): - url: str = "" - title: str = "" - payload: dict[str, Any] = Field(default_factory=dict) - - -class XiaohongshuContentSourceCreateRequest(BaseModel): - project_id: str = "" - source_kind: str - handle: str = "" - source_url: str = "" - title: str = "" - local_path: str = "" - metadata: dict[str, Any] = Field(default_factory=dict) - - -class XiaohongshuContentSourceSyncRequest(BaseModel): - project_id: str = "" - knowledge_base_id: str = "" - assistant_id: str = "" - content_source_id: str = "" - source_url: str = "" - handle: str = "" - title: str = "" - language: str = "auto" - max_items: int = Field(default=5, ge=1, le=20) - skip_existing: bool = True - auto_trigger_analysis: bool = True - manual_source_payload: dict[str, Any] | None = None - manual_pages: list[XHSManualPageCapture] = Field(default_factory=list) - discovery_note: str = "" - - -class XiaohongshuReviewCreateRequest(BaseModel): - project_id: str = "" - source_job_id: str = "" - assistant_id: str = "" - title: str = "" - platform: str = XHS_PLATFORM - content_type: str = "note" - publish_url: str = "" - published_at: str = "" - metrics: dict[str, Any] = Field(default_factory=dict) - verdict: str = "" - highlights: str = "" - next_actions: str = "" - notes: str = "" - - -class XiaohongshuReviewUpdateRequest(BaseModel): - title: str | None = None - platform: str | None = None - content_type: str | None = None - publish_url: str | None = None - published_at: str | None = None - metrics: dict[str, Any] | None = None - verdict: str | None = None - highlights: str | None = None - next_actions: str | None = None - notes: str | None = None - assistant_id: str | None = None - - -def _safe_json_dumps(value: Any) -> str: - return json.dumps(value, ensure_ascii=False, separators=(",", ":")) - - -def _first_non_empty(*values: Any) -> str: - for value in values: - if value is None: - continue - if isinstance(value, str): - stripped = value.strip() - if stripped: - return stripped - elif value not in ("", [], {}, ()): - return str(value) - return "" - - -def _dedupe_strings(values: Iterable[str]) -> list[str]: - result: list[str] = [] - seen: set[str] = set() - for value in values: - item = str(value).strip() - if not item: - continue - key = item.lower() - if key in seen: - continue - seen.add(key) - result.append(item) - return result - - -def _compact_text(value: Any, limit: int = 500) -> str: - text = str(value or "").strip() - if len(text) <= limit: - return text - return f"{text[: limit - 1]}…" - - -def _parse_count(value: Any) -> float: - if value is None: - return 0.0 - if isinstance(value, (int, float)): - return float(value) - text = str(value).strip().lower().replace(",", "") - if not text: - return 0.0 - multiplier = 1.0 - if text.endswith("w") or text.endswith("万"): - multiplier = 10_000.0 - text = text[:-1] - elif text.endswith("亿"): - multiplier = 100_000_000.0 - text = text[:-1] - text = text.replace("+", "") - match = re.search(r"-?\d+(?:\.\d+)?", text) - if not match: - return 0.0 - try: - return float(match.group()) * multiplier - except ValueError: - return 0.0 - - -def _normalize_timestamp(value: Any) -> str | None: - if value in (None, "", 0, "0"): - return None - if isinstance(value, str): - stripped = value.strip() - if not stripped: - return None - if re.match(r"^\d{4}-\d{2}-\d{2}T", stripped): - return stripped - if stripped.isdigit(): - value = int(stripped) - else: - return stripped - if isinstance(value, (int, float)): - ts = float(value) - if ts > 10_000_000_000: - ts /= 1000.0 - try: - return datetime.fromtimestamp(ts, tz=timezone.utc).replace(microsecond=0).isoformat() - except Exception: - return None - return None - - -def _extract_hashtags(*texts: str) -> list[str]: - tags: list[str] = [] - for text in texts: - if not text: - continue - tags.extend(match.group(1) for match in re.finditer(r"#([\w\u4e00-\u9fff]+)", text)) - return _dedupe_strings(tags) - - -def _extract_keywords(*texts: str) -> list[str]: - candidates: list[str] = [] - for text in texts: - if not text: - continue - candidates.extend(_extract_hashtags(text)) - candidates.extend(re.findall(r"[\u4e00-\u9fff]{2,8}", text)) - candidates.extend(re.findall(r"[A-Za-z][A-Za-z0-9_]{2,20}", text)) - stop_words = { - "小红书", - "笔记", - "内容", - "账号", - "发布", - "更多", - "关注", - "用户", - "xhs", - "xiaohongshu", - } - return _dedupe_strings(item for item in candidates if item.lower() not in stop_words) - - -def _walk_json(value: Any) -> Iterable[dict[str, Any]]: - if isinstance(value, dict): - yield value - for child in value.values(): - yield from _walk_json(child) - elif isinstance(value, list): - for child in value: - yield from _walk_json(child) - - -def _extract_json_objects_from_text(text: str) -> list[Any]: - decoder = json.JSONDecoder() - objects: list[Any] = [] - seen: set[str] = set() - if not text: - return objects - - candidates = [text, unquote(text), unescape(text), unescape(unquote(text))] - for candidate in candidates: - snippet = candidate[:MAX_HTML_SEARCH_BYTES] - for match in re.finditer(r"[\{\[]", snippet): - try: - obj, _ = decoder.raw_decode(snippet[match.start() :]) - except Exception: - continue - marker = _safe_json_dumps(obj) - if marker in seen: - continue - seen.add(marker) - objects.append(obj) - if len(objects) >= 50: - return objects - return objects - - -def _extract_json_blobs_from_html(html: str) -> list[dict[str, Any]]: - blobs: list[dict[str, Any]] = [] - seen: set[str] = set() - for attrs, content in re.findall(r"]*)>(.*?)", html, re.IGNORECASE | re.DOTALL): - script_id_match = re.search(r'id=["\']([^"\']+)["\']', attrs, re.IGNORECASE) - script_id = script_id_match.group(1) if script_id_match else "" - for obj in _extract_json_objects_from_text(content.strip()): - marker = _safe_json_dumps(obj) - if marker in seen: - continue - seen.add(marker) - blobs.append({"script_id": script_id, "payload": obj}) - return blobs - - -async def _fetch_html(url: str, cookie: str = "") -> tuple[str, str]: - headers = { - "User-Agent": DEFAULT_USER_AGENT, - "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", - } - if cookie.strip(): - headers["Cookie"] = cookie.strip() - async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT, follow_redirects=True) as client: - response = await client.get(url, headers=headers) - response.raise_for_status() - return str(response.url), response.text - - -def _note_candidate_score(value: dict[str, Any]) -> int: - score = 0 - if any(key in value for key in ("note_id", "noteId", "id", "post_id")): - score += 2 - if any(key in value for key in ("title", "desc", "content", "text", "note")): - score += 2 - if any(key in value for key in ("author", "user", "owner")): - score += 2 - if "stats" in value and isinstance(value["stats"], dict): - score += 2 - return score - - -def _extract_note_candidates(payload: Any) -> list[dict[str, Any]]: - candidates: list[dict[str, Any]] = [] - for item in _walk_json(payload): - if _note_candidate_score(item) >= 4: - candidates.append(item) - for key in ("author", "user", "owner"): - child = item.get(key) - if isinstance(child, dict) and _note_candidate_score(child) >= 3: - candidates.append(child) - return candidates - - -def _normalize_note_candidate(candidate: dict[str, Any], fallback_url: str = "") -> dict[str, Any]: - stats_source = candidate.get("stats") if isinstance(candidate.get("stats"), dict) else {} - author = candidate.get("author") if isinstance(candidate.get("author"), dict) else {} - if not author and isinstance(candidate.get("user"), dict): - author = candidate["user"] - cover = candidate.get("cover") or candidate.get("image") or candidate.get("images") - if isinstance(cover, list) and cover: - cover = cover[0] - if isinstance(cover, dict): - cover = _first_non_empty( - cover.get("url_list", [""])[0] if isinstance(cover.get("url_list"), list) else "", - cover.get("url"), - ) - return { - "note_id": _first_non_empty(candidate.get("note_id"), candidate.get("noteId"), candidate.get("id"), candidate.get("post_id")), - "title": _first_non_empty(candidate.get("title"), candidate.get("desc"), candidate.get("content"), candidate.get("text")), - "content": _first_non_empty(candidate.get("content"), candidate.get("desc"), candidate.get("text"), candidate.get("note")), - "author_name": _first_non_empty(author.get("nickname"), author.get("name"), candidate.get("nickname")), - "author_url": _first_non_empty(author.get("profile_url"), candidate.get("profile_url")), - "share_url": _first_non_empty(candidate.get("share_url"), candidate.get("url"), fallback_url), - "cover_url": _first_non_empty(cover), - "published_at": _normalize_timestamp(candidate.get("publish_time") or candidate.get("created_at") or candidate.get("create_time")), - "tags": _extract_hashtags( - _first_non_empty(candidate.get("title")), - _first_non_empty(candidate.get("desc"), candidate.get("content")), - ), - "stats": { - "like": _parse_count(stats_source.get("like_count") or stats_source.get("liked_count") or candidate.get("like_count")), - "comment": _parse_count(stats_source.get("comment_count") or candidate.get("comment_count")), - "collect": _parse_count(stats_source.get("collect_count") or candidate.get("collect_count")), - "share": _parse_count(stats_source.get("share_count") or candidate.get("share_count")), - }, - "raw": candidate, - } - - -def _extract_notes(payloads: Iterable[Any]) -> list[dict[str, Any]]: - notes: list[dict[str, Any]] = [] - seen: set[str] = set() - for payload in payloads: - for candidate in _extract_note_candidates(payload): - normalized = _normalize_note_candidate(candidate) - dedupe_key = normalized["note_id"] or normalized["share_url"] or normalized["title"] - if not dedupe_key or dedupe_key in seen: - continue - seen.add(dedupe_key) - notes.append(normalized) - notes.sort( - key=lambda item: ( - item["stats"]["like"] + item["stats"]["comment"] * 3 + item["stats"]["collect"] * 2 + item["stats"]["share"] * 4 - ), - reverse=True, - ) - return notes - - -def _is_xhs_source_row(row: dict[str, Any]) -> bool: - platform = str(row.get("platform", "") or "").strip().lower() - if platform == XHS_PLATFORM: - return True - source_url = str(row.get("source_url", "") or "") - normalized = source_url.strip().lower() - return "xiaohongshu.com" in normalized or "xhslink.com" in normalized - - -def _job_matches_platform(row: dict[str, Any], legacy: Any) -> bool: - if row.get("content_source_id"): - source = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["content_source_id"],)) - if source: - return _is_xhs_source_row(source) - source_url = str(row.get("source_url") or "") - return "xiaohongshu.com" in source_url.lower() or "xhslink.com" in source_url.lower() - - -def _review_matches_platform(row: dict[str, Any], legacy: Any) -> bool: - return str(row.get("platform", "") or "").strip().lower() == XHS_PLATFORM - - -def _normalize_platform(value: str | None) -> str: - return str(value or "").strip().lower() - - -def _require_xhs_platform(value: str | None) -> str: - normalized = _normalize_platform(value or XHS_PLATFORM) - if normalized != XHS_PLATFORM: - raise HTTPException(status_code=400, detail="Xiaohongshu routes only support the xiaohongshu platform") - return normalized - - -def register_xiaohongshu_routes(app: Any, legacy: Any) -> None: - def now() -> str: - return legacy.utc_now() - - def make_id(prefix: str) -> str: - return legacy.make_id(prefix) - - def _content_source_row_or_404(source_id: str, account_id: str) -> dict[str, Any]: - row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ? AND user_id = ?", (source_id, account_id)) - if not row: - raise HTTPException(status_code=404, detail="Content source not found") - if not _is_xhs_source_row(row): - raise HTTPException(status_code=404, detail="Content source not found") - return row - - def _xhs_job_payload(row: dict[str, Any]) -> dict[str, Any]: - payload = legacy.job_payload(row) - if row.get("content_source_id"): - source_row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["content_source_id"],)) - if source_row and _is_xhs_source_row(source_row): - payload["content_source"] = legacy.content_source_payload(source_row) - return payload - - def _xhs_review_payload(row: dict[str, Any]) -> dict[str, Any]: - payload = legacy.review_payload(row) - if payload.get("platform", "") != XHS_PLATFORM: - payload["platform"] = XHS_PLATFORM - return payload - - async def _collect_public_source( - source_url: str, - manual_payload: dict[str, Any] | None, - manual_pages: list[XHSManualPageCapture], - ) -> dict[str, Any]: - source_url = source_url.strip() - blobs: list[dict[str, Any]] = [] - errors: list[str] = [] - - if manual_payload: - blobs.append({"script_id": "manual_source_payload", "payload": manual_payload}) - - for page in manual_pages: - blobs.append({ - "script_id": "manual_page_payload", - "url": page.url, - "title": page.title, - "payload": page.payload, - }) - - if source_url: - try: - final_url, html = await _fetch_html(source_url) - source_url = final_url - blobs.extend(_extract_json_blobs_from_html(html)) - except Exception as exc: - errors.append(f"source_fetch_failed: {exc}") - - payloads = [item["payload"] for item in blobs] - notes = _extract_notes(payloads) - source_title = _first_non_empty( - manual_payload.get("title", "") if manual_payload else "", - *(item.get("title", "") for item in notes[:3]), - source_url, - ) - return { - "source_url": source_url, - "title": source_title, - "notes": notes, - "raw_pages": blobs, - "errors": errors, - } - - @app.get("/v2/xiaohongshu/content-sources") - def list_content_sources( - project_id: str | None = Query(default=None), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - clauses = ["user_id = ?", "platform = ?"] - params: list[Any] = [account["id"], XHS_PLATFORM] - if project_id is not None: - normalized_project = project_id.strip() - if normalized_project: - clauses.append("project_id = ?") - params.append(normalized_project) - else: - clauses.append("(project_id IS NULL OR project_id = '')") - rows = legacy.db.fetch_all( - f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY created_at DESC", - tuple(params), - ) - return [legacy.content_source_payload(row) for row in rows] - - @app.post("/v2/xiaohongshu/content-sources") - def create_content_source_api( - request: XiaohongshuContentSourceCreateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - project = legacy.resolve_target_project(account["id"], request.project_id or None, username=account["username"]) - row = legacy.create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind=request.source_kind.strip(), - platform=XHS_PLATFORM, - handle=request.handle.strip(), - source_url=request.source_url.strip(), - title=request.title.strip(), - local_path=request.local_path.strip(), - metadata={ - **request.metadata, - "platform_label": "小红书", - "platform": XHS_PLATFORM, - }, - ) - return legacy.content_source_payload(row) - - @app.get("/v2/xiaohongshu/content-sources/{source_id}") - def get_content_source(source_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: - row = _content_source_row_or_404(source_id, account["id"]) - return legacy.content_source_payload(row) - - @app.post("/v2/xiaohongshu/content-sources/sync") - async def sync_content_source( - request: XiaohongshuContentSourceSyncRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_row = None - if request.content_source_id.strip(): - source_row = _content_source_row_or_404(request.content_source_id.strip(), account["id"]) - - requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "") - project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - kb = legacy.resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"]) - assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) - source_url = (request.source_url or (source_row or {}).get("source_url") or "").strip() - if not source_url and not source_row: - raise HTTPException(status_code=400, detail="source_url or content_source_id is required") - - if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]: - raise HTTPException(status_code=400, detail="Content source does not belong to target project") - - if source_row and not _is_xhs_source_row(source_row): - raise HTTPException(status_code=400, detail="Content source is not scoped to Xiaohongshu") - - source_kind = (source_row or {}).get("source_kind", "creator_account") - handle = (request.handle or (source_row or {}).get("handle", "")).strip() - source_title = ( - request.title.strip() - or (source_row or {}).get("title", "").strip() - or handle - or source_url - ) - - if not source_row: - source_row = legacy.create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind=source_kind or "creator_account", - platform=XHS_PLATFORM, - handle=handle, - source_url=source_url, - title=source_title, - metadata={ - "platform": XHS_PLATFORM, - "platform_label": "小红书", - "sync_mode": "recent_notes", - "max_items": request.max_items, - }, - ) - - public_data = await _collect_public_source(source_url, request.manual_source_payload, request.manual_pages) - note_count = len(public_data["notes"]) - top_notes = [ - { - "note_id": item["note_id"], - "title": _compact_text(item["title"], 120), - "content": _compact_text(item["content"], 180), - "author_name": item["author_name"], - "published_at": item["published_at"], - "stats": item["stats"], - "tags": item["tags"][:6], - } - for item in public_data["notes"][: request.max_items] - ] - - job_row = legacy.create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="content_source_sync", - line_type="content_source_sync", - workflow_key="content_source_sync_pipeline", - title=f"{source_title} 内容源同步", - language=request.language, - source_url=source_url, - assistant_id=(assistant or {}).get("id"), - content_source_id=source_row["id"], - artifacts={ - "platform": XHS_PLATFORM, - "handle": handle, - "source_account_url": source_url, - "source_title": source_title, - "skip_existing": request.skip_existing, - "auto_trigger_analysis": request.auto_trigger_analysis, - "max_items": request.max_items, - "note_count": note_count, - "top_notes": top_notes, - "raw_pages": public_data["raw_pages"], - "errors": public_data["errors"], - "discovery_note": request.discovery_note.strip(), - }, - analysis_model_profile_id="", - ) - - legacy.update_content_source_metadata( - source_row["id"], - { - "platform": XHS_PLATFORM, - "platform_label": "小红书", - "sync_mode": "recent_notes", - "max_items": request.max_items, - "note_count": note_count, - "last_sync_job_id": job_row["id"], - "last_sync_requested_at": now(), - }, - ) - return legacy.job_payload(await legacy.trigger_orchestrated_job(job_row)) - - @app.get("/v2/xiaohongshu/jobs") - def list_jobs( - parent_job_id: str | None = Query(default=None), - line_type: str | None = Query(default=None), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - clauses = ["user_id = ?"] - params: list[Any] = [account["id"]] - if parent_job_id is not None: - normalized_parent = parent_job_id.strip() - if normalized_parent: - clauses.append("parent_job_id = ?") - params.append(normalized_parent) - else: - clauses.append("(parent_job_id IS NULL OR parent_job_id = '')") - if line_type: - clauses.append("line_type = ?") - params.append(line_type.strip()) - rows = legacy.db.fetch_all( - f"SELECT * FROM jobs WHERE {' AND '.join(clauses)} ORDER BY created_at DESC", - tuple(params), - ) - return [_xhs_job_payload(row) for row in rows if _job_matches_platform(row, legacy)] - - @app.get("/v2/xiaohongshu/jobs/{job_id}") - def get_job(job_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: - row = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"])) - if not row or not _job_matches_platform(row, legacy): - raise HTTPException(status_code=404, detail="Job not found") - return _xhs_job_payload(row) - - @app.get("/v2/xiaohongshu/jobs/{job_id}/events") - def get_job_events(job_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]: - row = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"])) - if not row or not _job_matches_platform(row, legacy): - raise HTTPException(status_code=404, detail="Job not found") - return [ - legacy.job_event_payload(item) - for item in legacy.db.fetch_all("SELECT * FROM job_events WHERE job_id = ? ORDER BY created_at ASC", (job_id,)) - ] - - @app.get("/v2/xiaohongshu/reviews") - def list_reviews( - project_id: str | None = Query(default=None), - limit: int = Query(default=50, ge=1, le=200), - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> list[dict[str, Any]]: - clauses = ["user_id = ?", "platform = ?"] - params: list[Any] = [account["id"], XHS_PLATFORM] - if project_id is not None: - normalized_project = project_id.strip() - if normalized_project: - clauses.append("project_id = ?") - params.append(normalized_project) - else: - clauses.append("(project_id IS NULL OR project_id = '')") - sql = ( - f"SELECT * FROM publish_reviews WHERE {' AND '.join(clauses)} " - "ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC LIMIT ?" - ) - params.append(limit) - return [_xhs_review_payload(row) for row in legacy.db.fetch_all(sql, tuple(params))] - - @app.post("/v2/xiaohongshu/reviews") - def create_review( - request: XiaohongshuReviewCreateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - source_job = None - if request.source_job_id.strip(): - source_job = legacy.load_owned_job(request.source_job_id.strip(), account["id"]) - if not _job_matches_platform(source_job, legacy): - raise HTTPException(status_code=404, detail="Job not found") - requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "") - project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) - review_id = make_id("review") - title = request.title.strip() or (source_job.get("title", "") if source_job else "") - if not title: - title = f"{project['name']} 复盘" - timestamp = now() - normalized_platform = _require_xhs_platform(request.platform) - legacy.db.execute( - """ - INSERT INTO publish_reviews ( - id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, - publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at - ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - review_id, - account["id"], - project["id"], - source_job["id"] if source_job else None, - (assistant or {}).get("id") or None, - title, - normalized_platform, - request.content_type.strip() or "note", - request.publish_url.strip(), - request.published_at.strip(), - _safe_json_dumps(request.metrics), - request.verdict.strip(), - request.highlights.strip(), - request.next_actions.strip(), - request.notes.strip(), - timestamp, - timestamp, - ), - ) - row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) - return _xhs_review_payload(row) - - @app.patch("/v2/xiaohongshu/reviews/{review_id}") - def update_review( - review_id: str, - request: XiaohongshuReviewUpdateRequest, - account: dict[str, Any] = Depends(legacy.require_approved), - ) -> dict[str, Any]: - current = legacy.load_owned_review(review_id, account["id"]) - if not _review_matches_platform(current, legacy): - raise HTTPException(status_code=404, detail="Review not found") - assistant_id = current.get("assistant_id") or None - if request.assistant_id is not None: - assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, current.get("project_id", "")) - assistant_id = (assistant or {}).get("id") or None - normalized_platform = current.get("platform", XHS_PLATFORM) - if request.platform is not None: - normalized_platform = _require_xhs_platform(request.platform) - legacy.db.execute( - """ - UPDATE publish_reviews - SET title = ?, platform = ?, content_type = ?, publish_url = ?, published_at = ?, - metrics_json = ?, verdict = ?, highlights = ?, next_actions = ?, notes = ?, - assistant_id = ?, updated_at = ? - WHERE id = ? AND user_id = ? - """, - ( - request.title if request.title is not None else current.get("title", ""), - normalized_platform, - request.content_type if request.content_type is not None else current.get("content_type", "note"), - request.publish_url if request.publish_url is not None else current.get("publish_url", ""), - request.published_at if request.published_at is not None else current.get("published_at", ""), - _safe_json_dumps(request.metrics if request.metrics is not None else legacy.parse_json_object(current.get("metrics_json") or "{}")), - request.verdict if request.verdict is not None else current.get("verdict", ""), - request.highlights if request.highlights is not None else current.get("highlights", ""), - request.next_actions if request.next_actions is not None else current.get("next_actions", ""), - request.notes if request.notes is not None else current.get("notes", ""), - assistant_id, - now(), - review_id, - account["id"], - ), - ) - row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) - return _xhs_review_payload(row) diff --git a/deploy/STORYFORGE_PUBLIC_GATEWAY.md b/deploy/STORYFORGE_PUBLIC_GATEWAY.md index 863d639..1a3e28e 100644 --- a/deploy/STORYFORGE_PUBLIC_GATEWAY.md +++ b/deploy/STORYFORGE_PUBLIC_GATEWAY.md @@ -7,6 +7,15 @@ 3. 云服务器本地 `collector-service` 直接承接业务 API 与数据库 4. 本机通过 SSH 反向隧道只桥接本地和局域网执行引擎到云服务器 +当前已验证的 SSH 维护入口: + +- `ubuntu@111.231.132.51` +- 公网部署目录:`/home/ubuntu/storyforge` +- systemd 服务: + - `storyforge-web-v4` + - `storyforge-collector` + - `nginx` + ## 端口映射 - 云服务器 `127.0.0.1:8081` -> 云服务器本地 `collector-service` @@ -44,3 +53,18 @@ - 本地桥接断开时,相关执行引擎会不可用,但登录和基础业务 API 仍可用 - 这仍是混合部署测试架构,不是最终完全云原生部署 + +## 标准化发布与回归 + +仓库内已经补了两个标准脚本: + +```bash +./scripts/deploy_public_storyforge.sh +./scripts/smoke_public_storyforge.sh +``` + +说明: + +- `deploy_public_storyforge.sh` 会备份远端 `web/storyforge-web-v4`,同步当前仓库的前端和 `collector-service/app`,重启 `storyforge-web-v4` / `storyforge-collector`,最后做公网 smoke。 +- `smoke_public_storyforge.sh` 会检查公网 `/healthz`、`/`、`/assets/app.js` 和 `/openapi.json`,确认最新 Web bundle 与多平台路由都已经对外可见。 +- 默认 SSH 口令可通过 `STORYFORGE_PUBLIC_PASSWORD` 传入,或从 macOS Keychain 的 `STORYFORGE_PUBLIC_KEYCHAIN_SERVICE` 读取;当前本机可沿用现有 `ai-glasses-debug-ssh` 条目。 diff --git a/docs/MVP_STATUS_2026-03-18.md b/docs/MVP_STATUS_2026-03-18.md index 1969229..2ffbb4a 100644 --- a/docs/MVP_STATUS_2026-03-18.md +++ b/docs/MVP_STATUS_2026-03-18.md @@ -1,7 +1,7 @@ # StoryForge MVP 状态 日期:2026-03-18 -更新:2026-03-20 +更新:2026-03-26 ## 已跑通或已完成代码接通 @@ -19,6 +19,11 @@ - `upload_video -> source_job_id -> cutvideo` 自动 staging 闭环 - `collector` live 运行态已从临时源码挂载切回 `StoryForge-gitea` 正式镜像 - live `collector` 已挂出 `/v2/douyin/*` 能力并通过认证接口验证 +- 多平台工作台响应契约已对齐,`domestic_platform_features.py` 统一补出 `latest_public_snapshot`、`latest_creator_snapshot`、`recent_reports`、`recent_similarity_searches`、`available_model_profiles` 和更一致的 tracking digest envelope +- `douyin` tracking digest 已补齐 `generated_at` / `since` 等与多平台一致的包裹字段,便于前端统一消费 +- `collector-service/app/main.py` 已收口到源码主线,不再保留 `legacy_runtime` fallback +- 已删除未接入主应用的旧 `xiaohongshu_features / bilibili_features / kuaishou_features / wechat_video_features / legacy_runtime` 残留模块,后端只保留当前 live 主线 +- `scripts/smoke_business.sh` 已扩展为多平台最小 smoke,可同时验证 `douyin / xiaohongshu / bilibili / kuaishou / wechat_video` 的列表、workspace 和 tracking digest 形状 - `douyin` 支持从分享文案中提取 `profile_url`,并在 public 页面命中抖音反爬挑战时返回明确诊断 - `douyin` 手工 payload 导入与账号分析链路已跑通 - `douyin` 浏览器辅助采集工具已接入,可用真实 Playwright Chromium 会话采集主页 / 视频页并直接调用现有 `/v2/douyin/accounts/sync` diff --git a/scripts/deploy_public_storyforge.sh b/scripts/deploy_public_storyforge.sh new file mode 100755 index 0000000..cae6044 --- /dev/null +++ b/scripts/deploy_public_storyforge.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(CDPATH= cd -- "$(dirname "$0")/.." && pwd)" + +HOST="${STORYFORGE_PUBLIC_HOST:-111.231.132.51}" +USER_NAME="${STORYFORGE_PUBLIC_USER:-ubuntu}" +PORT="${STORYFORGE_PUBLIC_PORT:-22}" +BASE_URL="${STORYFORGE_PUBLIC_BASE_URL:-https://storyforge.hyzq.net}" +REMOTE_BASE="${STORYFORGE_PUBLIC_REMOTE_BASE:-/home/ubuntu/storyforge}" +KEYCHAIN_SERVICE="${STORYFORGE_PUBLIC_KEYCHAIN_SERVICE:-ai-glasses-debug-ssh}" +SYNC_COLLECTOR="${STORYFORGE_PUBLIC_SYNC_COLLECTOR:-1}" +CURL_MAX_TIME="${STORYFORGE_PUBLIC_CURL_MAX_TIME:-60}" + +need_cmd() { + if ! command -v "$1" >/dev/null 2>&1; then + echo "missing required command: $1" >&2 + exit 1 + fi +} + +need_cmd rsync +need_cmd ssh +need_cmd curl + +resolve_password() { + if [ -n "${STORYFORGE_PUBLIC_PASSWORD:-}" ]; then + printf '%s' "${STORYFORGE_PUBLIC_PASSWORD}" + return 0 + fi + if [ -n "$KEYCHAIN_SERVICE" ] && command -v security >/dev/null 2>&1; then + security find-generic-password -a "$USER_NAME" -s "$KEYCHAIN_SERVICE" -w 2>/dev/null || true + return 0 + fi + return 0 +} + +PASSWORD="$(resolve_password)" +SSH_OPTS=(-p "$PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null) +RSYNC_RSH=(ssh "${SSH_OPTS[@]}") + +run_ssh() { + if [ -n "$PASSWORD" ]; then + need_cmd sshpass + SSHPASS="$PASSWORD" sshpass -e ssh "${SSH_OPTS[@]}" "$USER_NAME@$HOST" "$@" + else + ssh "${SSH_OPTS[@]}" "$USER_NAME@$HOST" "$@" + fi +} + +run_rsync() { + if [ -n "$PASSWORD" ]; then + need_cmd sshpass + SSHPASS="$PASSWORD" sshpass -e rsync -az --delete -e "$(printf '%q ' "${RSYNC_RSH[@]}")" "$@" + else + rsync -az --delete -e "$(printf '%q ' "${RSYNC_RSH[@]}")" "$@" + fi +} + +echo "[1/6] backup remote web" +run_ssh "mkdir -p '$REMOTE_BASE/backups'; ts=\$(date +%Y%m%d-%H%M%S); tar -czf '$REMOTE_BASE/backups/storyforge-web-v4-'\$ts'.tgz' -C '$REMOTE_BASE/web' storyforge-web-v4 && echo web-backup:'$REMOTE_BASE/backups/storyforge-web-v4-'\$ts'.tgz'" + +if [ "$SYNC_COLLECTOR" = "1" ]; then + echo "[2/6] backup remote collector app" + run_ssh "mkdir -p '$REMOTE_BASE/backups'; ts=\$(date +%Y%m%d-%H%M%S); tar -czf '$REMOTE_BASE/backups/storyforge-collector-app-'\$ts'.tgz' -C '$REMOTE_BASE/collector-service' app && echo collector-backup:'$REMOTE_BASE/backups/storyforge-collector-app-'\$ts'.tgz'" +else + echo "[2/6] skip collector backup" +fi + +echo "[3/6] sync web/storyforge-web-v4" +run_rsync "$ROOT/web/storyforge-web-v4/" "$USER_NAME@$HOST:$REMOTE_BASE/web/storyforge-web-v4/" + +if [ "$SYNC_COLLECTOR" = "1" ]; then + echo "[4/6] sync collector-service/app" + if [ -n "$PASSWORD" ]; then + need_cmd sshpass + SSHPASS="$PASSWORD" sshpass -e rsync -az --delete \ + --exclude '__pycache__/' \ + --exclude '*.pyc' \ + -e "$(printf '%q ' "${RSYNC_RSH[@]}")" \ + "$ROOT/collector-service/app/" \ + "$USER_NAME@$HOST:$REMOTE_BASE/collector-service/app/" + else + rsync -az --delete \ + --exclude '__pycache__/' \ + --exclude '*.pyc' \ + -e "$(printf '%q ' "${RSYNC_RSH[@]}")" \ + "$ROOT/collector-service/app/" \ + "$USER_NAME@$HOST:$REMOTE_BASE/collector-service/app/" + fi +else + echo "[4/6] skip collector sync" +fi + +echo "[5/6] restart remote services" +if [ "$SYNC_COLLECTOR" = "1" ]; then + run_ssh "sudo systemctl restart storyforge-collector storyforge-web-v4 && sleep 2 && systemctl is-active storyforge-collector storyforge-web-v4" +else + run_ssh "sudo systemctl restart storyforge-web-v4 && sleep 2 && systemctl is-active storyforge-web-v4" +fi + +echo "[6/6] verify public health" +curl -fsS --max-time "$CURL_MAX_TIME" "$BASE_URL/healthz" >/dev/null +"$ROOT/scripts/smoke_public_storyforge.sh" + +echo "public deploy finished: $BASE_URL" diff --git a/scripts/smoke_business.sh b/scripts/smoke_business.sh index 7abe28b..8619c38 100755 --- a/scripts/smoke_business.sh +++ b/scripts/smoke_business.sh @@ -4,7 +4,6 @@ set -eu BASE_URL="${STORYFORGE_BASE_URL:-http://127.0.0.1:8081}" USERNAME="${STORYFORGE_USERNAME:-storyforge-admin}" PASSWORD="${STORYFORGE_PASSWORD:-}" -ACCOUNT_ID="${STORYFORGE_SMOKE_ACCOUNT_ID:-dyacct_c2b62842b228406cb48f05fac16fdfdf}" if [ -z "$PASSWORD" ]; then echo "STORYFORGE_PASSWORD is required. Export the bootstrap super-admin password before running smoke_business.sh." >&2 @@ -19,13 +18,23 @@ import urllib.request base = os.environ.get("BASE_URL", "http://127.0.0.1:8081").rstrip("/") username = os.environ.get("USERNAME", "storyforge-admin") password = os.environ.get("PASSWORD", "") -account_id = os.environ.get("ACCOUNT_ID", "dyacct_c2b62842b228406cb48f05fac16fdfdf") +platforms = ["douyin", "xiaohongshu", "bilibili", "kuaishou", "wechat_video"] if not password: raise SystemExit("STORYFORGE_PASSWORD is required") -with urllib.request.urlopen(base + "/readyz", timeout=20) as resp: - ready = json.load(resp) +def request_json(path: str, *, method: str = "GET", payload: dict | None = None, headers: dict | None = None, timeout: int = 30): + body = None + req_headers = {"content-type": "application/json"} + if headers: + req_headers.update(headers) + if payload is not None: + body = json.dumps(payload).encode() + req = urllib.request.Request(base + path, data=body, headers=req_headers, method=method) + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.load(resp) + +ready = request_json("/readyz", timeout=20) if not ready.get("ready"): raise SystemExit("collector readyz is not healthy") @@ -40,32 +49,60 @@ with urllib.request.urlopen(login_req, timeout=20) as resp: token = login["token"] headers = {"authorization": "Bearer " + token} -checks = [ - ("/v2/douyin/accounts", "accounts"), - (f"/v2/douyin/accounts/{account_id}/workspace", "workspace"), - (f"/v2/douyin/accounts/{account_id}/videos?limit=5&sort_by=score", "videos"), -] - print("smoke login: ok") -for path, label in checks: - req = urllib.request.Request(base + path, headers=headers) - with urllib.request.urlopen(req, timeout=30) as resp: - payload = json.load(resp) - if label == "accounts": - summary = {"accounts": len(payload)} - elif label == "workspace": - summary = { - "account": payload.get("account", {}).get("nickname"), - "reports": len(payload.get("recent_reports") or []), - "linked_accounts": len(payload.get("linked_accounts") or []), - "high_score_threshold": (payload.get("video_workspace") or {}).get("high_score_threshold"), - } - else: - items = payload.get("items") or [] - summary = { - "videos": len(items), - "first_title": items[0].get("title") if items else None, - "first_has_analysis": bool(items and items[0].get("latest_analysis")), - } - print(f"{label}: " + json.dumps(summary, ensure_ascii=False)) + +platform_agents = request_json("/v2/platform-agents", headers=headers) +tenant_quota = request_json("/v2/tenant/quota", headers=headers) +if not isinstance(platform_agents, dict): + raise SystemExit("/v2/platform-agents did not return an object") +if not isinstance(tenant_quota, dict): + raise SystemExit("/v2/tenant/quota did not return an object") +print("platform-agents: " + json.dumps({"items": len(platform_agents.get("items") or [])}, ensure_ascii=False)) +print("tenant-quota: " + json.dumps({"keys": sorted(tenant_quota.keys())[:6]}, ensure_ascii=False)) + +for platform in platforms: + accounts = request_json(f"/v2/{platform}/accounts", headers=headers) + if not isinstance(accounts, list): + raise SystemExit(f"/v2/{platform}/accounts did not return a list") + digest = request_json(f"/v2/{platform}/tracking/digest", headers=headers) + if not isinstance(digest, dict): + raise SystemExit(f"/v2/{platform}/tracking/digest did not return an object") + digest_keys = {"generated_at", "since", "items", "tracked_accounts", "cursor_last_seen_at"} + if not digest_keys.issubset(digest.keys()): + raise SystemExit(f"/v2/{platform}/tracking/digest missing keys: {sorted(digest_keys - set(digest.keys()))}") + + summary = { + "accounts": len(accounts), + "tracked_accounts": len(digest.get("tracked_accounts") or []), + "digest_items": len(digest.get("items") or []), + } + + if accounts: + account_id = accounts[0]["id"] + workspace = request_json(f"/v2/{platform}/accounts/{account_id}/workspace", headers=headers) + analysis_reports = request_json(f"/v2/{platform}/accounts/{account_id}/analysis-reports", headers=headers) + if not isinstance(workspace, dict): + raise SystemExit(f"/v2/{platform}/accounts/{{id}}/workspace did not return an object") + if not isinstance(analysis_reports, list): + raise SystemExit(f"/v2/{platform}/accounts/{{id}}/analysis-reports did not return a list") + if (workspace.get("account") or {}).get("platform") != platform: + raise SystemExit(f"/v2/{platform}/accounts/{{id}}/workspace returned wrong platform") + summary.update({ + "workspace_reports": len(workspace.get("recent_reports") or []), + "analysis_reports": len(analysis_reports), + }) + + if platform == "douyin": + snapshots = request_json(f"/v2/{platform}/accounts/{account_id}/snapshots", headers=headers) + if not isinstance(snapshots, list): + raise SystemExit("/v2/douyin/accounts/{id}/snapshots did not return a list") + summary["snapshots"] = len(snapshots) + creator_snapshots = [item for item in snapshots if item.get("snapshot_type") == "creator_center"] + if creator_snapshots: + creator_fields = request_json(f"/v2/{platform}/accounts/{account_id}/creator-fields", headers=headers) + if creator_fields.get("snapshot_type") != "creator_center": + raise SystemExit("/v2/douyin/accounts/{id}/creator-fields returned an unexpected snapshot type") + summary["creator_fields"] = creator_fields.get("field_count", 0) + + print(f"{platform}: " + json.dumps(summary, ensure_ascii=False)) PY diff --git a/scripts/smoke_public_storyforge.sh b/scripts/smoke_public_storyforge.sh new file mode 100755 index 0000000..ee6fc3f --- /dev/null +++ b/scripts/smoke_public_storyforge.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${STORYFORGE_PUBLIC_BASE_URL:-https://storyforge.hyzq.net}" +CURL_MAX_TIME="${STORYFORGE_PUBLIC_CURL_MAX_TIME:-60}" + +need_cmd() { + if ! command -v "$1" >/dev/null 2>&1; then + echo "missing required command: $1" >&2 + exit 1 + fi +} + +need_cmd curl +need_cmd python3 +need_cmd rg + +curl_fetch() { + curl -fsS --max-time "$CURL_MAX_TIME" "$@" +} + +tmp_dir="$(mktemp -d)" +trap 'rm -rf "$tmp_dir"' EXIT + +health_file="$tmp_dir/health.json" +html_file="$tmp_dir/index.html" +js_file="$tmp_dir/app.js" +openapi_file="$tmp_dir/openapi.json" + +echo "[1/4] check public healthz" +curl_fetch "$BASE_URL/healthz" >"$health_file" +python3 - "$health_file" <<'PY' +import json +import pathlib +import sys + +payload = json.loads(pathlib.Path(sys.argv[1]).read_text()) +status = str(payload.get("status") or "").lower() +if status != "ok": + raise SystemExit(f"unexpected health status: {status!r}") +print("healthz ok") +PY + +echo "[2/4] check public index" +curl_fetch "$BASE_URL/" >"$html_file" +rg -q "StoryForge" "$html_file" +echo "index ok" + +echo "[3/4] check deployed web bundle" +curl_fetch "$BASE_URL/assets/app.js" >"$js_file" +rg -q "select-platform" "$js_file" +rg -q "trackingCursorMap" "$js_file" +rg -q "renderPlatformSwitchChips" "$js_file" +echo "bundle ok" + +echo "[4/4] check public openapi routes" +curl_fetch "$BASE_URL/openapi.json" >"$openapi_file" +for route in \ + '"/v2/xiaohongshu/accounts"' \ + '"/v2/bilibili/accounts"' \ + '"/v2/kuaishou/accounts"' \ + '"/v2/wechat_video/accounts"' \ + '"/v2/platform-agents"' \ + '"/v2/tenant/quota"' +do + rg -q "$route" "$openapi_file" +done +echo "openapi ok" + +echo "public smoke passed: $BASE_URL" diff --git a/tests/test_platform_contracts.py b/tests/test_platform_contracts.py new file mode 100644 index 0000000..78f1d8e --- /dev/null +++ b/tests/test_platform_contracts.py @@ -0,0 +1,575 @@ +from __future__ import annotations + +import json +import os +import sys +import tempfile +import unittest +from pathlib import Path +from types import SimpleNamespace + +from fastapi import FastAPI +from fastapi.testclient import TestClient + + +ROOT = Path(__file__).resolve().parents[1] +APP_ROOT = ROOT / "collector-service" +if str(APP_ROOT) not in sys.path: + sys.path.insert(0, str(APP_ROOT)) + +from app.database import Database, utc_now +from app.domestic_platform_features import register_domestic_platform_routes +from app.douyin_features import register_douyin_routes + + +BOOTSTRAP_USERNAME = "storyforge-admin" + + +def _json(value: object) -> str: + return json.dumps(value, ensure_ascii=False) + + +def _make_legacy(db: Database, account_row: dict[str, object]) -> SimpleNamespace: + counter = {"value": 0} + + def make_id(prefix: str) -> str: + counter["value"] += 1 + return f"{prefix}_{counter['value']}" + + def require_approved() -> dict[str, object]: + return account_row + + def content_source_payload(row: dict[str, object]) -> dict[str, object]: + metadata_raw = row.get("metadata_json") or "{}" + try: + metadata = json.loads(str(metadata_raw)) + except json.JSONDecodeError: + metadata = {} + return { + "id": row["id"], + "user_id": row["user_id"], + "project_id": row.get("project_id", ""), + "source_kind": row["source_kind"], + "platform": row.get("platform", ""), + "handle": row.get("handle", ""), + "source_url": row.get("source_url", ""), + "title": row.get("title", ""), + "local_path": row.get("local_path", ""), + "metadata": metadata, + "created_at": row["created_at"], + "updated_at": row["updated_at"], + } + + def assistant_payload(row: dict[str, object]) -> dict[str, object]: + return { + "id": row["id"], + "name": row.get("name", ""), + } + + def model_profile_for_account(account_id: str, requested_id: str | None) -> dict[str, object]: + if requested_id: + row = db.fetch_one("SELECT * FROM model_profiles WHERE id = ?", (requested_id,)) + if row: + return row + row = db.fetch_one("SELECT * FROM model_profiles WHERE is_default = 1 LIMIT 1") + if row: + return row + raise RuntimeError(f"No model profile configured for {account_id}") + + def parse_json_object(value: object) -> dict[str, object]: + if isinstance(value, dict): + return value + if isinstance(value, str) and value.strip(): + parsed = json.loads(value) + return parsed if isinstance(parsed, dict) else {} + return {} + + def resolve_target_kb(*_args: object, **_kwargs: object) -> dict[str, object]: + return {"id": "kb_contract"} + + def resolve_target_assistant(*_args: object, **_kwargs: object) -> None: + return None + + async def call_model(*_args: object, **_kwargs: object) -> str: + return "{}" + + def job_payload(row: dict[str, object]) -> dict[str, object]: + return row + + async def trigger_orchestrated_job(job_row: dict[str, object]) -> dict[str, object]: + return job_row + + return SimpleNamespace( + db=db, + utc_now=utc_now, + make_id=make_id, + require_approved=require_approved, + content_source_payload=content_source_payload, + assistant_payload=assistant_payload, + model_profile_for_account=model_profile_for_account, + parse_json_object=parse_json_object, + resolve_target_kb=resolve_target_kb, + resolve_target_assistant=resolve_target_assistant, + call_model=call_model, + job_payload=job_payload, + trigger_orchestrated_job=trigger_orchestrated_job, + ) + + +def _seed_base_account(db: Database) -> tuple[dict[str, object], dict[str, object], dict[str, object]]: + now = utc_now() + account_id = "acct_contract_owner" + project_id = "proj_contract_owner" + model_id = "model_contract_default" + db.execute( + """ + INSERT INTO accounts ( + id, username, password_hash, password_salt, display_name, role, approval_status, + approved_by, approved_at, preferred_analysis_model_id, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + account_id, + BOOTSTRAP_USERNAME, + "hash", + "salt", + "StoryForge Contract Owner", + "super_admin", + "approved", + account_id, + now, + model_id, + now, + now, + ), + ) + db.execute( + """ + INSERT INTO projects (id, user_id, name, description, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?) + """, + (project_id, account_id, "StoryForge Contracts", "", now, now), + ) + db.execute( + """ + INSERT INTO model_profiles ( + id, owner_account_id, name, provider, base_url, api_key, model_name, + is_system, is_default, created_at, updated_at + ) VALUES (?, NULL, ?, ?, ?, ?, ?, 1, 1, ?, ?) + """, + (model_id, "Default Model", "openai_compat", "http://127.0.0.1:8317/v1", "", "GLM-5", now, now), + ) + account_row = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account_id,)) + project_row = db.fetch_one("SELECT * FROM projects WHERE id = ?", (project_id,)) + model_row = db.fetch_one("SELECT * FROM model_profiles WHERE id = ?", (model_id,)) + assert account_row is not None + assert project_row is not None + assert model_row is not None + return account_row, project_row, model_row + + +def _seed_douyin(db: Database, owner: dict[str, object], model_row: dict[str, object]) -> str: + now = utc_now() + account_id = "dyacct_contract_1" + db.execute( + """ + INSERT INTO douyin_accounts ( + id, user_id, profile_url, canonical_profile_url, sec_uid, douyin_uid, douyin_id, + nickname, signature, avatar_url, tags_json, profile_stats_json, raw_profile_json, + source_mode, sync_status, last_public_sync_at, last_creator_sync_at, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + account_id, + owner["id"], + "https://www.douyin.com/user/contract-test", + "https://www.douyin.com/user/contract-test", + "sec_contract_1", + "douyin_uid_contract_1", + "douyin_id_contract_1", + "Contract Douyin", + "Contract test signature", + "https://example.com/avatar.png", + _json(["增长", "内容"]), + _json({"fans_count": 1200, "likes_count": 8800}), + _json({"profile": {"nickname": "Contract Douyin"}}), + "creator_center", + "ready", + now, + now, + now, + now, + ), + ) + for index in range(2): + db.execute( + """ + INSERT INTO douyin_videos ( + id, account_id, aweme_id, title, description, share_url, cover_url, + duration_sec, published_at, tags_json, stats_json, raw_json, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + f"dyvid_contract_{index + 1}", + account_id, + f"aweme_contract_{index + 1}", + f"Contract Video {index + 1}", + "Contract summary", + "https://example.com/video", + "https://example.com/cover.png", + 28, + f"2026-03-26T10:0{index}:00+00:00", + _json(["增长", "内容"]), + _json({"play": 8200 + index * 300, "like": 410 + index * 10, "comment": 18, "share": 9}), + _json({"title": f"Contract Video {index + 1}"}), + now, + now, + ), + ) + + public_snapshot_id = "dysnap_public_contract" + creator_snapshot_id = "dysnap_creator_contract" + db.execute( + """ + INSERT INTO douyin_account_snapshots ( + id, account_id, snapshot_type, source_url, raw_payload_json, summary_json, + field_count, collected_at, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + public_snapshot_id, + account_id, + "public_profile", + "https://www.douyin.com/user/contract-test", + _json({"nickname": "Contract Douyin"}), + _json({"nickname": "Contract Douyin"}), + 1, + now, + now, + ), + ) + db.execute( + """ + INSERT INTO douyin_account_snapshots ( + id, account_id, snapshot_type, source_url, raw_payload_json, summary_json, + field_count, collected_at, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + creator_snapshot_id, + account_id, + "creator_center", + "https://creator.douyin.com/creator-micro/home", + _json({"field": "value"}), + _json({"creator": "summary"}), + 1, + now, + now, + ), + ) + db.execute( + """ + INSERT INTO douyin_snapshot_fields (snapshot_id, field_path, field_type, field_value_text) + VALUES (?, ?, ?, ?) + """, + (creator_snapshot_id, "profile.nickname", "string", "Contract Douyin"), + ) + db.execute( + """ + INSERT INTO douyin_analysis_reports ( + id, account_id, user_id, focus_text, model_profile_ids_json, linked_account_ids_json, + prompt_text, context_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + "dyreport_contract_1", + account_id, + owner["id"], + "增长诊断", + _json([model_row["id"]]), + _json([]), + "contract prompt", + _json({"account": "douyin"}), + now, + ), + ) + db.execute( + """ + INSERT INTO douyin_analysis_suggestions ( + id, report_id, model_profile_id, model_label, status, suggestion_text, parsed_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + "dysuggestion_contract_1", + "dyreport_contract_1", + model_row["id"], + "Test Model", + "ok", + "Contract analysis output", + _json({"summary": "ok"}), + now, + ), + ) + db.execute( + """ + INSERT INTO douyin_tracked_accounts ( + id, user_id, tracked_account_id, assistant_id, note, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ("dytrack_contract_1", owner["id"], account_id, None, "note", now, now), + ) + db.execute( + """ + INSERT INTO douyin_tracking_cursors (user_id, last_seen_at, updated_at) + VALUES (?, ?, ?) + """, + (owner["id"], "2026-03-26T09:00:00+00:00", now), + ) + return account_id + + +def _seed_domestic(db: Database, owner: dict[str, object], project_row: dict[str, object], platform: str) -> str: + now = utc_now() + account_id = f"{platform}_acct_contract_1" + db.execute( + """ + INSERT INTO content_sources ( + id, user_id, project_id, source_kind, platform, handle, source_url, title, local_path, + metadata_json, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + account_id, + owner["id"], + project_row["id"], + "creator_account", + platform, + f"{platform}_handle", + f"https://example.com/{platform}/profile", + f"{platform.upper()} Contract Account", + "", + _json( + { + "bio": f"{platform} bio", + "description": f"{platform} description", + "avatar_url": "https://example.com/avatar.png", + "tags": ["增长", platform], + "keywords": ["增长", "内容"], + "max_items": 5, + } + ), + now, + now, + ), + ) + for index in range(2): + db.execute( + """ + INSERT INTO content_sources ( + id, user_id, project_id, source_kind, platform, handle, source_url, title, local_path, + metadata_json, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + f"{platform}_video_contract_{index + 1}", + owner["id"], + project_row["id"], + "video_link", + platform, + "", + f"https://example.com/{platform}/video/{index + 1}", + f"{platform.upper()} Contract Video {index + 1}", + "", + _json( + { + "summary": "contract summary", + "description": "contract description", + "cover_url": "https://example.com/cover.png", + "published_at": f"2026-03-26T10:0{index}:00+00:00", + "tags": ["增长", platform], + "content_type": "video", + "duration_sec": 30, + "external_id": f"{platform}_ext_{index + 1}", + "origin_content_source_id": account_id, + "source_account_url": f"https://example.com/{platform}/profile", + } + ), + now, + now, + ), + ) + db.execute( + f""" + INSERT INTO {platform}_analysis_reports ( + id, user_id, account_source_id, focus_text, prompt_text, context_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + f"{platform}_report_contract_1", + owner["id"], + account_id, + "增长诊断", + "contract prompt", + _json({"account": platform}), + now, + ), + ) + db.execute( + f""" + INSERT INTO {platform}_analysis_suggestions ( + id, report_id, model_profile_id, model_label, status, suggestion_text, parsed_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + f"{platform}_suggestion_contract_1", + f"{platform}_report_contract_1", + "model_contract_default", + "Test Model", + "ok", + "Contract analysis output", + _json({"summary": "ok"}), + now, + ), + ) + db.execute( + f""" + INSERT INTO {platform}_tracked_accounts ( + id, user_id, tracked_account_id, assistant_id, note, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + (f"{platform}_track_contract_1", owner["id"], account_id, None, "note", now, now), + ) + db.execute( + f""" + INSERT INTO {platform}_tracking_cursors (user_id, last_seen_at, updated_at) + VALUES (?, ?, ?) + """, + (owner["id"], "2026-03-26T09:00:00+00:00", now), + ) + db.execute( + f""" + INSERT INTO {platform}_similarity_searches ( + id, user_id, source_account_id, prompt_text, context_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?) + """, + ( + f"{platform}_search_contract_1", + owner["id"], + account_id, + "contract prompt", + _json({"source_account": platform}), + now, + ), + ) + return account_id + + +def _build_app(platforms: list[str]) -> tuple[FastAPI, SimpleNamespace, dict[str, object]]: + tmpdir = tempfile.TemporaryDirectory() + db = Database(str(Path(tmpdir.name) / "storyforge.db")) + db.init_schema() + owner_row, project_row, model_row = _seed_base_account(db) + legacy = _make_legacy(db, owner_row) + app = FastAPI() + register_douyin_routes(app, legacy) + for platform in platforms: + register_domestic_platform_routes(app, legacy, platform=platform, label=platform) + app.state._tmpdir = tmpdir + app.state._legacy = legacy + app.state._project_row = project_row + app.state._model_row = model_row + app.state._owner_row = owner_row + return app, legacy, { + "owner": owner_row, + "project": project_row, + "model": model_row, + } + + +class PlatformContractTests(unittest.TestCase): + def test_douyin_tracking_digest_and_workspace_shape(self) -> None: + app, legacy, seed = _build_app(["xiaohongshu", "bilibili", "kuaishou", "wechat_video"]) + douyin_account_id = _seed_douyin(legacy.db, seed["owner"], seed["model"]) + with TestClient(app) as client: + accounts = client.get("/v2/douyin/accounts", headers={"Authorization": "Bearer dummy"}) + self.assertEqual(accounts.status_code, 200) + self.assertTrue(accounts.json()) + + workspace = client.get(f"/v2/douyin/accounts/{douyin_account_id}", headers={"Authorization": "Bearer dummy"}) + self.assertEqual(workspace.status_code, 200) + workspace_payload = workspace.json() + self.assertIn("account", workspace_payload) + self.assertIn("recent_reports", workspace_payload) + self.assertIn("latest_public_snapshot", workspace_payload) + self.assertIn("latest_creator_snapshot", workspace_payload) + self.assertIn("recent_similarity_searches", workspace_payload) + self.assertIn("available_model_profiles", workspace_payload) + + reports = client.get(f"/v2/douyin/accounts/{douyin_account_id}/analysis-reports", headers={"Authorization": "Bearer dummy"}) + self.assertEqual(reports.status_code, 200) + self.assertEqual(len(reports.json()), len(workspace_payload["recent_reports"])) + + snapshots = client.get(f"/v2/douyin/accounts/{douyin_account_id}/snapshots", headers={"Authorization": "Bearer dummy"}) + self.assertEqual(snapshots.status_code, 200) + self.assertGreaterEqual(len(snapshots.json()), 2) + creator_snapshot = next(item for item in snapshots.json() if item["snapshot_type"] == "creator_center") + creator_fields = client.get(f"/v2/douyin/accounts/{douyin_account_id}/creator-fields", headers={"Authorization": "Bearer dummy"}) + self.assertEqual(creator_fields.status_code, 200) + self.assertEqual(creator_fields.json()["snapshot_type"], "creator_center") + self.assertEqual(creator_fields.json()["id"], creator_snapshot["id"]) + + digest = client.get("/v2/douyin/tracking/digest", headers={"Authorization": "Bearer dummy"}) + self.assertEqual(digest.status_code, 200) + digest_payload = digest.json() + self.assertIn("generated_at", digest_payload) + self.assertIn("since", digest_payload) + self.assertIn("tracked_accounts", digest_payload) + self.assertIn("cursor_last_seen_at", digest_payload) + self.assertTrue(digest_payload["items"]) + digest_item = digest_payload["items"][0] + self.assertEqual(digest_item["platform"], "douyin") + self.assertIn("summary_text", digest_item) + self.assertIn("tracked_account_name", digest_item) + self.assertIn("account", digest_item) + self.assertIn("video", digest_item) + + def test_domestic_workspace_and_tracking_shape(self) -> None: + app, legacy, seed = _build_app(["xiaohongshu"]) + xhs_account_id = _seed_domestic(legacy.db, seed["owner"], seed["project"], "xiaohongshu") + with TestClient(app) as client: + workspace = client.get( + f"/v2/xiaohongshu/accounts/{xhs_account_id}/workspace", + headers={"Authorization": "Bearer dummy"}, + ) + self.assertEqual(workspace.status_code, 200) + workspace_payload = workspace.json() + self.assertIn("latest_public_snapshot", workspace_payload) + self.assertIn("latest_creator_snapshot", workspace_payload) + self.assertIn("recent_reports", workspace_payload) + self.assertIn("recent_similarity_searches", workspace_payload) + self.assertIn("available_model_profiles", workspace_payload) + + reports = client.get( + f"/v2/xiaohongshu/accounts/{xhs_account_id}/analysis-reports", + headers={"Authorization": "Bearer dummy"}, + ) + self.assertEqual(reports.status_code, 200) + self.assertEqual(reports.json(), workspace_payload["recent_reports"]) + + digest = client.get("/v2/xiaohongshu/tracking/digest", headers={"Authorization": "Bearer dummy"}) + self.assertEqual(digest.status_code, 200) + digest_payload = digest.json() + self.assertIn("generated_at", digest_payload) + self.assertIn("since", digest_payload) + self.assertIn("tracked_accounts", digest_payload) + self.assertIn("cursor_last_seen_at", digest_payload) + self.assertTrue(digest_payload["items"]) + digest_item = digest_payload["items"][0] + self.assertEqual(digest_item["platform"], "xiaohongshu") + self.assertIn("summary_text", digest_item) + self.assertIn("account", digest_item) + self.assertIn("video", digest_item) + + +if __name__ == "__main__": + unittest.main() diff --git a/web/storyforge-web-v4/assets/app.js b/web/storyforge-web-v4/assets/app.js index 9738bc0..fcd09aa 100644 --- a/web/storyforge-web-v4/assets/app.js +++ b/web/storyforge-web-v4/assets/app.js @@ -17,6 +17,11 @@ const appState = { selectedAccountRequestToken: 0, selectedWorkspace: null, selectedVideos: { items: [], meta: {}, top_scored_video_ids: [], latest_video_ids: [], high_score_threshold: 60 }, + snapshots: [], + selectedSnapshotId: "", + selectedSnapshotDetail: null, + creatorFields: null, + analysisReports: [], documents: [], discoveryQuery: "", currentPlatform: localStorage.getItem(STORAGE_KEY + ":currentPlatform") || "", @@ -30,6 +35,7 @@ const appState = { liveRecorderSources: [], liveRecorderStatus: null, liveRecorderFiles: [], + liveRecorderHealth: null, storageStatus: null, integrationHealth: null, localModelCatalog: null, @@ -43,6 +49,7 @@ const appState = { tenantQuota: null, tenantUsage: null, adminOpsOverview: null, + adminFixRuns: [], busy: false, message: "", lastAction: null, @@ -979,6 +986,11 @@ async function logoutSession() { appState.selectedAssistantId = ""; appState.selectedWorkspace = null; appState.selectedVideos = { items: [], meta: {}, top_scored_video_ids: [], latest_video_ids: [], high_score_threshold: 60 }; + appState.snapshots = []; + appState.selectedSnapshotId = ""; + appState.selectedSnapshotDetail = null; + appState.creatorFields = null; + appState.analysisReports = []; appState.documents = []; appState.trackingAccounts = []; appState.trackingDigest = null; @@ -992,8 +1004,10 @@ async function logoutSession() { appState.tenantQuota = null; appState.tenantUsage = null; appState.adminOpsOverview = null; + appState.adminFixRuns = []; appState.integrationHealth = null; appState.storageStatus = null; + appState.liveRecorderHealth = null; appState.backendCapabilities = null; appState.lastAction = null; appState.lastGeneratedCopy = null; @@ -1032,10 +1046,11 @@ async function loadAgentControlSurfaces(projectId = "") { const supportsActionRegistry = backendSupports("/v2/oneliner/action-registry"); const supportsPlatformAgents = backendSupports("/v2/platform-agents"); const supportsAdminOps = backendSupports("/v2/admin/ops/overview"); + const supportsAdminFixRuns = backendSupports("/v2/admin/ops/fix-runs"); const supportsTenantQuota = backendSupports("/v2/tenant/quota"); const supportsTenantUsage = backendSupports("/v2/tenant/usage"); - const [profile, sessionsPayload, actionRegistryPayload, platformAgentsPayload, tenantQuota, tenantUsage, adminOpsOverview] = await Promise.all([ + const [profile, sessionsPayload, actionRegistryPayload, platformAgentsPayload, tenantQuota, tenantUsage, adminOpsOverview, adminFixRunsPayload] = await Promise.all([ supportsOneLinerProfile ? storyforgeFetch(`/v2/oneliner/profile?project_id=${encodeURIComponent(normalizedProjectId)}`).catch(() => null) : Promise.resolve(null), @@ -1056,7 +1071,10 @@ async function loadAgentControlSurfaces(projectId = "") { : Promise.resolve(null), supportsAdminOps && isSuperAdmin() ? storyforgeFetch("/v2/admin/ops/overview").catch(() => null) - : Promise.resolve(null) + : Promise.resolve(null), + supportsAdminFixRuns && isSuperAdmin() + ? storyforgeFetch("/v2/admin/ops/fix-runs").catch(() => ({ items: [] })) + : Promise.resolve({ items: [] }) ]); appState.onelinerProfile = profile; @@ -1069,6 +1087,7 @@ async function loadAgentControlSurfaces(projectId = "") { appState.tenantQuota = tenantQuota; appState.tenantUsage = tenantUsage; appState.adminOpsOverview = adminOpsOverview; + appState.adminFixRuns = safeArray(adminFixRunsPayload?.items || adminFixRunsPayload); } async function loadOneLinerMessages(sessionId) { @@ -1358,6 +1377,8 @@ async function loadPlatformAccount(platform, accountId, requestToken = 0) { appState.snapshots = []; appState.selectedSnapshotId = ""; appState.selectedSnapshotDetail = null; + appState.creatorFields = null; + appState.analysisReports = []; appState.similarSearchDetail = null; return true; } @@ -1371,13 +1392,18 @@ async function loadPlatformAccount(platform, accountId, requestToken = 0) { appState.snapshots = []; appState.selectedSnapshotId = ""; appState.selectedSnapshotDetail = null; + appState.creatorFields = null; + appState.analysisReports = []; appState.similarSearchDetail = null; return true; } const videosPath = getWorkbenchRoute(normalizedPlatform, "videos", accountId); const supportsAccountVideos = videosPath && backendSupports(`/v2/${normalizedPlatform}/accounts/{account_id}/videos`); + const supportsAccountSnapshots = normalizedPlatform === "douyin" && backendSupports("/v2/douyin/accounts/{account_id}/snapshots"); + const supportsCreatorFields = normalizedPlatform === "douyin" && backendSupports("/v2/douyin/accounts/{account_id}/creator-fields"); + const supportsAnalysisReports = normalizedPlatform === "douyin" && backendSupports("/v2/douyin/accounts/{account_id}/analysis-reports"); try { - const [workspace, videos] = await Promise.all([ + const [workspace, videos, snapshotsPayload, creatorFieldsPayload, analysisReportsPayload] = await Promise.all([ storyforgeFetch(workspacePath), supportsAccountVideos ? storyforgeFetch(videosPath).catch(() => ({ @@ -1393,13 +1419,38 @@ async function loadPlatformAccount(platform, accountId, requestToken = 0) { top_scored_video_ids: [], latest_video_ids: [], high_score_threshold: 60 - }) + }), + supportsAccountSnapshots + ? storyforgeFetch(`/v2/douyin/accounts/${encodeURIComponent(accountId)}/snapshots`).catch(() => []) + : Promise.resolve([]), + supportsCreatorFields + ? storyforgeFetch(`/v2/douyin/accounts/${encodeURIComponent(accountId)}/creator-fields`).catch(() => null) + : Promise.resolve(null), + supportsAnalysisReports + ? storyforgeFetch(`/v2/douyin/accounts/${encodeURIComponent(accountId)}/analysis-reports`).catch(() => []) + : Promise.resolve([]) ]); if (token !== appState.selectedAccountRequestToken) { return false; } appState.selectedWorkspace = workspace; appState.selectedVideos = videos; + if (normalizedPlatform === "douyin") { + appState.snapshots = safeArray(snapshotsPayload?.items || snapshotsPayload); + appState.creatorFields = creatorFieldsPayload; + appState.analysisReports = safeArray(analysisReportsPayload?.items || analysisReportsPayload); + const nextSnapshotId = appState.snapshots.find((item) => item.id === appState.selectedSnapshotId)?.id || appState.snapshots[0]?.id || ""; + appState.selectedSnapshotId = nextSnapshotId; + appState.selectedSnapshotDetail = nextSnapshotId + ? await storyforgeFetch(`/v2/douyin/accounts/${encodeURIComponent(accountId)}/snapshots/${encodeURIComponent(nextSnapshotId)}`).catch(() => null) + : null; + } else { + appState.snapshots = []; + appState.selectedSnapshotId = ""; + appState.selectedSnapshotDetail = null; + appState.creatorFields = null; + appState.analysisReports = []; + } return true; } catch (error) { if (token !== appState.selectedAccountRequestToken) { @@ -1442,7 +1493,8 @@ async function bootstrap() { const supportsLiveRecorderSources = backendSupports("/v2/live-recorder/sources"); const supportsLiveRecorderStatus = backendSupports("/v2/live-recorder/status"); const supportsLiveRecorderFiles = backendSupports("/v2/live-recorder/files"); - const [contentSources, platformPayloads, reviews, integrationHealth, localModelCatalog, liveRecorderSourcesPayload, liveRecorderStatus, liveRecorderFilesPayload] = await Promise.all([ + const supportsLiveRecorderHealth = backendSupports("/v2/live-recorder/health"); + const [contentSources, platformPayloads, reviews, integrationHealth, localModelCatalog, liveRecorderSourcesPayload, liveRecorderStatus, liveRecorderFilesPayload, liveRecorderHealth] = await Promise.all([ storyforgeFetch("/v2/content-sources").catch(() => []), Promise.all(runtimePlatforms.map(async (platform) => { const accountListPath = getWorkbenchRoute(platform, "accounts"); @@ -1484,7 +1536,8 @@ async function bootstrap() { supportsLocalModels ? storyforgeFetch("/v2/integrations/local-models").catch(() => null) : Promise.resolve(null), supportsLiveRecorderSources ? storyforgeFetch("/v2/live-recorder/sources").catch(() => ({ items: [] })) : Promise.resolve({ items: [] }), supportsLiveRecorderStatus ? storyforgeFetch("/v2/live-recorder/status").catch(() => null) : Promise.resolve(null), - supportsLiveRecorderFiles ? storyforgeFetch("/v2/live-recorder/files?limit=16").catch(() => ({ items: [] })) : Promise.resolve({ items: [] }) + supportsLiveRecorderFiles ? storyforgeFetch("/v2/live-recorder/files?limit=16").catch(() => ({ items: [] })) : Promise.resolve({ items: [] }), + supportsLiveRecorderHealth ? storyforgeFetch("/v2/live-recorder/health").catch(() => null) : Promise.resolve(null) ]); const mergedAccounts = safeArray(platformPayloads) .flatMap((entry) => safeArray(entry.accounts)) @@ -1531,6 +1584,7 @@ async function bootstrap() { appState.liveRecorderFiles = safeArray(liveRecorderFilesPayload?.items || liveRecorderFilesPayload); appState.integrationHealth = integrationHealth; appState.localModelCatalog = localModelCatalog; + appState.liveRecorderHealth = liveRecorderHealth; appState.documents = await loadKnowledgeDocuments(dashboard.knowledge_bases); appState.selectedProjectId = appState.selectedProjectId || dashboard.projects?.[0]?.id || ""; if (supportsStorageStatus) { @@ -2540,12 +2594,12 @@ function renderAdminOpsPanel() {

运维与审计 Agent

仅平台最高权限用户可见。

尚未拉到概览

刷新后会自动读取失败任务、集成健康和待审事件。

+ ${renderAdminFixRunsPanel()}
`; } const incidents = safeArray(overview.incidents).slice(0, 6); const audits = safeArray(overview.recent_audits).slice(0, 5); - const fixRuns = safeArray(overview.recent_fix_runs).slice(0, 5); return `
@@ -2581,24 +2635,6 @@ function renderAdminOpsPanel() {
`).join("") || `

当前没有待处理事件

最近主链比较稳定,继续观察即可。

`}
-
-
-

最近修复计划

-

这里代表运维 Agent 输出的修复方案,必须经过审计 Agent 放行才算闭环。

-
- ${fixRuns.map((item) => ` -
-

${escapeHtml(item.plan?.summary || item.id || "修复计划")}

-

${escapeHtml(item.plan?.steps?.[0] || "待补充修复步骤")}

-
- ${escapeHtml(item.plan_scope || "plan")} - ${escapeHtml(item.audit_status || "pending")} - ${item.incident_id ? `事件 ${escapeHtml(brief(item.incident_id, 10))}` : ""} - 审计放行 -
-
- `).join("") || `

还没有修复计划

当运维 Agent 针对故障事件生成 repair plan 后,这里会自动出现。

`} -

最近审计记录

@@ -2616,6 +2652,7 @@ function renderAdminOpsPanel() {
`).join("") || `

还没有审计记录

等管理员做一次扫描或审计处理后,这里会自动出现。

`}
+ ${renderAdminFixRunsPanel()} `; } @@ -2879,6 +2916,334 @@ function renderPlatformSwitchChips(currentPlatform) { `).join(""); } +function getProjectNameById(projectId) { + return safeArray(appState.dashboard?.projects).find((project) => project.id === projectId)?.name || projectId || "-"; +} + +function formatSnapshotFieldValue(value) { + if (value == null) return "-"; + const text = typeof value === "string" ? value : JSON.stringify(value); + return brief(text, 120); +} + +function renderSnapshotFieldRows(fields, limit = 8) { + return safeArray(fields) + .slice(0, limit) + .map((field) => ` +
+

${escapeHtml(field.field_path || field.path || "field")}

+

${escapeHtml(formatSnapshotFieldValue(field.field_value_text || field.value || field.summary || ""))}

+
+ ${field.field_type ? `${escapeHtml(field.field_type)}` : ""} +
+
+ `).join(""); +} + +function renderDouyinInsightPanel() { + const selected = getSelectedAccount(); + if (!selected || getAccountPlatform(selected) !== "douyin") { + return ""; + } + const snapshots = safeArray(appState.snapshots); + const selectedSnapshot = appState.selectedSnapshotDetail + || snapshots.find((item) => item.id === appState.selectedSnapshotId) + || null; + const creatorFields = appState.creatorFields || null; + const analysisReports = safeArray(appState.analysisReports.length ? appState.analysisReports : appState.selectedWorkspace?.recent_reports); + const snapshotSummary = selectedSnapshot?.summary || {}; + const creatorSummary = creatorFields?.summary || {}; + const selectedSnapshotFields = safeArray(selectedSnapshot?.fields); + const creatorSnapshotFields = safeArray(creatorFields?.fields); + return ` +
+
+
+

抖音快照详情

+
快照、创作者字段和分析报告统一在这里看
+
+
+ ${escapeHtml(formatNumber(snapshots.length))} 个快照 + ${escapeHtml(formatNumber(creatorSnapshotFields.length || creatorFields?.field_count || 0))} 个字段 + ${escapeHtml(formatNumber(analysisReports.length))} 条报告 + 刷新 +
+
+
+
+ 快照类型 + ${escapeHtml(selectedSnapshot?.snapshot_type || "未选中")} + ${escapeHtml(selectedSnapshot?.collected_at ? formatDateTime(selectedSnapshot.collected_at) : "等待选择")} +
+
+ 字段数 + ${escapeHtml(formatNumber(selectedSnapshot?.field_count || 0))} + ${escapeHtml(selectedSnapshot?.source_url ? brief(selectedSnapshot.source_url, 28) : "暂无来源")} +
+
+ 创作者字段 + ${escapeHtml(formatNumber(creatorFields?.field_count || 0))} + ${escapeHtml(creatorFields?.collected_at ? formatDateTime(creatorFields.collected_at) : "尚未拉取")} +
+
+ 分析报告 + ${escapeHtml(formatNumber(analysisReports.length))} + ${escapeHtml(analysisReports[0]?.created_at ? formatDateTime(analysisReports[0].created_at) : "暂无报告")} +
+
+
+
+

快照列表

+

点击任意快照可以切换右侧详情,便于比对公开页和 creator center 的变化。

+
+ ${snapshots.map((snapshot) => ` +
+

${escapeHtml(snapshot.snapshot_type || "snapshot")} · ${escapeHtml(formatDateTime(snapshot.collected_at))}

+

${escapeHtml(brief(JSON.stringify(snapshot.summary || {}), 96))}

+
+ ${escapeHtml(formatNumber(snapshot.field_count || 0))} 字段 + 查看详情 +
+
+ `).join("") || `

还没有快照

同步账号后,这里会自动出现 public profile 和 creator center 快照。

`} +
+
+
+

当前快照详情

+

${escapeHtml(selectedSnapshot ? brief(JSON.stringify(snapshotSummary), 120) : "先从左侧选择一个快照")}

+
+ ${selectedSnapshot?.source_url ? `打开来源` : ""} + ${selectedSnapshot?.snapshot_type ? `${escapeHtml(selectedSnapshot.snapshot_type)}` : ""} +
+
+ ${selectedSnapshotFields.length ? renderSnapshotFieldRows(selectedSnapshotFields, 6) : `

暂无字段

选中快照后会显示原始字段明细。

`} +
+
+
+
+
+

Creator Fields

+

${escapeHtml(creatorFields ? brief(JSON.stringify(creatorSummary), 120) : "尚未拉取 creator center 字段")}

+
+ ${creatorFields?.source_url ? `打开 creator center` : ""} + ${creatorFields?.snapshot_type ? `${escapeHtml(creatorFields.snapshot_type)}` : ""} + ${creatorFields?.field_count != null ? `${escapeHtml(formatNumber(creatorFields.field_count))} 字段` : ""} +
+
+ ${creatorSnapshotFields.length ? renderSnapshotFieldRows(creatorSnapshotFields, 6) : `

还没有 creator 字段

等 creator center 快照同步后,这里会展示字段明细。

`} +
+
+
+

分析报告

+

分析报告来自 `/analysis-reports`,可直接对照结论和建议。

+
+ ${analysisReports.map((report) => { + const suggestion = safeArray(report.suggestions)[0] || null; + const summary = suggestion?.parsed_json?.executive_summary || suggestion?.suggestion_text || report.focus_text || "暂无结论"; + return ` +
+

${escapeHtml(brief(report.focus_text || "分析报告", 34))}

+

${escapeHtml(brief(summary, 120))}

+
+ ${report.created_at ? `${escapeHtml(formatDateTime(report.created_at))}` : ""} + ${suggestion?.model_label ? `${escapeHtml(suggestion.model_label)}` : ""} +
+
+ `; + }).join("") || `

还没有分析报告

对当前账号跑一次分析后,这里会自动出现结论和建议。

`} +
+
+
+
+ `; +} + +async function openDouyinSnapshotDetailAction(snapshotId) { + const selected = getSelectedAccount(); + if (!selected || getAccountPlatform(selected) !== "douyin") { + return; + } + if (!snapshotId) { + return; + } + setBusy(true, "正在加载快照详情..."); + try { + const detail = await storyforgeFetch(`/v2/douyin/accounts/${encodeURIComponent(selected.id)}/snapshots/${encodeURIComponent(snapshotId)}`); + appState.selectedSnapshotId = snapshotId; + appState.selectedSnapshotDetail = detail; + rememberAction("快照已切换", `已打开 ${detail.snapshot_type || "snapshot"} 的完整详情。`, "green", detail); + renderAll(); + } finally { + setBusy(false, ""); + } +} + +function renderLiveRecorderManagementPanel() { + const sources = safeArray(appState.liveRecorderSources); + const status = appState.liveRecorderStatus || {}; + const health = getIntegrationDetail("live_recorder"); + const liveRecorderHealth = appState.liveRecorderHealth || {}; + const files = safeArray(appState.liveRecorderFiles); + const activeItems = safeArray(status.active_recordings); + const runtimeBits = [ + health.available ? health.reachable ? "在线" : (health.configured ? "不可达" : "未配置") : "未拉取", + status.running ? `运行中 pid ${status.pid || "-"}` : "未运行", + `活动录制 ${formatNumber(activeItems.length)}`, + `最近文件 ${formatNumber(files.length)}` + ]; + const directHealthText = liveRecorderHealth + ? (liveRecorderHealth.ok || String(liveRecorderHealth.status || "").toLowerCase() === "ok" + ? "HTTP 健康:ok" + : `HTTP 健康:${liveRecorderHealth.status || liveRecorderHealth.message || "异常"}`) + : "HTTP 健康:未拉取"; + return ` +
+
+
+

Live Recorder 维护面板

+
编辑录制源、查看健康状态、导入配置和删除源都在这里
+
+
+ ${escapeHtml(health.reachable ? "健康" : health.configured ? "待检查" : "未配置")} + ${escapeHtml(status.running ? "运行中" : "已停止")} + 刷新 + 新增录制源 + 导入 URL 配置 +
+
+
+ ${runtimeBits.map((item, index) => ` +
+ ${escapeHtml(["健康", "运行", "活动", "文件"][index])} + ${escapeHtml(item)} + ${escapeHtml(index === 0 ? (health.url || health.baseUrl || "未拉取健康数据") : index === 1 ? (status.started_at ? formatDateTime(status.started_at) : "暂无启动时间") : index === 2 ? "当前租户录制状态" : "当前租户录像索引")} +
+ `).join("")} +
+
+

直连健康

+

${escapeHtml(directHealthText)}

+
+ ${liveRecorderHealth?.base_url ? `${escapeHtml(brief(liveRecorderHealth.base_url, 32))}` : ""} + ${liveRecorderHealth?.url ? `${escapeHtml(brief(liveRecorderHealth.url, 32))}` : ""} + ${liveRecorderHealth?.pid ? `${escapeHtml(`pid ${liveRecorderHealth.pid}`)}` : ""} +
+
+
+
+

录制源列表

+

默认按当前租户筛选,编辑时可改项目、Agent、标题、清晰度和启停状态。

+
+ ${sources.map((source) => ` +
+

${escapeHtml(source.title || source.remote_name || source.source_url || "录制源")}

+

${escapeHtml(source.source_url || "暂无源链接")}

+
+ ${escapeHtml(platformLabel(source.platform || "kuaishou"))} + ${escapeHtml(source.quality || "原画")} + ${escapeHtml(source.enabled ? "启用" : "停用")} + ${escapeHtml(getProjectNameById(source.project_id || ""))} + ${source.recording_count ? `${escapeHtml(formatNumber(source.recording_count))} 个活动录制` : ""} +
+
+ 编辑 + ${escapeHtml(source.enabled ? "停用" : "启用")} + 删除 +
+
+ `).join("") || `

还没有录制源

先导入或新增一个直播源,后端会自动同步到租户视图。

`} +
+
+
+

健康检查与运行状态

+

${escapeHtml([ + health.available ? `健康接口:${health.reachable ? "在线" : "不可达"}` : "还没有拉取健康接口", + status.url_info?.service_url ? `服务地址:${status.url_info.service_url}` : "", + activeItems.length ? `活动录制:${activeItems.length}` : "当前没有活动录制" + ].filter(Boolean).join(" · "))}

+
+ ${status.pid ? `PID ${escapeHtml(status.pid)}` : ""} + ${status.last_exit_code != null ? `${escapeHtml(`退出码 ${status.last_exit_code}`)}` : ""} + ${status.url_info?.base_url ? `打开服务` : ""} +
+
+
+

最近文件

+

文件沿用当前租户视图,支持直接打开查看。

+
+ ${files.slice(0, 5).map((file) => ` +
+

${escapeHtml(file.title || file.name || file.relative_path || "录像文件")}

+

${escapeHtml(file.relative_path || file.name || file.content_url || "-")}

+
+ ${file.mtime ? `${escapeHtml(formatDateTime(file.mtime))}` : ""} + ${file.id ? `打开文件` : ""} +
+
+ `).join("") || `

还没有文件

开始录制后,最新文件会出现在这里。

`} +
+
+
+
+ `; +} + +function renderAdminFixRunsPanel() { + if (!isSuperAdmin()) return ""; + const overview = appState.adminOpsOverview || {}; + const items = safeArray(appState.adminFixRuns.length ? appState.adminFixRuns : overview.recent_fix_runs); + if (!items.length) { + return ` +
+
+
+

修复计划列表

+
还没有拉到修复计划
+
+
+

暂无修复计划

生成修复计划后,这里会展示完整的 audit 列表。

+
+ `; + } + return ` +
+
+
+

修复计划列表

+
完整展示最近的 fix runs,并支持直接审计
+
+
+ ${escapeHtml(formatNumber(items.length))} 条 + ${escapeHtml(formatNumber(items.filter((item) => item.audit_status === "approved").length))} 已通过 + ${escapeHtml(formatNumber(items.filter((item) => item.audit_status === "watching").length))} 观察中 + 刷新 +
+
+
+ ${items.map((item) => { + const plan = item.plan || {}; + const verification = item.verification || {}; + return ` +
+

${escapeHtml(plan.summary || item.id || "修复计划")}

+

${escapeHtml(brief(safeArray(plan.steps).join(";") || verification.summary || "暂无修复步骤", 140))}

+
+ ${escapeHtml(item.plan_scope || "plan")} + ${escapeHtml(item.audit_status || "pending")} + ${item.status ? `${escapeHtml(item.status)}` : ""} + ${item.incident_id ? `${escapeHtml(brief(item.incident_id, 12))}` : ""} + ${item.updated_at ? `${escapeHtml(formatDateTime(item.updated_at))}` : ""} + 查看详情 + 审计放行 +
+
+ `; + }).join("")} +
+
+ `; +} + function renderDashboardScreen() { if (!appState.session) { return screenShell( @@ -3101,7 +3466,7 @@ function renderDiscoveryScreen() { const selectedPlatform = getAccountPlatform(selected); const effectivePlatform = selectedPlatform || currentPlatform; const workbenchReason = !isWorkbenchPlatform(effectivePlatform) ? getPendingWorkbenchReason(effectivePlatform) : ""; - const reports = safeArray(appState.selectedWorkspace?.recent_reports); + const reports = safeArray(appState.analysisReports.length ? appState.analysisReports : appState.selectedWorkspace?.recent_reports); const linkedAccounts = safeArray(appState.selectedWorkspace?.linked_accounts); const videos = safeArray(appState.selectedVideos?.items); const fallbackVideos = safeArray(selected?.video_summary?.videos); @@ -3281,6 +3646,7 @@ function renderDiscoveryScreen() { `).join("") || `

还没有最近作品

当前账号只同步了基础信息,还没拉到完整作品列表。

`} + ${renderDouyinInsightPanel()}
@@ -3673,6 +4039,9 @@ function renderProductionScreen() { AI 视频 ${escapeHtml(formatNumber(jobs.filter((item) => item.line_type === "ai_video").length))}
+
+ ${renderLiveRecorderManagementPanel()} +
@@ -5087,12 +5456,59 @@ function openAdminRepairPlanAction(incidentId) { }); } +function openAdminFixRunDetailAction(runId) { + if (!isSuperAdmin()) { + alert("只有平台管理者才能查看修复计划。"); + return; + } + const run = safeArray(appState.adminFixRuns.length ? appState.adminFixRuns : appState.adminOpsOverview?.recent_fix_runs).find((item) => item.id === runId); + if (!run) { + alert("没有找到这条修复计划。"); + return; + } + openActionModal({ + title: "修复计划详情", + description: "查看这条修复计划的完整上下文,再决定是否放行。", + hideSubmit: true, + fields: [ + { + type: "html", + label: "详情", + html: ` +
+
+

${escapeHtml(run.plan?.summary || run.id)}

+

${escapeHtml(safeArray(run.plan?.steps).join(";") || "暂无步骤")}

+
+ ${escapeHtml(run.plan_scope || "plan")} + ${escapeHtml(run.audit_status || "pending")} + ${run.status ? `${escapeHtml(run.status)}` : ""} + ${run.incident_id ? `${escapeHtml(brief(run.incident_id, 12))}` : ""} +
+
+
+
+

Plan

+
${escapeHtml(JSON.stringify(run.plan || {}, null, 2))}
+
+
+

Verification

+
${escapeHtml(JSON.stringify(run.verification || {}, null, 2))}
+
+
+
+ ` + } + ] + }); +} + function openAdminFixRunAuditAction(runId) { if (!isSuperAdmin()) { alert("只有平台管理者才能审计修复计划。"); return; } - const run = safeArray(appState.adminOpsOverview?.recent_fix_runs).find((item) => item.id === runId); + const run = safeArray(appState.adminFixRuns.length ? appState.adminFixRuns : appState.adminOpsOverview?.recent_fix_runs).find((item) => item.id === runId); if (!run) { alert("没有找到这条修复计划。"); return; @@ -5110,7 +5526,12 @@ function openAdminFixRunAuditAction(runId) {

${escapeHtml(run.plan?.summary || run.id)}

-

${escapeHtml((run.plan?.steps || []).join(";") || "暂无步骤")}

+

${escapeHtml(safeArray(run.plan?.steps).join(";") || "暂无步骤")}

+
+ ${run.incident_id ? `事件 ${escapeHtml(brief(run.incident_id, 12))}` : ""} + ${run.updated_at ? `${escapeHtml(formatDateTime(run.updated_at))}` : ""} + 查看详情 +
` @@ -5357,11 +5778,19 @@ function openCreateRealCutAction(defaults = {}) { } function openLiveRecorderAction() { + setScreen("production"); + renderAll(); + window.requestAnimationFrame(() => { + document.getElementById("live-recorder-maintenance-anchor")?.scrollIntoView({ behavior: "smooth", block: "start" }); + }); +} + +function openLiveRecorderCreateAction() { const status = getIntegrationDetail("live_recorder"); const project = getSelectedProject() || appState.dashboard?.projects?.[0] || null; const assistants = getAssistantOptions(project?.id || ""); openActionModal({ - title: "直播录制控制", + title: "新增录制源", description: status.reachable ? "新增的是你当前租户名下的录制源。文件访问和录制状态也只会回到你的账号视图里。" : "当前 NAS 录制服务不可达,先检查集成健康。", @@ -5404,6 +5833,133 @@ function openLiveRecorderAction() { }); } +function openLiveRecorderSourceAction(sourceId) { + const source = safeArray(appState.liveRecorderSources).find((item) => item.id === sourceId); + if (!source) { + alert("没有找到这条录制源。"); + return; + } + const currentProject = getSelectedProject() || safeArray(appState.dashboard?.projects).find((item) => item.id === source.project_id) || appState.dashboard?.projects?.[0] || null; + const assistants = getAssistantOptions(currentProject?.id || source.project_id || ""); + openActionModal({ + title: "编辑录制源", + description: "可以更新项目归属、Agent、标题、清晰度和启停状态;链接本身若要变更,请删除后重建。", + submitLabel: "保存修改", + fields: [ + { + type: "html", + label: "源信息", + html: ` +
+
+

${escapeHtml(source.title || source.remote_name || "录制源")}

+

${escapeHtml(source.source_url || "暂无链接")}

+
+ ${escapeHtml(platformLabel(source.platform || "kuaishou"))} + ${escapeHtml(source.quality || "原画")} + ${escapeHtml(source.enabled ? "启用" : "停用")} +
+
+
+ ` + }, + { name: "projectId", label: "归属项目", type: "select", value: source.project_id || currentProject?.id || "", options: getProjectOptions() }, + { name: "assistantId", label: "关联 Agent", type: "select", value: source.assistant_id || "", options: [{ value: "", label: "暂不绑定" }, ...assistants] }, + { name: "title", label: "录制名称", value: source.title || "", placeholder: "例如:A 类目直播跟踪" }, + { name: "quality", label: "清晰度", type: "select", value: source.quality || "原画", options: ["原画", "蓝光", "超清", "高清", "标清", "流畅"].map((item) => ({ value: item, label: item })) }, + { name: "enabled", label: "启用录制源", type: "checkbox", value: Boolean(source.enabled) } + ], + onSubmit: async (values) => { + const saved = await storyforgeFetch(`/v2/live-recorder/sources/${encodeURIComponent(source.id)}`, { + method: "PATCH", + body: { + project_id: values.projectId || "", + assistant_id: values.assistantId || "", + title: values.title || "", + quality: values.quality || "原画", + enabled: Boolean(values.enabled) + } + }); + rememberAction("录制源已更新", `已保存「${saved.item?.title || source.title || "录制源"}」。`, "green", saved); + await bootstrap(); + } + }); +} + +function openLiveRecorderImportAction() { + const samples = [ + "https://live.douyin.com/1234567890", + "# 关闭的源会以 # 开头", + "高清, https://live.kuaishou.com/u/abcdef, 测试录制源" + ].join("\n"); + openActionModal({ + title: "导入 URL 配置", + description: "按行粘贴直播源,支持用逗号附带清晰度和标题,注释行会被视为停用源。", + submitLabel: "导入并同步", + fields: [ + { + name: "raw", + label: "配置文本", + type: "textarea", + rows: 10, + value: samples, + placeholder: "一行一个 URL,支持 # 注释和 逗号分隔的清晰度/标题" + } + ], + onSubmit: async (values) => { + if (!String(values.raw || "").trim()) throw new Error("请先粘贴配置文本"); + const saved = await storyforgeFetch("/v2/live-recorder/url-config/import", { + method: "POST", + body: { raw: values.raw } + }); + rememberAction("URL 配置已导入", `已导入 ${formatNumber(saved.count || 0)} 条录制源。`, "green", saved); + await bootstrap(); + } + }); +} + +async function toggleLiveRecorderSourceAction(sourceId, nextEnabled) { + const source = safeArray(appState.liveRecorderSources).find((item) => item.id === sourceId); + if (!source) { + alert("没有找到这条录制源。"); + return; + } + setBusy(true, nextEnabled ? "正在启用录制源..." : "正在停用录制源..."); + try { + await storyforgeFetch(`/v2/live-recorder/sources/${encodeURIComponent(source.id)}`, { + method: "PATCH", + body: { + enabled: Boolean(nextEnabled) + } + }); + rememberAction(nextEnabled ? "录制源已启用" : "录制源已停用", `${source.title || source.source_url || "录制源"} 已更新。`, "green"); + await bootstrap(); + } finally { + setBusy(false, ""); + } +} + +async function deleteLiveRecorderSourceAction(sourceId) { + const source = safeArray(appState.liveRecorderSources).find((item) => item.id === sourceId); + if (!source) { + alert("没有找到这条录制源。"); + return; + } + if (!window.confirm(`确认删除「${source.title || source.source_url || "录制源"}」吗?删除后需要重新导入。`)) { + return; + } + setBusy(true, "正在删除录制源..."); + try { + await storyforgeFetch(`/v2/live-recorder/sources/${encodeURIComponent(source.id)}`, { + method: "DELETE" + }); + rememberAction("录制源已删除", `${source.title || source.source_url || "录制源"} 已从租户视图中移除。`, "green"); + await bootstrap(); + } finally { + setBusy(false, ""); + } +} + async function openLiveRecorderFileAction(fileId) { const target = safeArray(appState.liveRecorderFiles).find((item) => item.id === fileId); if (!target?.content_url) { @@ -5608,6 +6164,26 @@ document.addEventListener("click", async (event) => { openLiveRecorderAction(); return; } + if (name === "open-live-recorder-create") { + openLiveRecorderCreateAction(); + return; + } + if (name === "import-live-recorder-config") { + openLiveRecorderImportAction(); + return; + } + if (name === "edit-live-recorder-source") { + openLiveRecorderSourceAction(action.dataset.sourceId || ""); + return; + } + if (name === "toggle-live-recorder-source") { + await toggleLiveRecorderSourceAction(action.dataset.sourceId || "", action.dataset.nextEnabled === "true"); + return; + } + if (name === "delete-live-recorder-source") { + await deleteLiveRecorderSourceAction(action.dataset.sourceId || ""); + return; + } if (name === "open-live-recorder-file") { await openLiveRecorderFileAction(action.dataset.fileId || ""); return; @@ -5836,6 +6412,14 @@ document.addEventListener("click", async (event) => { openAdminFixRunAuditAction(action.dataset.runId || ""); return; } + if (name === "open-admin-fix-run-detail") { + openAdminFixRunDetailAction(action.dataset.runId || ""); + return; + } + if (name === "select-douyin-snapshot") { + await openDouyinSnapshotDetailAction(action.dataset.snapshotId || ""); + return; + } if (name === "job-to-ai-video") { const jobId = action.dataset.jobId || ""; const detail = appState.lastJobDetail?.job?.id === jobId ? appState.lastJobDetail.job : null;