diff --git a/CONTENT_LEARNING_WORKFLOW.md b/CONTENT_LEARNING_WORKFLOW.md index 8db0ab9..4fb5bec 100644 --- a/CONTENT_LEARNING_WORKFLOW.md +++ b/CONTENT_LEARNING_WORKFLOW.md @@ -13,6 +13,5 @@ - 使用 `whisper.cpp` 转写,若环境未就绪则保留原始素材并进入降级流程 6. collector-service 调用本机 OpenAI 兼容模型提炼文案风格 7. 结果写入用户自己的知识库文档 -8. 如果配置了 `FASTGPT_DATASET_API_KEY` - - 同步到 FastGPT 数据集 +8. 如有需要,可继续同步到租户自己的外部知识系统 9. 文案助手生成时按知识库关联关系取素材,结合提示词输出文案 diff --git a/Common/MAC_NODE_CONNECTIVITY_SPEC.md b/Common/MAC_NODE_CONNECTIVITY_SPEC.md index 2667548..8147dd5 100644 --- a/Common/MAC_NODE_CONNECTIVITY_SPEC.md +++ b/Common/MAC_NODE_CONNECTIVITY_SPEC.md @@ -2,17 +2,16 @@ The Mac node should only do the following: -1. Deploy FastGPT locally -2. Ensure the cloud backend can reach FastGPT +1. Deploy StoryForge collector-service locally +2. Ensure the cloud backend can reach collector-service 3. Maintain a private network connection to the server -4. Provide the FastGPT endpoint to the backend +4. Provide the collector-service endpoint to the backend Recommended ports: -- FastGPT: 3000 -- MongoDB: 27017 -- PostgreSQL: 5432 -- Redis: 6379 -- MinIO: 9000 +- Collector Service: 8081 +- n8n: 5670 +- Local Model API: 8317 +- ASR: 8088 -FastGPT must not be exposed to the public internet directly. +The local admin/control surfaces must not be exposed to the public internet directly. diff --git a/Common/STORYFORGE_MAC_AI_NODE_TASKS.md b/Common/STORYFORGE_MAC_AI_NODE_TASKS.md index d3b0e87..2a527be 100644 --- a/Common/STORYFORGE_MAC_AI_NODE_TASKS.md +++ b/Common/STORYFORGE_MAC_AI_NODE_TASKS.md @@ -2,13 +2,11 @@ You are responsible for the StoryForge Mac AI node. Tasks: -- Deploy FastGPT using Docker. +- Deploy StoryForge runtime services on the Mac node. - Services: - - FastGPT - - MongoDB - - PostgreSQL + pgvector - - Redis - - MinIO + - collector-service + - n8n + - cli-proxy-api - Build collector-service in Python. 
- Collector features: - yt-dlp video download diff --git a/MAC_NODE_CONNECTIVITY.md b/MAC_NODE_CONNECTIVITY.md index 5800a52..b531d44 100644 --- a/MAC_NODE_CONNECTIVITY.md +++ b/MAC_NODE_CONNECTIVITY.md @@ -1,6 +1,6 @@ # Mac Node Connectivity -- FastGPT 默认本机端口:`3000` - Collector Service 默认本机端口:`8081` - Local OpenAI Compatible API:`127.0.0.1:8317/v1` -- 如需通过云端访问,优先使用内网或隧道,不直接暴露 Mac 上的 FastGPT 管理接口 +- n8n 默认本机端口:`5670` +- 如需通过云端访问,优先使用内网或隧道,不直接暴露 Mac 上的本地管理接口 diff --git a/README.md b/README.md index b2621ed..5590a57 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ N8N_BASE_URL=http://127.0.0.1:5670 - 触发 `content_source_sync_pipeline` - 触发 `real_cut_pipeline` - 触发 `ai_video_pipeline` -- FastGPT 已从主流程设计中移除,不再作为运行时依赖 +- 历史 FastGPT 运行链已完成移除,当前运行时只保留 StoryForge 自身服务与外部执行引擎 ## 说明 diff --git a/TECH_ARCHITECTURE.md b/TECH_ARCHITECTURE.md index e6f37d5..2b3fecd 100644 --- a/TECH_ARCHITECTURE.md +++ b/TECH_ARCHITECTURE.md @@ -3,14 +3,14 @@ ## Core Components - Android App: 素材探索、文案生产、个人配置、管理员审批、OTA -- Collector Service: FastAPI + SQLite,负责业务流程编排 +- Collector Service: FastAPI + SQLite,负责多租户业务边界、任务状态和 Agent 数据 +- n8n: 负责异步流程编排与 webhook 触发 - Local Model API: 默认指向本机 `cli-proxy-api` -- FastGPT: 负责数据集和后续工作流扩展 -- MongoDB / PostgreSQL + pgvector / Redis / MinIO: FastGPT 运行依赖 +- NAS / 外部执行器: 负责大文件缓存、录像、剪辑和 AI 视频执行 ## Main Flow -User -> Android App -> Collector Service -> Local Model / FastGPT +User -> Android App / Web / OneLiner -> Collector Service -> Local Model / n8n / 执行引擎 ## Data Isolation @@ -23,4 +23,4 @@ User -> Android App -> Collector Service -> Local Model / FastGPT - `model_profiles` - `app_updates` -每个用户的数据通过 `user_id` 进行隔离。 +每个用户/项目的数据通过 `user_id + project_id` 进行隔离。 diff --git a/collector-service/app/__init__.py b/collector-service/app/__init__.py index 28673b9..7bcd463 100644 --- a/collector-service/app/__init__.py +++ b/collector-service/app/__init__.py @@ -1 +1 @@ -# StoryForge collector-service package +"""Collector service source overlay for legacy 
pyc-backed app.""" diff --git a/collector-service/app/bilibili_features.py b/collector-service/app/bilibili_features.py new file mode 100644 index 0000000..062e03e --- /dev/null +++ b/collector-service/app/bilibili_features.py @@ -0,0 +1,545 @@ +from __future__ import annotations + +import json +from typing import Any + +from fastapi import Depends, HTTPException, Query +from pydantic import BaseModel, Field + + +def _safe_json_dumps(value: Any) -> str: + return json.dumps(value, ensure_ascii=False, separators=(",", ":")) + + +def _first_non_empty(*values: Any) -> str: + for value in values: + if value is None: + continue + if isinstance(value, str): + stripped = value.strip() + if stripped: + return stripped + elif value not in ("", [], {}, ()): + return str(value) + return "" + + +class BilibiliContentSourceCreateRequest(BaseModel): + project_id: str = "" + source_kind: str = "creator_account" + platform: str = "" + handle: str = "" + source_url: str = "" + title: str = "" + local_path: str = "" + metadata: dict[str, Any] = Field(default_factory=dict) + + +class BilibiliContentSourceSyncRequest(BaseModel): + project_id: str = "" + knowledge_base_id: str = "" + assistant_id: str = "" + content_source_id: str = "" + platform: str = "" + handle: str = "" + source_url: str = "" + title: str = "" + analysis_model_profile_id: str = "" + language: str = "auto" + max_items: int = Field(default=5, ge=1, le=20) + skip_existing: bool = True + auto_trigger_analysis: bool = True + + +class BilibiliReviewCreateRequest(BaseModel): + project_id: str = "" + source_job_id: str = "" + assistant_id: str = "" + title: str = "" + platform: str = "bilibili" + content_type: str = "video" + publish_url: str = "" + published_at: str = "" + metrics: dict[str, Any] = Field(default_factory=dict) + verdict: str = "" + highlights: str = "" + next_actions: str = "" + notes: str = "" + + +class BilibiliReviewUpdateRequest(BaseModel): + title: str | None = None + platform: str | None = None + 
content_type: str | None = None + publish_url: str | None = None + published_at: str | None = None + metrics: dict[str, Any] | None = None + verdict: str | None = None + highlights: str | None = None + next_actions: str | None = None + notes: str | None = None + assistant_id: str | None = None + + +def _is_youtube_url(source_url: str) -> bool: + lowered = source_url.strip().lower() + return "youtube.com" in lowered or "youtu.be" in lowered + + +def _resolve_bilibili_platform(legacy: Any, platform: str, source_url: str = "") -> str: + if _is_youtube_url(source_url): + raise HTTPException(status_code=400, detail="YouTube sources are not supported in the bilibili routes") + + inferred = legacy.infer_platform_from_url(source_url) if source_url.strip() else "" + normalized = legacy.normalize_platform_slug(platform, allow_blank=True) + if not normalized: + normalized = inferred or "bilibili" + + if normalized == "youtube": + raise HTTPException(status_code=400, detail="YouTube sources are not supported in the bilibili routes") + if inferred and inferred not in {"bilibili", "youtube"} and not platform.strip(): + raise HTTPException( + status_code=400, + detail=f"Bilibili routes only accept bilibili sources, not {inferred}", + ) + if normalized != "bilibili": + raise HTTPException( + status_code=400, + detail=f"Bilibili routes only accept bilibili sources, not {normalized}", + ) + return "bilibili" + + +def _content_source_query(legacy: Any, account_id: str, project_id: str | None = None) -> tuple[str, tuple[Any, ...]]: + clauses = ["user_id = ?", "platform = 'bilibili'"] + params: list[Any] = [account_id] + if project_id is not None: + normalized_project = project_id.strip() + if normalized_project: + clauses.append("project_id = ?") + params.append(normalized_project) + else: + clauses.append("(project_id IS NULL OR project_id = '')") + sql = f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY created_at DESC" + return sql, tuple(params) + + +def 
_job_query( + source_id: str | None = None, + project_id: str | None = None, + limit: int = 50, +) -> tuple[str, tuple[Any, ...]]: + clauses = ["j.user_id = ?", "cs.platform = 'bilibili'"] + params: list[Any] = [] + if source_id: + clauses.append("j.content_source_id = ?") + params.append(source_id) + if project_id is not None: + normalized_project = project_id.strip() + if normalized_project: + clauses.append("j.project_id = ?") + params.append(normalized_project) + else: + clauses.append("(j.project_id IS NULL OR j.project_id = '')") + sql = ( + "SELECT j.* " + "FROM jobs j " + "JOIN content_sources cs ON cs.id = j.content_source_id " + f"WHERE {' AND '.join(clauses)} " + "ORDER BY j.created_at DESC " + "LIMIT ?" + ) + params = [*params] + return sql, tuple([*params, limit]) + + +def _review_query(project_id: str | None = None, limit: int = 50) -> tuple[str, tuple[Any, ...]]: + clauses = ["r.user_id = ?", "r.platform = 'bilibili'"] + params: list[Any] = [] + if project_id is not None: + normalized_project = project_id.strip() + if normalized_project: + clauses.append("r.project_id = ?") + params.append(normalized_project) + else: + clauses.append("(r.project_id IS NULL OR r.project_id = '')") + sql = ( + "SELECT r.* " + "FROM publish_reviews r " + f"WHERE {' AND '.join(clauses)} " + "ORDER BY COALESCE(NULLIF(r.published_at, ''), r.created_at) DESC, r.created_at DESC " + "LIMIT ?" 
+ ) + return sql, tuple([*params, limit]) + + +def _build_sync_result(legacy: Any, row: dict[str, Any], content_source: dict[str, Any]) -> dict[str, Any]: + payload = legacy.job_payload(row) + payload["content_source"] = legacy.content_source_payload(content_source) + return payload + + +def register_bilibili_routes(app: Any, legacy: Any) -> None: + def now() -> str: + return legacy.utc_now() + + def make_id(prefix: str) -> str: + return legacy.make_id(prefix) + + def resolve_project(account: dict[str, Any], project_id: str) -> dict[str, Any]: + return legacy.resolve_target_project(account["id"], project_id or None, username=account["username"]) + + def resolve_kb(account: dict[str, Any], kb_id: str, project_id: str) -> dict[str, Any]: + return legacy.resolve_target_kb(account["id"], kb_id or None, project_id, username=account["username"]) + + def resolve_assistant(account: dict[str, Any], assistant_id: str, project_id: str) -> dict[str, Any] | None: + return legacy.resolve_target_assistant(account["id"], assistant_id or None, project_id) + + def create_or_update_source( + *, + account: dict[str, Any], + request: BilibiliContentSourceCreateRequest, + sync_request: BilibiliContentSourceSyncRequest | None = None, + ) -> dict[str, Any]: + source_url = _first_non_empty(request.source_url, sync_request.source_url if sync_request else "") + _resolve_bilibili_platform(legacy, request.platform or (sync_request.platform if sync_request else ""), source_url) + + project = resolve_project(account, request.project_id or (sync_request.project_id if sync_request else "")) + title = _first_non_empty(request.title, sync_request.title if sync_request else "", request.handle, source_url) + metadata: dict[str, Any] = dict(request.metadata) + metadata.setdefault("platform", "bilibili") + if sync_request: + metadata.update( + { + "sync_mode": "recent_uploads", + "max_items": sync_request.max_items, + "analysis_model_profile_id": sync_request.analysis_model_profile_id, + } + ) + + 
return legacy.create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind=(request.source_kind or "creator_account").strip(), + platform="bilibili", + handle=request.handle.strip(), + source_url=source_url.strip(), + title=title.strip(), + local_path=request.local_path.strip(), + metadata=metadata, + ) + + async def sync_source( + *, + account: dict[str, Any], + request: BilibiliContentSourceSyncRequest, + content_source: dict[str, Any] | None = None, + ) -> dict[str, Any]: + source_row = content_source + if request.content_source_id.strip(): + source_row = legacy.load_owned_content_source(request.content_source_id.strip(), account["id"]) + + source_url = _first_non_empty( + request.source_url, + (source_row or {}).get("source_url", ""), + ) + _resolve_bilibili_platform( + legacy, + request.platform or (source_row or {}).get("platform", ""), + source_url, + ) + + project_id = request.project_id or (source_row or {}).get("project_id", "") + project = resolve_project(account, project_id) + kb = resolve_kb(account, request.knowledge_base_id, project["id"]) + assistant = resolve_assistant(account, request.assistant_id, project["id"]) + source_title = _first_non_empty( + request.title, + (source_row or {}).get("title", ""), + request.handle, + source_url, + ) + + if source_row and source_row.get("project_id") and source_row["project_id"] != project["id"]: + raise HTTPException(status_code=400, detail="Content source does not belong to the target project") + + if not source_row: + source_row = create_or_update_source( + account=account, + request=BilibiliContentSourceCreateRequest( + project_id=project["id"], + source_kind="creator_account", + platform="bilibili", + handle=request.handle.strip(), + source_url=source_url, + title=source_title, + local_path="", + metadata={ + "sync_mode": "recent_uploads", + "max_items": request.max_items, + "analysis_model_profile_id": request.analysis_model_profile_id, + }, + ), + sync_request=request, + 
) + + job_row = legacy.create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="content_source_sync", + line_type="content_source_sync", + workflow_key="content_source_sync_pipeline", + title=f"{source_title} 内容源同步", + language=request.language, + source_url=source_url, + assistant_id=(assistant or {}).get("id"), + content_source_id=source_row["id"], + artifacts={ + "platform": "bilibili", + "source_kind": source_row.get("source_kind", "creator_account"), + "source_title": source_title, + "source_url": source_url, + "max_items": request.max_items, + "skip_existing": request.skip_existing, + "auto_trigger_analysis": request.auto_trigger_analysis, + "analysis_model_profile_id": request.analysis_model_profile_id, + }, + analysis_model_profile_id=request.analysis_model_profile_id, + ) + legacy.update_content_source_metadata( + source_row["id"], + { + "platform": "bilibili", + "last_sync_job_id": job_row["id"], + "last_sync_requested_at": now(), + "max_items": request.max_items, + "analysis_model_profile_id": request.analysis_model_profile_id, + }, + ) + return _build_sync_result(legacy, await legacy.trigger_orchestrated_job(job_row), source_row) + + @app.get("/v2/bilibili/content-sources") + def list_bilibili_content_sources( + project_id: str | None = Query(default=None), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + sql, params = _content_source_query(legacy, account["id"], project_id) + return [legacy.content_source_payload(row) for row in legacy.db.fetch_all(sql, params)] + + @app.post("/v2/bilibili/content-sources") + def create_bilibili_content_source( + request: BilibiliContentSourceCreateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + row = create_or_update_source(account=account, request=request) + return legacy.content_source_payload(row) + + @app.get("/v2/bilibili/content-sources/{source_id}") + def 
get_bilibili_content_source( + source_id: str, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + row = legacy.load_owned_content_source(source_id, account["id"]) + if row.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili content source not found") + return legacy.content_source_payload(row) + + @app.post("/v2/bilibili/content-sources/{source_id}/sync") + async def sync_bilibili_content_source( + source_id: str, + request: BilibiliContentSourceSyncRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + row = legacy.load_owned_content_source(source_id, account["id"]) + if row.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili content source not found") + return await sync_source(account=account, request=request, content_source=row) + + @app.post("/v2/bilibili/pipelines/content-source-sync") + async def create_bilibili_content_source_sync_job( + request: BilibiliContentSourceSyncRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + return await sync_source(account=account, request=request) + + @app.get("/v2/bilibili/content-sources/{source_id}/jobs") + def list_bilibili_content_source_jobs( + source_id: str, + limit: int = Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + row = legacy.load_owned_content_source(source_id, account["id"]) + if row.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili content source not found") + sql, params = _job_query(source_id=source_id, limit=limit) + rows = legacy.db.fetch_all(sql, (account["id"], *params)) + return [legacy.job_payload(item) for item in rows] + + @app.get("/v2/bilibili/jobs") + def list_bilibili_jobs( + project_id: str | None = Query(default=None), + content_source_id: str | None = Query(default=None), + limit: int = 
Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + if content_source_id: + row = legacy.load_owned_content_source(content_source_id.strip(), account["id"]) + if row.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili content source not found") + sql, params = _job_query(source_id=content_source_id.strip() if content_source_id else None, project_id=project_id, limit=limit) + rows = legacy.db.fetch_all(sql, (account["id"], *params)) + return [legacy.job_payload(item) for item in rows] + + @app.get("/v2/bilibili/jobs/{job_id}") + def get_bilibili_job( + job_id: str, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + row = legacy.load_owned_job(job_id, account["id"]) + if row.get("content_source_id"): + source = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ? AND user_id = ?", (row["content_source_id"], account["id"])) + if not source or source.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili job not found") + return legacy.job_context_payload(row) + + @app.get("/v2/bilibili/reviews") + def list_bilibili_reviews( + project_id: str | None = Query(default=None), + limit: int = Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + sql, params = _review_query(project_id=project_id, limit=limit) + rows = legacy.db.fetch_all(sql, (account["id"], *params)) + return [legacy.review_payload(item) for item in rows] + + @app.get("/v2/bilibili/reviews/{review_id}") + def get_bilibili_review( + review_id: str, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + row = legacy.load_owned_review(review_id, account["id"]) + if row.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili review not found") + return legacy.review_payload(row) + + 
@app.post("/v2/bilibili/reviews") + def create_bilibili_review( + request: BilibiliReviewCreateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_job = None + if request.source_job_id.strip(): + source_job = legacy.load_owned_job(request.source_job_id.strip(), account["id"]) + if source_job.get("content_source_id"): + source = legacy.db.fetch_one( + "SELECT * FROM content_sources WHERE id = ? AND user_id = ?", + (source_job["content_source_id"], account["id"]), + ) + if not source or source.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili source job not found") + normalized_platform = _resolve_bilibili_platform(legacy, request.platform, source_job.get("source_url", "") if source_job else "") + requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "") + project = resolve_project(account, requested_project_id) + assistant = resolve_assistant(account, request.assistant_id, project["id"]) + review_id = make_id("review") + title = _first_non_empty(request.title, source_job.get("title", "") if source_job else "", f"{project['name']} 复盘") + timestamp = now() + legacy.db.execute( + """ + INSERT INTO publish_reviews ( + id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, + publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + review_id, + account["id"], + project["id"], + source_job["id"] if source_job else None, + (assistant or {}).get("id") or None, + title, + normalized_platform, + request.content_type.strip() or "video", + request.publish_url.strip(), + request.published_at.strip(), + _safe_json_dumps(request.metrics), + request.verdict.strip(), + request.highlights.strip(), + request.next_actions.strip(), + request.notes.strip(), + timestamp, + timestamp, + ), + ) + row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return legacy.review_payload(row) + + @app.patch("/v2/bilibili/reviews/{review_id}") + def update_bilibili_review( + review_id: str, + request: BilibiliReviewUpdateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + current = legacy.load_owned_review(review_id, account["id"]) + if current.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili review not found") + assistant_id = current.get("assistant_id") or None + if request.assistant_id is not None: + assistant = resolve_assistant(account, request.assistant_id or "", current.get("project_id", "")) + assistant_id = (assistant or {}).get("id") or None + if request.platform is not None: + _resolve_bilibili_platform(legacy, request.platform, current.get("publish_url", "")) + legacy.db.execute( + """ + UPDATE publish_reviews + SET title = ?, platform = ?, content_type = ?, publish_url = ?, published_at = ?, + metrics_json = ?, verdict = ?, highlights = ?, next_actions = ?, notes = ?, + assistant_id = ?, updated_at = ? + WHERE id = ? AND user_id = ? 
+ """, + ( + request.title if request.title is not None else current.get("title", ""), + "bilibili", + request.content_type if request.content_type is not None else current.get("content_type", "video"), + request.publish_url if request.publish_url is not None else current.get("publish_url", ""), + request.published_at if request.published_at is not None else current.get("published_at", ""), + _safe_json_dumps(request.metrics if request.metrics is not None else legacy.parse_json_object(current.get("metrics_json") or "{}")), + request.verdict if request.verdict is not None else current.get("verdict", ""), + request.highlights if request.highlights is not None else current.get("highlights", ""), + request.next_actions if request.next_actions is not None else current.get("next_actions", ""), + request.notes if request.notes is not None else current.get("notes", ""), + assistant_id, + now(), + review_id, + account["id"], + ), + ) + row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return legacy.review_payload(row) + + @app.get("/v2/bilibili/content-sources/{source_id}/reviews") + def list_bilibili_content_source_reviews( + source_id: str, + limit: int = Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + row = legacy.load_owned_content_source(source_id, account["id"]) + if row.get("platform") != "bilibili": + raise HTTPException(status_code=404, detail="Bilibili content source not found") + rows = legacy.db.fetch_all( + """ + SELECT r.* + FROM publish_reviews r + JOIN jobs j ON j.id = r.source_job_id + WHERE r.user_id = ? AND r.platform = 'bilibili' AND j.content_source_id = ? + ORDER BY COALESCE(NULLIF(r.published_at, ''), r.created_at) DESC, r.created_at DESC + LIMIT ? 
+ """, + (account["id"], source_id, limit), + ) + return [legacy.review_payload(item) for item in rows] + + +__all__ = ["register_bilibili_routes"] diff --git a/collector-service/app/core_main.py b/collector-service/app/core_main.py new file mode 100644 index 0000000..2f66c13 --- /dev/null +++ b/collector-service/app/core_main.py @@ -0,0 +1,4833 @@ +from __future__ import annotations + +import asyncio +import base64 +import httpx +import json +import mimetypes +import os +import re +import secrets +import shutil +import socket +import subprocess +import uuid +from datetime import datetime, timezone +from pathlib import Path +from typing import Any +from urllib.parse import quote, urljoin, urlparse + +from fastapi import Body, Depends, FastAPI, File, Form, Header, HTTPException, Query, UploadFile +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import FileResponse, StreamingResponse +from pydantic import BaseModel, Field + +from .database import Database, utc_now +from .integrations import AsrHttpClient, CutVideoClient, HuobaoDramaClient, N8NClient +from .openai_compat import OpenAICompatClient + +BASE_DIR = Path(__file__).resolve().parents[2] +DATA_DIR = Path(os.getenv("DATA_DIR", BASE_DIR / "data" / "collector")) +DOWNLOADS_DIR = Path(os.getenv("DOWNLOADS_DIR", str(DATA_DIR / "downloads"))) +JOBS_DIR = Path(os.getenv("JOBS_DIR", str(DATA_DIR / "jobs"))) +MODELS_DIR = Path(os.getenv("MODELS_DIR", str(DATA_DIR / "models"))) +DB_PATH = os.getenv("DATABASE_PATH", str(DATA_DIR / "storyforge.db")) +DEFAULT_EXTERNAL_BASE_URL = os.getenv("DEFAULT_EXTERNAL_BASE_URL", "https://test.hyzq.net/storyforge") +LOCAL_OPENAI_BASE_URL = os.getenv("LOCAL_OPENAI_BASE_URL", "http://127.0.0.1:8317/v1") +LOCAL_OPENAI_MODEL = os.getenv("LOCAL_OPENAI_MODEL", "GLM-5") +LOCAL_OPENAI_API_KEY = os.getenv("LOCAL_OPENAI_API_KEY", "") +YTDLP_BIN = os.getenv("YTDLP_BIN", "yt-dlp") +FFMPEG_BIN = os.getenv("FFMPEG_BIN", "ffmpeg") +WHISPER_BIN = os.getenv("WHISPER_BIN", "") 
+WHISPER_MODEL = os.getenv("WHISPER_MODEL", str(MODELS_DIR / "ggml-base.en.bin")) +ASR_HTTP_BASE_URL = os.getenv("ASR_HTTP_BASE_URL", "http://127.0.0.1:8088") +ASR_HTTP_TRANSCRIBE_PATH = os.getenv("ASR_HTTP_TRANSCRIBE_PATH", "/transcribe") +ASR_HTTP_FIELD_NAME = os.getenv("ASR_HTTP_FIELD_NAME", "wav") +ASR_HTTP_TIMEOUT_SEC = float(os.getenv("ASR_HTTP_TIMEOUT_SEC", "120")) +N8N_BASE_URL = os.getenv("N8N_BASE_URL", "http://127.0.0.1:5670") +N8N_ANALYSIS_WEBHOOK_PATH = os.getenv("N8N_ANALYSIS_WEBHOOK_PATH", "/webhook/storyforge-analysis") +N8N_REAL_CUT_WEBHOOK_PATH = os.getenv("N8N_REAL_CUT_WEBHOOK_PATH", "/webhook/storyforge-real-cut") +N8N_AI_VIDEO_WEBHOOK_PATH = os.getenv("N8N_AI_VIDEO_WEBHOOK_PATH", "/webhook/storyforge-ai-video") +N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH = os.getenv("N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH", "/webhook/storyforge-content-source-sync") +ORCHESTRATOR_SHARED_SECRET = os.getenv("ORCHESTRATOR_SHARED_SECRET", "") +CUTVIDEO_BASE_URL = os.getenv("CUTVIDEO_BASE_URL", "http://192.168.31.18:7860") +CUTVIDEO_API_KEY = os.getenv("CUTVIDEO_API_KEY", "") +HUOBAO_BASE_URL = os.getenv("HUOBAO_BASE_URL", "http://127.0.0.1:5678") +LIVE_RECORDER_BASE_URL = os.getenv("LIVE_RECORDER_BASE_URL", "http://192.168.31.188:19106") +CUTVIDEO_BASE_CONFIG = os.getenv("CUTVIDEO_BASE_CONFIG", "example.job.yaml") +CUTVIDEO_POLL_INTERVAL_SEC = int(os.getenv("CUTVIDEO_POLL_INTERVAL_SEC", "10")) +CUTVIDEO_MAX_WAIT_SEC = int(os.getenv("CUTVIDEO_MAX_WAIT_SEC", "1800")) +CUTVIDEO_UPLOAD_TIMEOUT_SEC = int(os.getenv("CUTVIDEO_UPLOAD_TIMEOUT_SEC", "1800")) +HUOBAO_POLL_INTERVAL_SEC = int(os.getenv("HUOBAO_POLL_INTERVAL_SEC", "10")) +HUOBAO_MAX_WAIT_SEC = int(os.getenv("HUOBAO_MAX_WAIT_SEC", "900")) + +for path in (DATA_DIR, DOWNLOADS_DIR, JOBS_DIR, MODELS_DIR): + path.mkdir(parents=True, exist_ok=True) + +db = Database(DB_PATH) +openai_client = OpenAICompatClient() +asr_http_client = AsrHttpClient( + base_url=ASR_HTTP_BASE_URL, + transcribe_path=ASR_HTTP_TRANSCRIBE_PATH, + 
field_name=ASR_HTTP_FIELD_NAME, + timeout=ASR_HTTP_TIMEOUT_SEC, +) +n8n_client = N8NClient( + base_url=N8N_BASE_URL, + workflow_paths={ + "analysis_pipeline": N8N_ANALYSIS_WEBHOOK_PATH, + "real_cut_pipeline": N8N_REAL_CUT_WEBHOOK_PATH, + "ai_video_pipeline": N8N_AI_VIDEO_WEBHOOK_PATH, + "content_source_sync_pipeline": N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH, + }, + shared_secret=ORCHESTRATOR_SHARED_SECRET, +) +cutvideo_client = CutVideoClient( + base_url=CUTVIDEO_BASE_URL, + api_key=CUTVIDEO_API_KEY, + upload_timeout=CUTVIDEO_UPLOAD_TIMEOUT_SEC, +) +huobao_client = HuobaoDramaClient(base_url=HUOBAO_BASE_URL) + +app = FastAPI(title="StoryForge Collector Service", version="0.2.0") +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +class RegisterAccountRequest(BaseModel): + username: str + password: str + display_name: str = "" + + +class LoginRequest(BaseModel): + username: str + password: str + + +class ModelProfileRequest(BaseModel): + name: str + base_url: str + api_key: str = "" + model_name: str + is_default: bool = False + + +class PreferredModelRequest(BaseModel): + model_profile_id: str + + +class KnowledgeBaseCreateRequest(BaseModel): + name: str + description: str = "" + project_id: str = "" + + +class ExploreVideoLinkRequest(BaseModel): + video_url: str + title: str | None = None + project_id: str | None = None + knowledge_base_id: str | None = None + assistant_id: str | None = None + analysis_model_profile_id: str | None = None + language: str = "auto" + + +class ExploreTextRequest(BaseModel): + title: str + content: str + project_id: str | None = None + knowledge_base_id: str | None = None + assistant_id: str | None = None + analysis_model_profile_id: str | None = None + + +class AssistantCreateRequest(BaseModel): + name: str + description: str = "" + system_prompt: str = "" + generation_goal: str = "" + knowledge_base_ids: list[str] = Field(default_factory=list) + 
project_id: str = "" + model_profile_id: str = "" + + +class AssistantUpdateRequest(BaseModel): + name: str | None = None + description: str | None = None + system_prompt: str | None = None + generation_goal: str | None = None + knowledge_base_ids: list[str] | None = None + project_id: str | None = None + model_profile_id: str | None = None + + +class GenerateCopyRequest(BaseModel): + brief: str + platform: str = "douyin" + audience: str = "创业者" + extra_requirements: str = "" + knowledge_base_ids: list[str] = Field(default_factory=list) + + +class PublishAppUpdateRequest(BaseModel): + platform: str = "android" + channel: str = "stable" + versionCode: int + versionName: str + minSupportedCode: int + apkUrl: str + apkSha256: str = "" + notes: str = "" + forceUpdate: bool = False + isActive: bool = True + + +class ProjectCreateRequest(BaseModel): + name: str + description: str = "" + + +class ContentSourceCreateRequest(BaseModel): + project_id: str = "" + source_kind: str + platform: str = "" + handle: str = "" + source_url: str = "" + title: str = "" + local_path: str = "" + metadata: dict[str, Any] = Field(default_factory=dict) + + +class ContentSourceSyncRequest(BaseModel): + project_id: str = "" + knowledge_base_id: str = "" + assistant_id: str = "" + content_source_id: str = "" + platform: str = "" + handle: str = "" + source_url: str = "" + title: str = "" + analysis_model_profile_id: str = "" + language: str = "auto" + max_items: int = Field(default=5, ge=1, le=20) + skip_existing: bool = True + auto_trigger_analysis: bool = True + + +class RealCutJobRequest(BaseModel): + project_id: str = "" + title: str + input_dir: str = "" + source_job_id: str = "" + base_config: str = "" + objective: str = "保留高信息密度片段,输出适合短视频平台的粗剪结果" + target_duration_sec: int = 60 + target_aspect_ratio: str = "9:16" + ideal_segment_duration_sec: int = 8 + max_segment_duration_sec: int = 18 + transcript_backend: str = "auto" + transcript_device: str = "cuda" + review_enabled: bool = False + 
# Request body carrying a job identifier plus a free-form payload.
class InternalStepRequest(BaseModel):
    job_id: str = ""
    # NOTE(review): camelCase twin of job_id — presumably accepted for callers
    # that send camelCase keys; confirm which of the two the handlers prefer.
    jobId: str = ""
    # Arbitrary JSON object; defaults to a fresh empty dict per instance.
    payload: dict[str, Any] = Field(default_factory=dict)
def model_profile_for_account(account_id: str, requested_id: str | None) -> dict[str, Any]:
    """Resolve the model profile that ``account_id`` should use.

    Resolution order (first match wins):

    1. ``requested_id``, when given and visible to the account;
    2. the account's ``preferred_analysis_model_id``, when set and visible;
    3. a profile flagged ``is_default`` that is visible to the account,
       system profiles first, then the oldest.

    A profile is *visible* when it has no owner (``owner_account_id IS NULL``)
    or is owned by ``account_id``.  The default fallback is scoped the same
    way as the explicit lookups so that one tenant can never be handed
    another tenant's profile (and API key).

    Raises:
        HTTPException: 500 when no visible profile can be resolved.
    """
    visible_clause = "(owner_account_id IS NULL OR owner_account_id = ?)"

    def visible_profile(profile_id: str) -> dict[str, Any] | None:
        # Shared lookup for the explicit and the preferred profile id.
        return db.fetch_one(
            f"SELECT * FROM model_profiles WHERE id = ? AND {visible_clause}",
            (profile_id, account_id),
        )

    if requested_id:
        row = visible_profile(requested_id)
        if row:
            return row

    account = db.fetch_one("SELECT preferred_analysis_model_id FROM accounts WHERE id = ?", (account_id,))
    preferred_id = (account or {}).get("preferred_analysis_model_id") or ""
    if preferred_id:
        row = visible_profile(preferred_id)
        if row:
            return row

    row = db.fetch_one(
        f"SELECT * FROM model_profiles WHERE is_default = 1 AND {visible_clause} "
        "ORDER BY is_system DESC, created_at ASC LIMIT 1",
        (account_id,),
    )
    if not row:
        raise HTTPException(status_code=500, detail="No model profile configured")
    return row
return normalized + return "原画" + + +def storage_token(value: str | None, fallback: str) -> str: + normalized = re.sub(r"[^a-zA-Z0-9_-]+", "-", str(value or "").strip()).strip("-") + return normalized or fallback + + +def job_storage_dir(*, account_id: str, project_id: str | None, job_id: str) -> Path: + project_token = storage_token(project_id, "default-project") + return JOBS_DIR / storage_token(account_id, "anonymous") / project_token / storage_token(job_id, "job") + + +def legacy_job_storage_dir(job_id: str) -> Path: + return JOBS_DIR / storage_token(job_id, "job") + + +def job_account_root(account_id: str) -> Path: + return JOBS_DIR / storage_token(account_id, "anonymous") + + +def job_project_root(account_id: str, project_id: str | None) -> Path: + return job_account_root(account_id) / storage_token(project_id, "default-project") + + +def tenant_download_root(account_id: str, project_id: str | None) -> Path: + return DOWNLOADS_DIR / storage_token(account_id, "anonymous") / storage_token(project_id, "default-project") + + +def download_account_root(account_id: str) -> Path: + return DOWNLOADS_DIR / storage_token(account_id, "anonymous") + + +def download_job_root(account_id: str, project_id: str | None, job_id: str) -> Path: + return tenant_download_root(account_id, project_id) / storage_token(job_id, "job") + + +def download_relative_path(path: Path) -> str: + try: + return path.resolve().relative_to(DOWNLOADS_DIR.resolve()).as_posix() + except Exception: + return "" + + +def download_content_url(relative_path: str) -> str: + normalized = relative_path.strip().lstrip("/") + if not normalized: + return "" + return f"/v2/storage/artifacts/{encode_storage_artifact_id('downloads', normalized)}/content" + + +def download_content_url_for_path(path: Path) -> str: + return download_content_url(download_relative_path(path)) + + +def write_json_snapshot(target_path: Path, payload: Any) -> None: + target_path.parent.mkdir(parents=True, exist_ok=True) + 
def copy_file_if_needed(source_path: Path, target_path: Path) -> bool:
    """Copy ``source_path`` to ``target_path`` unless the target is already current.

    The copy is skipped (returning ``False``) when the target exists and is
    either the very same file as the source or has the same size and
    nanosecond mtime.  Any ``OSError`` raised while comparing is treated as
    "not current", so the copy is attempted.  Returns ``True`` after copying.
    """
    resolved_source = source_path.resolve()

    def target_is_current() -> bool:
        if not target_path.exists():
            return False
        if target_path.resolve() == resolved_source:
            return True
        src_stat = source_path.stat()
        dst_stat = target_path.stat()
        same_size = src_stat.st_size == dst_stat.st_size
        same_mtime = int(src_stat.st_mtime_ns) == int(dst_stat.st_mtime_ns)
        return same_size and same_mtime

    try:
        skip_copy = target_is_current()
    except OSError:
        skip_copy = False
    if skip_copy:
        return False

    target_path.parent.mkdir(parents=True, exist_ok=True)
    # copy2 keeps the mtime, which is what makes the next comparison succeed.
    shutil.copy2(source_path, target_path)
    return True
def existing_local_path(value: Any) -> Path | None:
    """Return ``value`` as a resolved path if it names an existing local file or directory.

    ``value`` is stringified and stripped.  It is rejected (``None``) when it
    is empty, looks like an http(s) URL, or contains no path separator.
    ``~`` and environment variables are expanded; relative paths are anchored
    at ``BASE_DIR``.

    Callers feed every scalar found in job payloads through this function, so
    ordinary prose that merely contains a slash ends up here as well.  Probing
    such text on disk can raise ``OSError`` (for example "File name too
    long"), ``ValueError`` (embedded NUL byte) or ``RuntimeError`` (symlink
    loop).  Those mean "not a usable local path" and yield ``None`` rather
    than aborting the caller.
    """
    text = str(value or "").strip()
    if not text or looks_like_external_url(text):
        return None
    if "/" not in text and "\\" not in text:
        return None
    try:
        candidate = Path(os.path.expandvars(os.path.expanduser(text)))
        if not candidate.is_absolute():
            candidate = BASE_DIR / candidate
        candidate = candidate.resolve()
        return candidate if candidate.exists() else None
    except (OSError, ValueError, RuntimeError):
        return None
def materialize_job_download_archive(
    row: dict[str, Any],
    *,
    artifacts: dict[str, Any],
    result: dict[str, Any],
) -> dict[str, Any]:
    """Build the per-job download archive and return a summary of it.

    Steps:

    1. Mirror the job's storage directory into ``<archive>/files/job``.
    2. For every existing local path found in ``artifacts`` / ``result``:
       files already under ``DOWNLOADS_DIR`` are referenced in place, files
       under the job directory are mirrored into ``files/job``, any other file
       is copied to ``files/linked``, and directories outside the job
       directory are mirrored into ``files/dirs/<label>``.
    3. Collect unique external http(s) links.
    4. Write ``job.json``, ``artifacts.json``, ``result.json``,
       ``external-links.json`` and ``archive-manifest.json`` snapshots.

    Args:
        row: Job row; ``id`` is required, the other keys are read with ``.get``.
        artifacts: Job artifacts mapping, scanned for paths and links.
        result: Job result mapping, scanned for paths and links.

    Returns:
        Summary dict with snapshot paths/URLs, counts, and previews limited to
        the first 40 items and links.
    """
    archive_root = download_job_root(row.get("user_id", ""), row.get("project_id", ""), row["id"])
    files_root = archive_root / "files"
    files_root.mkdir(parents=True, exist_ok=True)

    # Resolved target paths already recorded; prevents duplicate archive items.
    seen_targets: set[str] = set()
    items: list[dict[str, Any]] = []
    job_dir = job_storage_dir(
        account_id=row.get("user_id", ""),
        project_id=row.get("project_id", ""),
        job_id=row["id"],
    )
    if job_dir.exists() and job_dir.is_dir():
        items.extend(sync_directory_to_archive(job_dir, files_root / "job", label_prefix="job", seen_targets=seen_targets))

    job_dir_resolved = job_dir.resolve() if job_dir.exists() else None
    downloads_root_resolved = DOWNLOADS_DIR.resolve()
    for source in iter_payload_paths({"artifacts": artifacts, "result": result}):
        source_path = source["path"]
        label = source["label"]
        try:
            source_resolved = source_path.resolve()
        except OSError:
            continue
        if source_path.is_file():
            copy_needed = True
            # Path-aware containment: a plain string prefix test would also
            # match sibling directories such as "<downloads>-old".
            if source_resolved.is_relative_to(downloads_root_resolved):
                # Already served from the downloads tree; reference in place.
                target_path = source_path
                copy_needed = False
            elif job_dir_resolved is not None and source_resolved.is_relative_to(job_dir_resolved):
                target_path = files_root / "job" / source_resolved.relative_to(job_dir_resolved)
            else:
                target_path = archive_target_for_label(files_root, label, source_path)
            target_key = str(target_path.resolve())
            if target_key in seen_targets:
                continue
            copied = copy_file_if_needed(source_path, target_path) if copy_needed else False
            seen_targets.add(target_key)
            items.append(make_archive_item(label, source_path, target_path, copied=copied))
        elif source_path.is_dir():
            # Directories inside the job dir were mirrored by the sync above.
            if job_dir_resolved is not None and source_resolved.is_relative_to(job_dir_resolved):
                continue
            target_dir = files_root / "dirs" / storage_token(label, source_path.name or "dir")
            items.extend(sync_directory_to_archive(source_path, target_dir, label_prefix=label, seen_targets=seen_targets))

    unique_links: list[dict[str, str]] = []
    seen_urls: set[str] = set()
    for item in iter_external_links({"artifacts": artifacts, "result": result}):
        url = item["url"]
        if url in seen_urls:
            continue
        seen_urls.add(url)
        unique_links.append(item)

    artifacts_snapshot_path = archive_root / "artifacts.json"
    result_snapshot_path = archive_root / "result.json"
    links_snapshot_path = archive_root / "external-links.json"
    job_snapshot_path = archive_root / "job.json"
    manifest_path = archive_root / "archive-manifest.json"
    write_json_snapshot(
        job_snapshot_path,
        {
            "id": row["id"],
            "user_id": row.get("user_id", ""),
            "project_id": row.get("project_id", ""),
            "source_type": row.get("source_type", ""),
            "line_type": row.get("line_type", ""),
            "workflow_key": row.get("workflow_key", ""),
            "title": row.get("title", ""),
            "status": row.get("status", ""),
            "updated_at": row.get("updated_at", ""),
        },
    )
    write_json_snapshot(artifacts_snapshot_path, artifacts)
    write_json_snapshot(result_snapshot_path, result)
    write_json_snapshot(links_snapshot_path, unique_links)

    generated_at = utc_now()
    manifest_payload = {
        "job_id": row["id"],
        "user_id": row.get("user_id", ""),
        "project_id": row.get("project_id", ""),
        "status": row.get("status", ""),
        "line_type": row.get("line_type", ""),
        "workflow_key": row.get("workflow_key", ""),
        "title": row.get("title", ""),
        "download_root": str(archive_root),
        "items": items,
        "external_links": unique_links,
        "generated_at": generated_at,
    }
    write_json_snapshot(manifest_path, manifest_payload)

    manifest_relative_path = download_relative_path(manifest_path)
    return {
        "mode": storage_mode(archive_root),
        "download_dir": str(archive_root),
        "download_root_relative_path": download_relative_path(archive_root),
        "job_snapshot_path": str(job_snapshot_path),
        "job_snapshot_url": download_content_url_for_path(job_snapshot_path),
        "manifest_path": str(manifest_path),
        "manifest_relative_path": manifest_relative_path,
        "manifest_url": download_content_url(manifest_relative_path),
        "artifacts_snapshot_path": str(artifacts_snapshot_path),
        "artifacts_snapshot_url": download_content_url_for_path(artifacts_snapshot_path),
        "result_snapshot_path": str(result_snapshot_path),
        "result_snapshot_url": download_content_url_for_path(result_snapshot_path),
        "external_links_path": str(links_snapshot_path),
        "external_links_url": download_content_url_for_path(links_snapshot_path),
        "item_count": len(items),
        # The full lists live in the manifest; the response carries a preview.
        "items": items[:40],
        "external_link_count": len(unique_links),
        "external_links_preview": unique_links[:40],
        "generated_at": generated_at,
    }
+ return { + "mode": storage_mode(download_job_root(row.get("user_id", ""), row.get("project_id", ""), row["id"])), + "error": str(exc)[:500], + "generated_at": utc_now(), + } + + +def storage_mode(path: Path) -> str: + text = str(path) + if text.startswith("/Users/kris/mnt/fnos-share"): + return "nas" + return "local" + + +def disk_usage_payload(path: Path) -> dict[str, Any]: + probe = path if path.exists() else path.parent + try: + total, used, free = shutil.disk_usage(probe) + return { + "path": str(path), + "mode": storage_mode(path), + "total_bytes": total, + "used_bytes": used, + "free_bytes": free, + } + except Exception as exc: + return { + "path": str(path), + "mode": storage_mode(path), + "error": str(exc), + } + + +def directory_usage_payload(path: Path) -> dict[str, Any]: + if not path.exists(): + return { + "path": str(path), + "mode": storage_mode(path), + "exists": False, + "file_count": 0, + "dir_count": 0, + "bytes": 0, + } + total_bytes = 0 + file_count = 0 + dir_count = 0 + for root, dirnames, filenames in os.walk(path): + dir_count += len(dirnames) + for filename in filenames: + file_count += 1 + try: + total_bytes += (Path(root) / filename).stat().st_size + except OSError: + continue + return { + "path": str(path), + "mode": storage_mode(path), + "exists": True, + "file_count": file_count, + "dir_count": dir_count, + "bytes": total_bytes, + } + + +def recent_job_storage_examples(account_id: str, project_id: str | None, limit: int = 5) -> list[dict[str, Any]]: + clauses = ["user_id = ?"] + params: list[Any] = [account_id] + normalized_project_id = str(project_id or "").strip() + if normalized_project_id: + clauses.append("project_id = ?") + params.append(normalized_project_id) + rows = db.fetch_all( + f""" + SELECT id, title, status, updated_at, artifacts_json + FROM jobs + WHERE {" AND ".join(clauses)} + ORDER BY updated_at DESC + LIMIT ? 
def encode_storage_artifact_id(kind: str, relative_path: str) -> str:
    """Pack ``kind`` and ``relative_path`` into an opaque, URL-safe artifact id.

    The id is the URL-safe base64 encoding of ``"<kind>:<relative_path>"``
    (UTF-8) with the trailing ``=`` padding removed.
    """
    token_bytes = "{}:{}".format(kind, relative_path).encode("utf-8")
    encoded = base64.urlsafe_b64encode(token_bytes).decode("ascii")
    return encoded.rstrip("=")
def build_storage_artifact_payload(path: Path, *, base_dir: Path, kind: str) -> dict[str, Any]:
    """Describe the file at ``path`` as a storage-artifact payload.

    Args:
        path: Existing file located under ``base_dir``.
        base_dir: Storage root that the relative path (and the id) is based on.
        kind: Storage kind embedded in the artifact id (e.g. ``"downloads"``).

    Returns:
        Dict with ``id``, ``kind``, ``name``, ``relative_path``, ``path``,
        ``size_bytes``, ``updated_at`` (UTC ISO-8601), ``mime_type`` and
        ``content_url``.

    Raises:
        OSError: ``path`` cannot be stat'ed.
        ValueError: ``path`` is not under ``base_dir`` even after resolving both.
    """
    stat = path.stat()
    try:
        relative = path.relative_to(base_dir)
    except ValueError:
        # resolve_owned_storage_artifact passes an already-resolved path with
        # an unresolved base_dir; compare both in resolved form so a symlinked
        # storage root does not make a valid artifact look foreign.
        relative = path.resolve().relative_to(base_dir.resolve())
    # POSIX separators keep ids and URLs identical across platforms and match
    # download_relative_path().
    relative_path = relative.as_posix()
    artifact_id = encode_storage_artifact_id(kind, relative_path)
    return {
        "id": artifact_id,
        "kind": kind,
        "name": path.name,
        "relative_path": relative_path,
        "path": str(path),
        "size_bytes": stat.st_size,
        "updated_at": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat(),
        "mime_type": mimetypes.guess_type(path.name)[0] or "application/octet-stream",
        "content_url": f"/v2/storage/artifacts/{artifact_id}/content",
    }
def _write_download_json(path: Path, payload: Any) -> bool:
    """Write ``payload`` to ``path`` as pretty-printed UTF-8 JSON.

    Nothing is written for an "empty" payload (``None``, ``""``, ``[]`` or
    ``{}``).  Parent directories are created on demand.

    Returns:
        ``True`` when the file was written, ``False`` when the payload was empty.
    """
    has_content = payload not in (None, "", [], {})
    if has_content:
        path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(payload, ensure_ascii=False, indent=2)
        path.write_text(serialized, encoding="utf-8")
    return has_content
def parse_live_recorder_import_text(raw_text: str) -> list[dict[str, Any]]:
    """Parse pasted live-recorder config text into source entries.

    One entry per non-blank line.  A leading ``#`` marks the entry as
    disabled.  Fields are separated by an ASCII or a full-width comma:

    * ``url``
    * ``quality, url`` or ``url, title``
    * ``quality, url, title...`` or ``url, title...`` (extra fields are
      joined into the title with spaces)

    The first field is taken as a quality only when it is a key of
    ``LIVE_RECORDER_QUALITY_RANKS`` and at least one more field follows.
    Lines consisting only of separators are skipped.  Duplicate URLs keep
    their first occurrence.

    Returns:
        List of dicts with ``source_url``, ``quality``, ``title``, ``enabled``.
    """
    entries_by_url: dict[str, dict[str, Any]] = {}
    normalized_text = str(raw_text or "").replace("\r\n", "\n").replace("\r", "\n")
    for raw_line in normalized_text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        enabled = not line.startswith("#")
        if not enabled:
            line = line.lstrip("#").strip()
            if not line:
                continue
        # Accept the full-width comma as well: config lines are often typed
        # with a Chinese input method.
        parts = [part.strip() for part in re.split(r"[,,]", line) if part.strip()]
        if not parts:
            # The line held nothing but separators, e.g. ",".
            continue
        quality = "原画"
        if len(parts) > 1 and parts[0] in LIVE_RECORDER_QUALITY_RANKS:
            quality, *parts = parts
        source_url, *title_parts = parts
        entry = {
            "source_url": source_url,
            "quality": normalize_live_recorder_quality(quality),
            "title": " ".join(title_parts).strip(),
            "enabled": enabled,
        }
        # setdefault keeps the first entry seen for a URL, in input order.
        entries_by_url.setdefault(source_url, entry)
    return list(entries_by_url.values())
def live_recorder_binding_rows(user_id: str, project_id: str = "") -> list[dict[str, Any]]:
    """Fetch a user's live-recorder bindings joined with their source rows.

    Rows are limited to ``user_id`` and, when ``project_id`` is non-empty, to
    that project as well.  They are ordered by most recently updated, then
    most recently created.
    """
    filters: list[tuple[str, Any]] = [("b.user_id = ?", user_id)]
    if project_id:
        filters.append(("b.project_id = ?", project_id))
    where_sql = " AND ".join(condition for condition, _ in filters)
    bound_values = tuple(value for _, value in filters)
    sql = f"""
        SELECT
            b.id AS binding_id,
            b.user_id,
            b.project_id,
            b.assistant_id,
            b.source_id,
            b.title AS binding_title,
            b.quality,
            b.enabled,
            b.created_at,
            b.updated_at,
            s.platform,
            s.source_url,
            s.remote_name,
            s.title AS source_title,
            s.metadata_json AS source_metadata_json
        FROM live_recorder_bindings b
        JOIN live_recorder_sources s ON s.id = b.source_id
        WHERE {where_sql}
        ORDER BY b.updated_at DESC, b.created_at DESC
    """
    return db.fetch_all(sql, bound_values)
def get_or_create_live_recorder_source(*, platform: str, source_url: str, title: str = "") -> dict[str, Any]:
    """Return the shared live-recorder source row for a platform/URL pair.

    The platform is taken from ``platform`` or inferred from ``source_url``
    and validated via ``ensure_domestic_platform``.  An existing row is
    reused; its title is updated when a different, non-blank ``title`` is
    supplied.  Otherwise a new row is inserted with a generated id, a derived
    remote name and empty metadata.
    """
    platform_key = ensure_domestic_platform(platform or infer_platform_from_url(source_url), allow_blank=False)
    url_key = source_url.strip()
    current = db.fetch_one(
        "SELECT * FROM live_recorder_sources WHERE platform = ? AND source_url = ?",
        (platform_key, url_key),
    )
    stamp = utc_now()
    clean_title = title.strip()

    if not current:
        new_id = make_id("lrsrc")
        db.execute(
            """
            INSERT INTO live_recorder_sources (id, platform, source_url, remote_name, title, metadata_json, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                new_id,
                platform_key,
                url_key,
                live_recorder_remote_name(new_id),
                clean_title,
                json.dumps({}, ensure_ascii=False),
                stamp,
                stamp,
            ),
        )
        return db.fetch_one("SELECT * FROM live_recorder_sources WHERE id = ?", (new_id,))

    title_changed = bool(clean_title) and clean_title != (current.get("title") or "")
    if title_changed:
        db.execute(
            "UPDATE live_recorder_sources SET title = ?, updated_at = ? WHERE id = ?",
            (clean_title, stamp, current["id"]),
        )
        # Re-read so the caller sees the stored title and timestamp.
        current = db.fetch_one("SELECT * FROM live_recorder_sources WHERE id = ?", (current["id"],))
    return current
def live_recorder_source_groups() -> dict[str, dict[str, Any]]:
    """Group every recorder source with the bindings that point at it.

    Returns a mapping keyed by source id. Sources without bindings are still
    present (the query is a LEFT JOIN) with an empty ``bindings`` list.
    """
    rows = db.fetch_all(
        """
        SELECT
            s.id AS source_id,
            s.platform,
            s.source_url,
            s.remote_name,
            s.title AS source_title,
            b.id AS binding_id,
            b.user_id,
            b.project_id,
            b.assistant_id,
            b.title AS binding_title,
            b.quality,
            b.enabled
        FROM live_recorder_sources s
        LEFT JOIN live_recorder_bindings b ON b.source_id = s.id
        ORDER BY s.created_at ASC, b.created_at ASC
        """
    )
    groups: dict[str, dict[str, Any]] = {}
    for row in rows:
        key = row["source_id"]
        if key not in groups:
            groups[key] = {
                "source_id": key,
                "platform": row.get("platform", ""),
                "source_url": row.get("source_url", ""),
                "remote_name": row.get("remote_name", ""),
                "source_title": row.get("source_title", ""),
                "bindings": [],
            }
        if not row.get("binding_id"):
            continue
        groups[key]["bindings"].append(
            {
                "binding_id": row["binding_id"],
                "user_id": row["user_id"],
                "project_id": row.get("project_id", "") or "",
                "assistant_id": row.get("assistant_id", "") or "",
                "binding_title": row.get("binding_title", "") or "",
                "quality": normalize_live_recorder_quality(row.get("quality")),
                "enabled": bool(row.get("enabled", 1)),
            }
        )
    return groups


def build_live_recorder_managed_raw() -> tuple[str, dict[str, Any]]:
    """Build the recorder URL config text: foreign lines kept, managed lines regenerated.

    Lines of type ``url`` whose name starts with ``LIVE_RECORDER_MANAGED_PREFIX``
    are dropped from the remote config and replaced by one line per source that
    has at least one enabled binding, using the highest-ranked requested quality.
    Returns the raw text (newline-terminated when non-empty) and counters.
    """
    remote_config = live_recorder_request("GET", "/api/url-config", timeout=15.0)
    kept_lines: list[str] = []
    for entry in remote_config.get("lines") or []:
        raw_line = str(entry.get("raw") or "")
        entry_name = str(entry.get("name") or "").strip()
        managed = entry.get("type") == "url" and entry_name.startswith(LIVE_RECORDER_MANAGED_PREFIX)
        # Blank lines are never emitted nor counted, so they are filtered here once.
        if not managed and raw_line.strip():
            kept_lines.append(raw_line)

    managed_lines: list[str] = []
    binding_total = 0
    for source in live_recorder_source_groups().values():
        enabled_bindings = [item for item in source["bindings"] if item.get("enabled")]
        if not enabled_bindings:
            continue
        best_quality = max(
            (item.get("quality") or "原画" for item in enabled_bindings),
            key=lambda name: LIVE_RECORDER_QUALITY_RANKS.get(name, 0),
        )
        managed_lines.append(f"{best_quality},{source['source_url']},{source['remote_name']}")
        binding_total += len(enabled_bindings)

    raw = "\n".join(kept_lines + managed_lines).strip()
    if raw:
        raw += "\n"
    stats = {
        "preserved_count": len(kept_lines),
        "managed_source_count": len(managed_lines),
        "managed_binding_count": binding_total,
        "generated_count": len(managed_lines),
    }
    return raw, stats
def sync_live_recorder_remote_config() -> dict[str, Any]:
    """Rebuild the managed URL config and POST it to the recorder."""
    raw, stats = build_live_recorder_managed_raw()
    remote = live_recorder_request("POST", "/api/url-config", {"raw": raw}, timeout=20.0)
    return {"ok": True, "stats": stats, "remote": remote}


def live_recorder_runtime_payload() -> dict[str, Any]:
    """Fetch ``/api/status-lite``; a non-dict response becomes an empty recordings payload."""
    response = live_recorder_request("GET", "/api/status-lite", timeout=8.0)
    if isinstance(response, dict):
        return response
    return {"recordings": []}


def live_recorder_recordings_payload() -> dict[str, Any]:
    """Fetch ``/api/recordings``; a non-dict response becomes an empty recordings payload."""
    response = live_recorder_request("GET", "/api/recordings", timeout=12.0)
    if isinstance(response, dict):
        return response
    return {"recordings": []}


def live_recorder_downloads_payload() -> dict[str, Any]:
    """Fetch ``/api/downloads``; a non-dict response becomes an empty files payload."""
    response = live_recorder_request("GET", "/api/downloads", timeout=20.0)
    if isinstance(response, dict):
        return response
    return {"files": []}


def owned_live_recorder_sources(user_id: str, project_id: str = "") -> tuple[list[dict[str, Any]], dict[str, dict[str, Any]]]:
    """Return the caller's binding rows plus the same rows indexed by ``remote_name``."""
    binding_rows = live_recorder_binding_rows(user_id, project_id)
    by_remote_name = {item["remote_name"]: item for item in binding_rows}
    return binding_rows, by_remote_name
def filter_owned_live_recorder_recordings(user_id: str, project_id: str = "") -> list[dict[str, Any]]:
    """Return the recorder's recordings that belong to the caller's bindings.

    A recording is attributed to a binding when the binding's ``remote_name``
    occurs in the recording's name/path fields, or when the recording URL equals
    the binding's source URL. Matched recordings are copied and annotated with
    ``source_id``, ``binding_id``, ``project_id`` and ``title``.
    """
    rows, by_remote_name = owned_live_recorder_sources(user_id, project_id)
    if not rows:
        return []

    owned: list[dict[str, Any]] = []
    for recording in live_recorder_recordings_payload().get("recordings") or []:
        haystack = " ".join(
            str(recording.get(field) or "")
            for field in ("anchor_name", "record_name", "save_dir", "save_file")
        )
        record_url = str(recording.get("record_url") or "").strip()
        matched = None
        for remote_name, row in by_remote_name.items():
            if remote_name and remote_name in haystack:
                matched = row
                break
            # A blank URL must never count as a match: otherwise any recording
            # without a URL would be attributed to a binding whose source URL
            # is empty, defeating this ownership filter.
            if record_url and record_url == str(row.get("source_url") or "").strip():
                matched = row
                break
        if not matched:
            continue
        annotated = dict(recording)
        annotated["source_id"] = matched["source_id"]
        annotated["binding_id"] = matched["binding_id"]
        annotated["project_id"] = matched.get("project_id", "") or ""
        annotated["title"] = (
            matched.get("binding_title") or matched.get("source_title") or matched.get("remote_name") or ""
        )
        owned.append(annotated)
    return owned


def encode_live_recorder_file_id(relative_path: str) -> str:
    """Encode a relative path as unpadded URL-safe base64 so it can be used in a URL path."""
    encoded = base64.urlsafe_b64encode(relative_path.encode("utf-8"))
    return encoded.decode("ascii").rstrip("=")


def decode_live_recorder_file_id(file_id: str) -> str:
    """Invert ``encode_live_recorder_file_id``.

    Raises:
        HTTPException: 400 when the id is not valid base64 or not valid UTF-8.
    """
    padding = "=" * (-len(file_id) % 4)
    try:
        return base64.urlsafe_b64decode((file_id + padding).encode("ascii")).decode("utf-8")
    except ValueError as exc:
        # binascii.Error and the Unicode errors raised above all derive from ValueError.
        raise HTTPException(status_code=400, detail=f"Invalid live recorder file id: {exc}") from exc


def filter_owned_live_recorder_files(user_id: str, project_id: str = "", limit: int = 200) -> list[dict[str, Any]]:
    """Return at most ``limit`` downloadable files that belong to the caller's bindings.

    A file is attributed to a binding when the binding's ``remote_name`` occurs
    in the file's relative path or name. Each match is copied and annotated with
    an opaque ``id`` and a ``content_url`` built from it.
    """
    # Checked up front so a non-positive limit yields nothing instead of one file.
    if limit <= 0:
        return []
    rows, by_remote_name = owned_live_recorder_sources(user_id, project_id)
    if not rows:
        return []

    files: list[dict[str, Any]] = []
    for item in live_recorder_downloads_payload().get("files") or []:
        relative_path = str(item.get("relative_path") or "")
        haystack = f"{relative_path} {str(item.get('name') or '')}"
        matched = next(
            (row for remote_name, row in by_remote_name.items() if remote_name and remote_name in haystack),
            None,
        )
        if not matched:
            continue
        annotated = dict(item)
        annotated["id"] = encode_live_recorder_file_id(relative_path)
        annotated["source_id"] = matched["source_id"]
        annotated["binding_id"] = matched["binding_id"]
        annotated["project_id"] = matched.get("project_id", "") or ""
        annotated["platform"] = matched.get("platform", "") or ""
        annotated["title"] = (
            matched.get("binding_title") or matched.get("source_title") or matched.get("remote_name") or ""
        )
        annotated["content_url"] = f"/v2/live-recorder/files/{annotated['id']}/content"
        files.append(annotated)
        if len(files) >= limit:
            break
    return files
def job_event_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Serialise a ``job_events`` row, decoding ``payload_json`` into a dict."""
    event = {key: row[key] for key in ("id", "job_id", "event_type")}
    event["payload"] = parse_json_object(row.get("payload_json") or "{}")
    event["created_at"] = row["created_at"]
    return event


def ensure_default_project(account_id: str, username: str = "默认用户") -> dict[str, Any]:
    """Return the account's oldest project, creating a default one when it has none."""
    oldest = db.fetch_one(
        "SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC LIMIT 1",
        (account_id,),
    )
    if oldest:
        return oldest

    created_at = utc_now()
    new_id = make_id("project")
    db.execute(
        """
        INSERT INTO projects (id, user_id, name, description, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        (new_id, account_id, f"{username} 默认项目", "系统自动创建", created_at, created_at),
    )
    return db.fetch_one("SELECT * FROM projects WHERE id = ?", (new_id,))
def resolve_target_project(account_id: str, requested_project_id: str | None, username: str = "默认用户") -> dict[str, Any]:
    """Resolve the project a request targets.

    Without a requested id the account's default project is returned (and
    created if needed). With one, the project must exist and be owned by the
    account, otherwise a 404 ``HTTPException`` is raised.
    """
    if not requested_project_id:
        return ensure_default_project(account_id, username=username)
    owned = db.fetch_one(
        "SELECT * FROM projects WHERE id = ? AND user_id = ?",
        (requested_project_id, account_id),
    )
    if not owned:
        raise HTTPException(status_code=404, detail="Project not found")
    return owned


def resolve_target_assistant(account_id: str, requested_assistant_id: str | None, project_id: str = "") -> dict[str, Any] | None:
    """Resolve an optional assistant id to a row owned by the account.

    Returns ``None`` when no id is requested. Raises 404 when the assistant is
    not found for this account, and 400 when both the assistant and the caller
    name a project and the two differ.
    """
    if not requested_assistant_id:
        return None
    assistant = db.fetch_one(
        "SELECT * FROM assistants WHERE id = ? AND user_id = ?",
        (requested_assistant_id, account_id),
    )
    if not assistant:
        raise HTTPException(status_code=404, detail="Assistant not found")
    assistant_project = assistant.get("project_id")
    if project_id and assistant_project and assistant_project != project_id:
        raise HTTPException(status_code=400, detail="Assistant does not belong to target project")
    return assistant


def append_job_event(job_id: str, event_type: str, payload: dict[str, Any] | None = None) -> None:
    """Insert one ``job_events`` row; a missing payload is stored as ``{}``."""
    record = (
        make_id("evt"),
        job_id,
        event_type,
        json.dumps(payload or {}, ensure_ascii=False),
        utc_now(),
    )
    db.execute(
        """
        INSERT INTO job_events (id, job_id, event_type, payload_json, created_at)
        VALUES (?, ?, ?, ?, ?)
        """,
        record,
    )
def parse_json_object(raw_text: str) -> dict[str, Any]:
    """Parse text into a JSON object, returning ``{}`` whenever that is not possible.

    The whole text is tried first. Only if it is not valid JSON is the span
    from the first ``{`` to the last ``}`` tried instead, which recovers an
    object wrapped in extra prose or code fences. Valid JSON that is not an
    object (list, number, ...) yields ``{}``.
    """

    def as_object(value: Any) -> dict[str, Any]:
        return value if isinstance(value, dict) else {}

    text = raw_text.strip()
    if not text:
        return {}
    try:
        return as_object(json.loads(text))
    except json.JSONDecodeError:
        pass

    embedded = re.search(r"\{.*\}", text, re.S)
    if embedded is None:
        return {}
    try:
        return as_object(json.loads(embedded.group(0)))
    except json.JSONDecodeError:
        return {}


def knowledge_base_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Serialise a knowledge base row together with its document and assistant-link counts."""
    kb_id = row["id"]
    documents = db.fetch_one(
        "SELECT COUNT(*) AS count FROM knowledge_documents WHERE knowledge_base_id = ?",
        (kb_id,),
    )
    links = db.fetch_one(
        "SELECT COUNT(*) AS count FROM assistant_knowledge_bases WHERE knowledge_base_id = ?",
        (kb_id,),
    )
    return {
        "id": kb_id,
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "name": row["name"],
        "description": row.get("description", ""),
        "sync_status": row.get("sync_status", "ready"),
        "document_count": documents["count"],
        "linked_assistant_count": links["count"],
        "created_at": row["created_at"],
        "updated_at": row["updated_at"],
    }


def assistant_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Serialise an assistant row with its linked knowledge base ids and decoded config."""
    links = db.fetch_all(
        "SELECT knowledge_base_id FROM assistant_knowledge_bases WHERE assistant_id = ? ORDER BY knowledge_base_id ASC",
        (row["id"],),
    )
    linked_ids = [link["knowledge_base_id"] for link in links]
    return {
        "id": row["id"],
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "name": row["name"],
        "description": row.get("description", ""),
        "system_prompt": row.get("system_prompt", ""),
        "generation_goal": row.get("generation_goal", ""),
        "knowledge_base_ids": linked_ids,
        "config": parse_json_object(row.get("config_json") or "{}"),
        "model_profile_id": row.get("model_profile_id", ""),
        "created_at": row["created_at"],
        "updated_at": row["updated_at"],
    }
def review_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Serialise a review row, embedding its source job and assistant when they exist."""
    metrics = parse_json_object(row.get("metrics_json") or "{}")
    source_job = None
    assistant = None
    if row.get("source_job_id"):
        source_job_row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (row["source_job_id"],))
        if source_job_row:
            source_job = job_payload(source_job_row)
    if row.get("assistant_id"):
        assistant_row = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],))
        if assistant_row:
            assistant = assistant_payload(assistant_row)
    return {
        "id": row["id"],
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "source_job_id": row.get("source_job_id", ""),
        "assistant_id": row.get("assistant_id", ""),
        "title": row.get("title", ""),
        "platform": row.get("platform", "douyin"),
        "content_type": row.get("content_type", "video"),
        "publish_url": row.get("publish_url", ""),
        "published_at": row.get("published_at", ""),
        "metrics": metrics,
        "verdict": row.get("verdict", ""),
        "highlights": row.get("highlights", ""),
        "next_actions": row.get("next_actions", ""),
        "notes": row.get("notes", ""),
        "source_job": source_job,
        "assistant": assistant,
        "created_at": row["created_at"],
        "updated_at": row["updated_at"],
    }


def document_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Serialise a knowledge document row with its JSON columns decoded.

    ``storyboards`` is always a list: malformed JSON and JSON of any other
    type both fall back to ``[]``.
    """
    analysis_map = parse_json_object(row.get("analysis_json") or "{}")
    source_artifacts = parse_json_object(row.get("source_artifact_json") or "{}")
    try:
        storyboard_items = json.loads(row.get("storyboard_json") or "[]")
    except json.JSONDecodeError:
        storyboard_items = []
    if not isinstance(storyboard_items, list):
        # Consumers iterate this field; never hand them a dict, string or number.
        storyboard_items = []
    return {
        "id": row["id"],
        "knowledge_base_id": row["knowledge_base_id"],
        "title": row["title"],
        "source_type": row["source_type"],
        "source_url": row.get("source_url", ""),
        "transcript_text": row.get("transcript_text", ""),
        "style_summary": row.get("style_summary", ""),
        "combined_text": row.get("combined_text", ""),
        "analysis": analysis_map,
        "storyboards": storyboard_items,
        "source_artifacts": source_artifacts,
        "analysis_model_profile_id": row.get("analysis_model_profile_id", ""),
        "created_at": row["created_at"],
        "updated_at": row["updated_at"],
    }


def job_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Serialise a job row with ``artifacts_json`` / ``result_json`` decoded to dicts.

    Both decoded fields are guaranteed to be dicts: malformed JSON and valid
    JSON of another type both become ``{}``. ``archive`` mirrors
    ``artifacts["download_archive"]`` when that value is a dict, else ``{}``.
    """

    def load_object(raw: str) -> dict[str, Any]:
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError:
            return {}
        # A stored list or scalar would otherwise break the ``.get`` lookup below.
        return decoded if isinstance(decoded, dict) else {}

    artifacts_map = load_object(row.get("artifacts_json") or "{}")
    result_map = load_object(row.get("result_json") or "{}")
    archive = artifacts_map.get("download_archive")
    return {
        "id": row["id"],
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "parent_job_id": row.get("parent_job_id", ""),
        "assistant_id": row.get("assistant_id"),
        "knowledge_base_id": row["knowledge_base_id"],
        "content_source_id": row.get("content_source_id", ""),
        "source_type": row["source_type"],
        "line_type": row.get("line_type", "analysis"),
        "workflow_key": row.get("workflow_key", ""),
        "orchestrator": row.get("orchestrator", "n8n"),
        "provider_name": row.get("provider_name", ""),
        "provider_task_id": row.get("provider_task_id", ""),
        "source_url": row.get("source_url"),
        "title": row["title"],
        "language": row.get("language", "auto"),
        "status": row["status"],
        "transcript_text": row.get("transcript_text", ""),
        "style_summary": row.get("style_summary", ""),
        "upload_status": row.get("upload_status", "pending"),
        "error": row.get("error", ""),
        "artifacts": artifacts_map,
        "result": result_map,
        "archive": archive if isinstance(archive, dict) else {},
        "analysis_model_profile_id": row.get("analysis_model_profile_id", ""),
        "created_at": row["created_at"],
        "updated_at": row["updated_at"],
    }
def require_auth(authorization: str | None = Header(default=None)) -> dict[str, Any]:
    """Resolve the ``Authorization: Bearer <token>`` header to an account row.

    The scheme is matched case-insensitively (HTTP auth schemes are
    case-insensitive) and an empty token is rejected before any database lookup.

    Raises:
        HTTPException: 401 when the header is missing or malformed, the token
            is unknown, or the token's account no longer exists.
    """
    scheme, _, credentials = (authorization or "").strip().partition(" ")
    token = credentials.strip()
    if scheme.lower() != "bearer" or not token:
        raise HTTPException(status_code=401, detail="Missing bearer token")
    token_row = db.fetch_one("SELECT * FROM auth_tokens WHERE token = ?", (token,))
    if not token_row:
        raise HTTPException(status_code=401, detail="Invalid token")
    account = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (token_row["account_id"],))
    if not account:
        raise HTTPException(status_code=401, detail="Account not found")
    return account


def require_approved(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]:
    """Allow only authenticated accounts whose ``approval_status`` is ``approved`` (else 403)."""
    if account["approval_status"] != "approved":
        raise HTTPException(status_code=403, detail="Account pending approval")
    return account


def require_super_admin(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]:
    """Allow only authenticated accounts whose ``role`` is ``super_admin`` (else 403)."""
    if account["role"] != "super_admin":
        raise HTTPException(status_code=403, detail="Super admin required")
    return account


def require_orchestrator(x_orchestrator_secret: str | None = Header(default=None)) -> bool:
    """Check the orchestrator shared secret header.

    When ``ORCHESTRATOR_SHARED_SECRET`` is empty the check is disabled and every
    request passes. Otherwise the header must equal the secret; the comparison
    is constant-time so response timing does not leak how much of a guess matched.

    Raises:
        HTTPException: 401 when a secret is configured and the header differs.
    """
    import hmac

    if not ORCHESTRATOR_SHARED_SECRET:
        return True
    # Compare as bytes: hmac.compare_digest rejects non-ASCII str arguments.
    supplied = (x_orchestrator_secret or "").encode("utf-8")
    expected = str(ORCHESTRATOR_SHARED_SECRET).encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid orchestrator secret")
    return True


def create_content_source(
    *,
    account_id: str,
    project_id: str,
    source_kind: str,
    platform: str = "",
    handle: str = "",
    source_url: str = "",
    title: str = "",
    local_path: str = "",
    metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Insert a ``content_sources`` row and return it; missing metadata is stored as ``{}``."""
    source_id = make_id("source")
    now = utc_now()
    db.execute(
        """
        INSERT INTO content_sources (
            id, user_id, project_id, source_kind, platform, handle,
            source_url, title, local_path, metadata_json, created_at, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            source_id,
            account_id,
            project_id,
            source_kind,
            platform,
            handle,
            source_url,
            title,
            local_path,
            json.dumps(metadata or {}, ensure_ascii=False),
            now,
            now,
        ),
    )
    return db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,))


def merge_json_field(current_raw: str | None, updates: dict[str, Any]) -> str:
    """Shallow-merge ``updates`` into a JSON object column and return the new JSON text."""
    current = parse_json_object(current_raw or "{}")
    current.update(updates)
    return json.dumps(current, ensure_ascii=False)


def update_content_source_metadata(source_id: str, updates: dict[str, Any]) -> dict[str, Any]:
    """Shallow-merge ``updates`` into a content source's metadata and return the fresh row.

    Raises:
        HTTPException: 404 when the content source does not exist.
    """
    row = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,))
    if not row:
        raise HTTPException(status_code=404, detail="Content source not found")
    db.execute(
        "UPDATE content_sources SET metadata_json = ?, updated_at = ? WHERE id = ?",
        (merge_json_field(row.get("metadata_json") or "{}", updates), utc_now(), source_id),
    )
    return db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,))
def update_job_state(
    job_id: str,
    *,
    status: str,
    error: str = "",
    provider_name: str | None = None,
    provider_task_id: str | None = None,
    artifacts: dict[str, Any] | None = None,
    result: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Persist a job status transition and append the matching ``job.<status>`` event.

    ``artifacts`` and ``result`` are shallow-merged into the stored JSON
    objects. The download archive is recomputed from the merged state on every
    call and stored under ``artifacts["download_archive"]``. Provider fields
    left as ``None`` keep their stored values.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
    if not row:
        raise HTTPException(status_code=404, detail="Job not found")

    artifacts_state = parse_json_object(row.get("artifacts_json") or "{}")
    artifacts_state.update(artifacts or {})
    result_state = parse_json_object(row.get("result_json") or "{}")
    result_state.update(result or {})

    archive = best_effort_job_download_archive(
        {**row, "status": status},
        artifacts=artifacts_state,
        result=result_state,
    )
    artifacts_state["download_archive"] = archive
    # Back-fill the bundle dir / artifact list only where nothing usable is stored yet.
    has_bundle_dir = bool(str(artifacts_state.get("download_bundle_dir") or "").strip())
    if archive.get("download_dir") and not has_bundle_dir:
        artifacts_state["download_bundle_dir"] = archive["download_dir"]
    if archive.get("items") and not isinstance(artifacts_state.get("download_artifacts"), list):
        artifacts_state["download_artifacts"] = archive["items"]

    effective_provider = row.get("provider_name", "") if provider_name is None else provider_name
    effective_task_id = row.get("provider_task_id", "") if provider_task_id is None else provider_task_id

    db.execute(
        """
        UPDATE jobs
        SET status = ?, error = ?, provider_name = ?, provider_task_id = ?,
            artifacts_json = ?, result_json = ?, updated_at = ?
        WHERE id = ?
        """,
        (
            status,
            error,
            effective_provider,
            effective_task_id,
            json.dumps(artifacts_state, ensure_ascii=False),
            json.dumps(result_state, ensure_ascii=False),
            utc_now(),
            job_id,
        ),
    )
    # The event records only what this call contributed, not the merged state.
    append_job_event(
        job_id,
        f"job.{status}",
        {
            "provider_name": effective_provider,
            "provider_task_id": effective_task_id,
            "error": error,
            "artifacts": artifacts or {},
            "result": result or {},
        },
    )
    return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
def job_context_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Serialise a job together with its related records.

    Adds ``project``, ``assistant``, ``knowledge_base``, ``content_source`` and
    ``parent_job`` (each ``None`` when absent), plus ``child_jobs`` and
    ``events`` ordered by creation time.
    """
    payload = job_payload(row)
    payload.update(
        {
            "parent_job": None,
            "child_jobs": [],
            "project": None,
            "assistant": None,
            "knowledge_base": None,
            "content_source": None,
            "events": [],
        }
    )
    job_id = row["id"]

    if row.get("project_id"):
        project_row = db.fetch_one("SELECT * FROM projects WHERE id = ?", (row["project_id"],))
        if project_row:
            payload["project"] = project_payload(project_row)

    if row.get("assistant_id"):
        assistant_row = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],))
        if assistant_row:
            payload["assistant"] = assistant_payload(assistant_row)

    kb_row = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (row["knowledge_base_id"],))
    if kb_row:
        payload["knowledge_base"] = knowledge_base_payload(kb_row)

    if row.get("content_source_id"):
        source_row = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["content_source_id"],))
        if source_row:
            payload["content_source"] = content_source_payload(source_row)

    if row.get("parent_job_id"):
        parent_row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (row["parent_job_id"],))
        if parent_row:
            payload["parent_job"] = job_payload(parent_row)

    children = db.fetch_all("SELECT * FROM jobs WHERE parent_job_id = ? ORDER BY created_at ASC", (job_id,))
    payload["child_jobs"] = [job_payload(child) for child in children]

    events = db.fetch_all("SELECT * FROM job_events WHERE job_id = ? ORDER BY created_at ASC", (job_id,))
    payload["events"] = [job_event_payload(event) for event in events]

    return payload
# Strong references to in-flight local fallback tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_LOCAL_ORCHESTRATOR_TASKS: set[asyncio.Task] = set()


async def run_local_orchestrated_job(job_id: str, workflow_key: str) -> None:
    """Run a workflow inside this process when n8n cannot take it.

    Any failure (including an unsupported ``workflow_key``) marks the job as
    ``failed`` with provider ``collector-local`` and appends a
    ``workflow.local.failed`` event; nothing is re-raised from the workflow.
    """
    try:
        runner = {
            "analysis_pipeline": internal_run_analysis,
            "content_source_sync_pipeline": internal_content_source_sync,
            "real_cut_pipeline": internal_real_cut_run,
            "ai_video_pipeline": internal_ai_video_render,
        }.get(workflow_key)
        if runner is None:
            raise HTTPException(status_code=400, detail=f"Unsupported local workflow fallback: {workflow_key}")
        await runner(None, job_id, True)
        return
    except HTTPException as exc:
        reason = str(exc.detail)
        job_error = reason
    except Exception as exc:
        reason = str(exc)
        job_error = f"Local orchestrator failed: {exc}"

    if db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)):
        # update_job_state merges ``result`` into the stored object itself, so it
        # must receive a dict. Passing pre-serialised JSON text makes its
        # dict.update() raise, which left failed jobs stuck without a status.
        update_job_state(
            job_id,
            status="failed",
            provider_name="collector-local",
            provider_task_id="",
            error=job_error,
            result={"local_orchestrator": {"error": reason}},
        )
    append_job_event(job_id, "workflow.local.failed", {"workflow_key": workflow_key, "error": reason})


def _store_trigger_outcome(
    job_id: str,
    *,
    provider_name: str,
    provider_task_id: str,
    trigger_info: dict[str, Any],
) -> dict[str, Any] | None:
    """Record which provider took the job and merge ``trigger_info`` under ``result.n8n_trigger``."""
    current = db.fetch_one("SELECT result_json FROM jobs WHERE id = ?", (job_id,)) or {}
    db.execute(
        """
        UPDATE jobs
        SET provider_name = ?, provider_task_id = ?, result_json = ?, updated_at = ?
        WHERE id = ?
        """,
        (
            provider_name,
            provider_task_id,
            merge_json_field(current.get("result_json") or "{}", {"n8n_trigger": trigger_info}),
            utc_now(),
            job_id,
        ),
    )
    return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))


def _fall_back_to_local_orchestrator(job_id: str, workflow_key: str, reason: str) -> dict[str, Any] | None:
    """Hand the job to the in-process runner and record why n8n was bypassed."""
    append_job_event(job_id, "workflow.trigger.fallback", {"workflow_key": workflow_key, "reason": reason})
    updated = _store_trigger_outcome(
        job_id,
        provider_name="collector-local",
        provider_task_id="",
        trigger_info={"requested": True, "fallback": "local", "reason": reason},
    )
    task = asyncio.create_task(run_local_orchestrated_job(job_id, workflow_key))
    _LOCAL_ORCHESTRATOR_TASKS.add(task)
    task.add_done_callback(_LOCAL_ORCHESTRATOR_TASKS.discard)
    return updated


async def trigger_orchestrated_job(job_row: dict[str, Any]) -> dict[str, Any]:
    """Queue a job and hand it to n8n, falling back to in-process execution.

    The job is first marked ``queued`` for provider ``n8n``. If n8n is not
    configured, or the trigger call raises, the workflow is started as a
    background task in this process instead. Returns the refreshed job row.
    """
    job_id = job_row["id"]
    workflow_key = job_row.get("workflow_key") or "analysis_pipeline"
    append_job_event(job_id, "workflow.trigger.requested", {"workflow_key": workflow_key})
    update_job_state(
        job_id,
        status="queued",
        provider_name="n8n",
        provider_task_id="",
        result={"n8n_trigger": {"requested": True}},
    )
    line_type = job_row.get("line_type", "analysis")
    # Both camelCase and snake_case keys are sent, as before.
    payload = {
        "jobId": job_id,
        "job_id": job_id,
        "workflowKey": workflow_key,
        "workflow_key": workflow_key,
        "lineType": line_type,
        "line_type": line_type,
    }
    if not n8n_client.enabled:
        return _fall_back_to_local_orchestrator(job_id, workflow_key, "n8n is not configured")

    try:
        trigger_result = await n8n_client.trigger(workflow_key, payload)
    except Exception as exc:
        return _fall_back_to_local_orchestrator(job_id, workflow_key, str(exc))

    provider_task_id = str(trigger_result.get("executionId") or "")
    updated = _store_trigger_outcome(
        job_id,
        provider_name="n8n",
        provider_task_id=provider_task_id,
        trigger_info=trigger_result,
    )
    append_job_event(
        job_id,
        "workflow.trigger.accepted",
        {"provider_task_id": provider_task_id, "trigger_result": trigger_result},
    )
    return updated
async def call_model(profile: dict[str, Any], system_prompt: str, user_prompt: str, temperature: float = 0.4) -> str:
    """Ask the configured chat model for a completion, never raising.

    Returns the stripped model output. If the call fails or yields nothing, a
    canned style summary built from the first 220 characters of ``user_prompt``
    is returned instead; failures are logged so the fallback is not silent.
    """
    import logging

    try:
        content = await openai_client.chat_completion(
            base_url=profile["base_url"],
            api_key=profile.get("api_key", ""),
            model=profile["model_name"],
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=temperature,
        )
        if content:
            return content.strip()
    except Exception as exc:
        # Deliberately best-effort: callers always get text back. Log the cause
        # so a misconfigured or unreachable model is still diagnosable.
        logging.getLogger(__name__).warning("Model call failed; using fallback summary: %s", exc)
    excerpt = user_prompt.strip().replace("\n", " ")[:220]
    return f"风格摘要:内容以强结论开头,节奏偏短句,强调冲突转折和行动指令。素材摘要:{excerpt}"


async def summarize_style(profile: dict[str, Any], transcript_text: str, title: str) -> str:
    """Ask the model to break down the copywriting style of a transcript."""
    prompt = (
        f"标题:{title}\n\n"
        f"素材全文:\n{transcript_text}\n\n"
        "请提炼这段素材的文案风格、结构节奏、开头钩子、情绪推进、收尾 CTA,并给出可复用的学习结论。"
    )
    system_prompt = "你是短视频文案拆解师,输出简洁、结构化、适合沉淀进知识库。"
    return await call_model(profile, system_prompt, prompt, temperature=0.3)


async def generate_content_blueprint(
    profile: dict[str, Any],
    *,
    title: str,
    transcript_text: str,
    style_summary: str,
    agent_prompt: str = "",
    generation_goal: str = "",
) -> dict[str, Any]:
    """Generate an analysis / rewrite / storyboard blueprint for a transcript.

    The model's JSON answer is used when it contains a non-empty
    ``storyboards`` value. Otherwise a deterministic blueprint is built locally
    from up to the first four non-blank transcript lines.
    """
    system_prompt = (
        "你是短视频内容策略师。"
        "必须输出 JSON 对象,不要输出 Markdown,不要输出多余解释。"
    )
    user_prompt = (
        f"标题:{title}\n\n"
        f"素材转写:\n{transcript_text}\n\n"
        f"风格拆解:\n{style_summary}\n\n"
        f"智能体补充约束:\n{agent_prompt or '无'}\n\n"
        f"生成目标:\n{generation_goal or '围绕原素材做二创短视频'}\n\n"
        "请输出如下 JSON 结构:"
        "{"
        '"analysis":{"hook":"","structure":[],"style_tags":[],"cta":""},'
        '"rewrite":{"title":"","script":"","summary":""},'
        '"storyboards":['
        '{"shot_index":1,"title":"","narration":"","visual":"","first_frame_prompt":"","last_frame_prompt":"","video_prompt":"","duration_sec":5}'
        "]"
        "}"
    )
    raw = await call_model(profile, system_prompt, user_prompt, temperature=0.5)
    parsed = parse_json_object(raw)
    if parsed.get("storyboards"):
        return parsed

    # Local fallback: one shot per non-blank transcript line (max four), or a
    # single shot from the head of the transcript when there are no such lines.
    paragraphs = [part.strip() for part in transcript_text.split("\n") if part.strip()]
    seed_segments = paragraphs[:4] or [transcript_text[:1200]]
    fallback_storyboards: list[dict[str, Any]] = []
    for idx, segment in enumerate(seed_segments, start=1):
        snippet = segment[:180]
        fallback_storyboards.append(
            {
                "shot_index": idx,
                "title": f"镜头{idx}",
                "narration": snippet,
                "visual": f"围绕这段内容构建具象画面:{snippet}",
                "first_frame_prompt": f"短视频首帧,突出主题:{snippet}",
                "last_frame_prompt": f"短视频尾帧,强化结论和行动指令:{snippet}",
                "video_prompt": f"基于首尾帧生成连贯镜头,内容是:{snippet}",
                "duration_sec": 5,
            }
        )

    return {
        "analysis": {
            "hook": title,
            "structure": ["结论开场", "核心论点", "例证推进", "收尾行动"],
            "style_tags": ["短句", "结论先行", "强 CTA"],
            "cta": "引导用户采取下一步行动",
        },
        "rewrite": {
            "title": title,
            "script": transcript_text[:3000],
            "summary": style_summary[:500],
        },
        "storyboards": fallback_storyboards,
    }
title: str, + transcript_text: str, + style_summary: str, + agent_prompt: str = "", + generation_goal: str = "", +) -> dict[str, Any]: + system_prompt = ( + "你是短视频内容策略师。" + "必须输出 JSON 对象,不要输出 Markdown,不要输出多余解释。" + ) + user_prompt = ( + f"标题:{title}\n\n" + f"素材转写:\n{transcript_text}\n\n" + f"风格拆解:\n{style_summary}\n\n" + f"智能体补充约束:\n{agent_prompt or '无'}\n\n" + f"生成目标:\n{generation_goal or '围绕原素材做二创短视频'}\n\n" + "请输出如下 JSON 结构:" + "{" + '"analysis":{"hook":"","structure":[],"style_tags":[],"cta":""},' + '"rewrite":{"title":"","script":"","summary":""},' + '"storyboards":[' + '{"shot_index":1,"title":"","narration":"","visual":"","first_frame_prompt":"","last_frame_prompt":"","video_prompt":"","duration_sec":5}' + "]" + "}" + ) + raw = await call_model(profile, system_prompt, user_prompt, temperature=0.5) + parsed = parse_json_object(raw) + if parsed.get("storyboards"): + return parsed + + fallback_storyboards: list[dict[str, Any]] = [] + paragraphs = [part.strip() for part in transcript_text.split("\n") if part.strip()] + seed_segments = paragraphs[:4] or [transcript_text[:1200]] + for idx, segment in enumerate(seed_segments, start=1): + snippet = segment[:180] + fallback_storyboards.append( + { + "shot_index": idx, + "title": f"镜头{idx}", + "narration": snippet, + "visual": f"围绕这段内容构建具象画面:{snippet}", + "first_frame_prompt": f"短视频首帧,突出主题:{snippet}", + "last_frame_prompt": f"短视频尾帧,强化结论和行动指令:{snippet}", + "video_prompt": f"基于首尾帧生成连贯镜头,内容是:{snippet}", + "duration_sec": 5, + } + ) + + return { + "analysis": { + "hook": title, + "structure": ["结论开场", "核心论点", "例证推进", "收尾行动"], + "style_tags": ["短句", "结论先行", "强 CTA"], + "cta": "引导用户采取下一步行动", + }, + "rewrite": { + "title": title, + "script": transcript_text[:3000], + "summary": style_summary[:500], + }, + "storyboards": fallback_storyboards, + } + + +def fallback_transcript_from_text(title: str, content: str) -> str: + return f"标题:{title}\n\n正文:\n{content.strip()}" + + +DOMESTIC_PLATFORMS = {"douyin", "xiaohongshu", 
"bilibili", "kuaishou", "wechat_video"} +PLATFORM_ALIASES = { + "抖音": "douyin", + "douyin": "douyin", + "小红书": "xiaohongshu", + "xiaohongshu": "xiaohongshu", + "哔哩哔哩": "bilibili", + "b站": "bilibili", + "bilibili": "bilibili", + "快手": "kuaishou", + "kuaishou": "kuaishou", + "微信视频号": "wechat_video", + "视频号": "wechat_video", + "wechat_video": "wechat_video", + "youtube": "youtube", +} +PLATFORM_LABELS = { + "douyin": "抖音", + "xiaohongshu": "小红书", + "bilibili": "哔哩哔哩", + "kuaishou": "快手", + "wechat_video": "微信视频号", +} + + +def normalize_platform_slug(value: str | None, *, allow_blank: bool = True) -> str: + normalized = str(value or "").strip().lower() + if not normalized: + return "" if allow_blank else "douyin" + normalized = PLATFORM_ALIASES.get(normalized, normalized) + return normalized + + +def ensure_domestic_platform(value: str | None, *, allow_blank: bool = True) -> str: + normalized = normalize_platform_slug(value, allow_blank=allow_blank) + if not normalized: + return "" + if normalized not in DOMESTIC_PLATFORMS: + raise HTTPException(status_code=400, detail=f"Unsupported platform for domestic build: {value}") + return normalized + + +def platform_label(platform: str | None) -> str: + normalized = normalize_platform_slug(platform, allow_blank=True) + return PLATFORM_LABELS.get(normalized, normalized or "抖音") + + +def infer_platform_from_url(source_url: str) -> str: + normalized = source_url.strip().lower() + if "bilibili.com" in normalized or "b23.tv" in normalized: + return "bilibili" + if "douyin.com" in normalized or "iesdouyin.com" in normalized: + return "douyin" + if "xiaohongshu.com" in normalized or "xhslink.com" in normalized: + return "xiaohongshu" + if ( + "kuaishou.com" in normalized + or "chenzhongtech.com" in normalized + or "v.kuaishou.com" in normalized + ): + return "kuaishou" + if "channels.weixin.qq.com" in normalized or "mp.weixin.qq.com/s" in normalized: + return "wechat_video" + return "" + + +def command_exists(name: str) -> bool: + 
return shutil.which(name) is not None + + +def run_command(command: list[str], cwd: Path | None = None, timeout: float | None = None) -> tuple[int, str, str]: + try: + proc = subprocess.run( + command, + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=timeout, + ) + return proc.returncode, proc.stdout, proc.stderr + except subprocess.TimeoutExpired as exc: + stdout = exc.stdout if isinstance(exc.stdout, str) else (exc.stdout or b"").decode("utf-8", errors="ignore") + stderr = exc.stderr if isinstance(exc.stderr, str) else (exc.stderr or b"").decode("utf-8", errors="ignore") + detail = stderr or f"Command timed out after {timeout} seconds" + return 124, stdout, detail + + +def discover_account_video_links(source_url: str, max_items: int) -> tuple[list[dict[str, Any]], dict[str, Any]]: + if not command_exists(YTDLP_BIN): + raise HTTPException(status_code=503, detail="yt-dlp is not configured") + + discovery_cmd = [ + YTDLP_BIN, + "--flat-playlist", + "--playlist-end", + str(max_items), + "--print", + "%(webpage_url)s\t%(title)s\t%(id)s", + source_url, + ] + code, stdout, stderr = run_command(discovery_cmd, timeout=180) + raw_lines = [line.strip() for line in stdout.splitlines() if line.strip()] + items: list[dict[str, Any]] = [] + seen_urls: set[str] = set() + for line in raw_lines: + parts = line.split("\t") + video_url = parts[0].strip() if parts else "" + raw_title = parts[1].strip() if len(parts) > 1 else "" + raw_external_id = parts[2].strip() if len(parts) > 2 else "" + if not video_url or video_url == "NA" or video_url in seen_urls: + continue + seen_urls.add(video_url) + items.append( + { + "video_url": video_url, + "title": raw_title if raw_title and raw_title != "NA" else "短视频素材", + "external_id": raw_external_id if raw_external_id != "NA" else "", + } + ) + + debug_payload = { + "discovery_command": discovery_cmd, + "discovery_stdout_preview": raw_lines[: min(len(raw_lines), max_items)], + "discovery_stderr": 
def validate_real_cut_source_job(source_job: dict[str, Any]) -> None:
    """Reject jobs that cannot serve as the source of a real-cut run.

    Uploaded videos are always accepted; video-link jobs are accepted only
    once their status is ``completed``.

    Raises:
        HTTPException: 400 for any other source type, 409 for a video-link
            job that has not completed.
    """
    kind = source_job.get("source_type", "")
    if kind == "upload_video":
        return
    if kind != "video_link":
        raise HTTPException(status_code=400, detail="Real-cut source job must come from upload_video or video_link")
    if source_job.get("status") != "completed":
        raise HTTPException(status_code=409, detail="Video link source job must be completed before real-cut staging")
def cutvideo_run_has_materialized_outputs(run_payload: dict[str, Any]) -> bool:
    """Tell whether a CutVideo run payload already carries produced output.

    Returns ``True`` when any known output field holds a non-empty value, or
    when ``generated_at`` is a non-blank string; ``False`` otherwise.
    """
    output_fields = (
        "manifest",
        "assets",
        "segments",
        "top_segments",
        "tool_report",
        "llm_review_summary",
        "exports",
        "timeline",
        "summary_markdown",
        "clips",
        "downloads",
        "transcripts",
        "files",
    )
    # Values that count as "nothing produced" (compared with ==).
    blank_values = (None, "", [], {}, 0)
    if any(run_payload.get(field) not in blank_values for field in output_fields):
        return True
    generated_at = str(run_payload.get("generated_at") or "")
    return generated_at.strip() != ""
normalized_expected = expected_name.casefold() + for item in items: + if not isinstance(item, dict): + continue + run_id = str(item.get("run_id") or item.get("id") or "").strip() + job_name = str(item.get("job_name") or item.get("name") or "").strip() + normalized_job_name = job_name.casefold() + normalized_run_id = run_id.casefold() + if ( + normalized_job_name == normalized_expected + or normalized_run_id == normalized_expected + or normalized_job_name.endswith(normalized_expected) + or normalized_run_id.endswith(normalized_expected) + ): + detail = await cutvideo_client.get_run(run_id or job_name) + return { + "run_id": run_id, + "summary": item, + "detail": detail, + } + return None + + +def create_job_record( + *, + account_id: str, + project_id: str, + knowledge_base_id: str, + parent_job_id: str | None = None, + source_type: str, + line_type: str, + workflow_key: str, + title: str, + language: str = "auto", + source_url: str = "", + assistant_id: str | None = None, + content_source_id: str | None = None, + artifacts: dict[str, Any] | None = None, + analysis_model_profile_id: str = "", +) -> dict[str, Any]: + job_id = make_id("job") + now = utc_now() + artifacts_map = dict(artifacts or {}) + archive_payload = best_effort_job_download_archive( + { + "id": job_id, + "user_id": account_id, + "project_id": project_id, + "source_type": source_type, + "line_type": line_type, + "workflow_key": workflow_key, + "title": title, + "status": "pending", + }, + artifacts=artifacts_map, + result={}, + ) + artifacts_map["download_archive"] = archive_payload + if archive_payload.get("download_dir"): + artifacts_map.setdefault("download_bundle_dir", archive_payload["download_dir"]) + if archive_payload.get("items"): + artifacts_map.setdefault("download_artifacts", archive_payload["items"]) + db.execute( + """ + INSERT INTO jobs ( + id, user_id, project_id, parent_job_id, assistant_id, knowledge_base_id, content_source_id, + source_type, line_type, workflow_key, orchestrator, 
async def wait_for_huobao_image(image_id: str | int) -> dict[str, Any]:
    """Poll Huobao until the image task completes, fails, or times out.

    Args:
        image_id: Huobao image task identifier.

    Returns:
        The last payload returned by Huobao, whose ``status`` is
        ``completed`` or ``failed`` (compared case-insensitively).

    Raises:
        RuntimeError: The task did not reach a terminal status within
            ``HUOBAO_MAX_WAIT_SEC`` seconds.
    """
    # Huobao tasks use the Huobao wait budget, matching wait_for_huobao_video;
    # the CutVideo limit belongs to a different integration.
    deadline = now_ts() + HUOBAO_MAX_WAIT_SEC
    while True:
        payload = await huobao_client.get_image(str(image_id))
        status = str(payload.get("status") or "").lower()
        if status in {"completed", "failed"}:
            return payload
        if now_ts() >= deadline:
            raise RuntimeError(f"Huobao image task timed out: {image_id}")
        await asyncio.sleep(HUOBAO_POLL_INTERVAL_SEC)
"9:16": + return "1024x1536" + if normalized == "16:9": + return "1536x1024" + if normalized == "1:1": + return "1024x1024" + return "1024x1536" + + +async def transcribe_media(job_dir: Path, source_path: Path, title: str, source_url: str = "") -> tuple[str, dict[str, Any]]: + artifacts: dict[str, Any] = {} + transcript = "" + media_path = source_path + artifacts["source_path"] = str(media_path) + + if not source_path.exists(): + transcript = ( + f"素材标题:{title}\n" + f"素材来源:{source_url or source_path.name}\n\n" + "当前环境未找到可直接处理的本地视频文件,已记录来源信息并进入降级学习流程。" + ) + return transcript, artifacts + + audio_path = job_dir / "audio.wav" + if command_exists(FFMPEG_BIN): + code, _, err = run_command([FFMPEG_BIN, "-y", "-i", str(source_path), "-ar", "16000", "-ac", "1", str(audio_path)]) + if code == 0 and audio_path.exists(): + artifacts["audio_path"] = str(audio_path) + media_path = audio_path + elif err: + artifacts["ffmpeg_error"] = err.strip()[:500] + + if asr_http_client.enabled and media_path.exists(): + try: + asr_payload = await asr_http_client.transcribe_audio(media_path) + artifacts["asr_http_payload"] = { + "success": bool(asr_payload.get("success", True)), + "duration_ms": asr_payload.get("duration_ms"), + "error_message": str(asr_payload.get("error_message") or "")[:500], + } + transcript = str(asr_payload.get("text") or "").strip() + if transcript: + artifacts["asr_backend"] = "http" + except Exception as exc: + error_detail = str(exc).strip() or exc.__class__.__name__ + artifacts["asr_http_error"] = error_detail[:500] + + if WHISPER_BIN and Path(WHISPER_BIN).exists() and Path(WHISPER_MODEL).exists(): + out_prefix = job_dir / "whisper" + code, stdout, stderr = run_command([ + WHISPER_BIN, + "-m", + WHISPER_MODEL, + "-f", + str(media_path), + "-otxt", + "-of", + str(out_prefix), + ]) + txt_path = Path(str(out_prefix) + ".txt") + if code == 0 and txt_path.exists(): + cli_transcript = txt_path.read_text(encoding="utf-8", errors="ignore").strip() + if cli_transcript: + 
transcript = cli_transcript + artifacts["transcript_path"] = str(txt_path) + artifacts["asr_backend"] = artifacts.get("asr_backend") or "whisper_cli" + else: + artifacts["whisper_stdout"] = stdout.strip()[:500] + artifacts["whisper_error"] = stderr.strip()[:500] + + if not transcript: + transcript = ( + f"素材标题:{title}\n" + f"素材来源:{source_url or source_path.name}\n\n" + "当前环境未完成真实 ASR,已保留原始素材供后续转写。请结合标题、来源和上下文进行初步风格学习。" + ) + return transcript, artifacts + + +def ensure_user_kb(account_id: str, project_id: str = "", username: str = "默认用户") -> dict[str, Any]: + project = resolve_target_project(account_id, project_id or None, username=username) + row = db.fetch_one( + "SELECT * FROM knowledge_bases WHERE user_id = ? AND project_id = ? ORDER BY created_at ASC LIMIT 1", + (account_id, project["id"]), + ) + if row: + return row + kb_id = make_id("kb") + now = utc_now() + db.execute( + """ + INSERT INTO knowledge_bases (id, user_id, project_id, name, description, sync_status, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + (kb_id, account_id, project["id"], "默认知识库", "系统为新用户自动创建", "ready", now, now), + ) + return db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (kb_id,)) + + +async def process_job(job_id: str) -> None: + row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)) + if not row: + return + now = utc_now() + db.execute("UPDATE jobs SET status = ?, updated_at = ? 
WHERE id = ?", ("processing", now, job_id)) + append_job_event(job_id, "job.processing", {}) + + try: + artifacts = json.loads(row.get("artifacts_json") or "{}") + transcript_text = row.get("transcript_text", "") + job_dir = job_storage_dir( + account_id=row.get("user_id", ""), + project_id=row.get("project_id", ""), + job_id=job_id, + ) + job_dir.mkdir(parents=True, exist_ok=True) + + if row["source_type"] == "text": + transcript_text = fallback_transcript_from_text(row["title"], artifacts.get("input_text", "")) + elif row["source_type"] == "video_link": + downloaded = job_dir / "source.mp4" + if command_exists(YTDLP_BIN): + code, stdout, stderr = run_command([ + YTDLP_BIN, + "--no-playlist", + "-o", + str(downloaded), + row.get("source_url") or "", + ], cwd=job_dir) + if code == 0 and downloaded.exists(): + artifacts["download_stdout"] = stdout.strip()[:500] + else: + artifacts["download_error"] = stderr.strip()[:500] + transcript_text, extra = await transcribe_media(job_dir, downloaded if downloaded.exists() else job_dir / "placeholder.mp4", row["title"], row.get("source_url") or "") + artifacts.update(extra) + elif row["source_type"] == "upload_video": + source_path = Path(artifacts.get("uploaded_path", "")) + transcript_text, extra = await transcribe_media(job_dir, source_path, row["title"], row.get("source_url") or "") + artifacts.update(extra) + + profile = model_profile_for_account(row["user_id"], row.get("analysis_model_profile_id") or None) + style_summary = await summarize_style(profile, transcript_text, row["title"]) + assistant = None + if row.get("assistant_id"): + assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],)) + content_blueprint = await generate_content_blueprint( + profile, + title=row["title"], + transcript_text=transcript_text, + style_summary=style_summary, + agent_prompt=(assistant or {}).get("system_prompt", ""), + generation_goal=(assistant or {}).get("generation_goal", ""), + ) + combined_text = ( + 
f"{transcript_text}\n\n" + "------\n" + f"风格学习结论:\n{style_summary}\n\n" + "------\n" + f"二创文案:\n{(content_blueprint.get('rewrite') or {}).get('script', '')}\n\n" + "------\n" + f"分镜:\n{json.dumps(content_blueprint.get('storyboards') or [], ensure_ascii=False, indent=2)}" + ) + download_bundle = persist_download_bundle( + account_id=row.get("user_id", ""), + project_id=row.get("project_id", ""), + job_id=job_id, + title=row["title"], + source_type=row["source_type"], + source_url=row.get("source_url") or "", + transcript_text=transcript_text, + style_summary=style_summary, + combined_text=combined_text, + content_blueprint=content_blueprint, + ) + kb_row = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (row["knowledge_base_id"],)) + if not kb_row: + raise RuntimeError("Knowledge base not found") + document_id = make_id("doc") + timestamp = utc_now() + db.execute( + """ + INSERT INTO knowledge_documents ( + id, knowledge_base_id, title, source_type, source_url, transcript_text, + style_summary, combined_text, analysis_json, storyboard_json, source_artifact_json, + analysis_model_profile_id, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + document_id, + row["knowledge_base_id"], + row["title"], + row["source_type"], + row.get("source_url") or "", + transcript_text, + style_summary, + combined_text, + json.dumps(content_blueprint.get("analysis") or {}, ensure_ascii=False), + json.dumps(content_blueprint.get("storyboards") or [], ensure_ascii=False), + json.dumps(artifacts, ensure_ascii=False), + profile["id"], + timestamp, + timestamp, + ), + ) + update_job_state( + job_id, + status="completed", + artifacts={ + "document_id": document_id, + "project_job_dir": str(job_dir), + **download_bundle, + **artifacts, + }, + result={ + "analysis": content_blueprint.get("analysis") or {}, + "rewrite": content_blueprint.get("rewrite") or {}, + "storyboards": content_blueprint.get("storyboards") or [], + "document_id": document_id, + }, + ) + db.execute( + """ + UPDATE jobs + SET transcript_text = ?, style_summary = ?, upload_status = ?, updated_at = ? + WHERE id = ? + """, + (transcript_text, style_summary, "ready", timestamp, job_id), + ) + db.execute( + "UPDATE knowledge_bases SET sync_status = ?, updated_at = ? 
def probe_tcp(url: str, timeout: float = 3.0) -> dict[str, Any]:
    """Check whether the host/port named by a URL accepts TCP connections.

    Args:
        url: Service URL; the port defaults to 443 for ``https`` and 80
            otherwise. A blank value means "not configured".
        timeout: Connection timeout in seconds.

    Returns:
        A dict with ``configured``, ``reachable``, ``status_code`` (always 0
        here), ``error`` and ``url``. ``error`` is ``"not_configured"``,
        ``"invalid_url"``, the connection error text, or ``""`` on success.
    """

    def _result(configured: bool, reachable: bool, error: str, target: str) -> dict[str, Any]:
        return {"configured": configured, "reachable": reachable, "status_code": 0, "error": error, "url": target}

    if not url:
        return _result(False, False, "not_configured", "")
    try:
        parsed = urlparse(url)
        host = parsed.hostname
        # .port raises ValueError for a non-numeric or out-of-range port.
        port = parsed.port or (443 if parsed.scheme == "https" else 80)
    except ValueError:
        return _result(True, False, "invalid_url", url)
    if not host:
        return _result(True, False, "invalid_url", url)
    try:
        # create_connection resolves the host and tries each returned address,
        # so IPv6-only services are reachable too; the context manager closes
        # the socket on every path.
        with socket.create_connection((host, port), timeout=timeout):
            pass
    except Exception as exc:  # pragma: no cover - operational probe
        return _result(True, False, str(exc), url)
    return _result(True, True, "", url)
def local_model_public_base_url() -> str:
    """Derive the ``scheme://host[:port]`` root of the local model API.

    Returns ``""`` when ``LOCAL_OPENAI_BASE_URL`` is unset. A missing host, as
    well as ``host.docker.internal`` and ``localhost``, is rewritten to
    ``127.0.0.1``; the scheme defaults to ``http``; any path is dropped.
    """
    if not LOCAL_OPENAI_BASE_URL:
        return ""
    parts = urlparse(LOCAL_OPENAI_BASE_URL)
    hostname = parts.hostname
    if not hostname or hostname in ("host.docker.internal", "localhost"):
        hostname = "127.0.0.1"
    origin = f"{parts.scheme or 'http'}://{hostname}"
    explicit_port = parts.port
    return f"{origin}:{explicit_port}" if explicit_port else origin
@app.get("/healthz")
def healthz() -> dict[str, Any]:
    """Report liveness together with the configured storage paths and integration URLs."""
    storage_paths = {
        "dbPath": DB_PATH,
        "downloadsDir": str(DOWNLOADS_DIR),
        "jobsDir": str(JOBS_DIR),
        "modelsDir": str(MODELS_DIR),
    }
    integration_settings = {
        "defaultExternalBaseUrl": DEFAULT_EXTERNAL_BASE_URL,
        "localModelBaseUrl": LOCAL_OPENAI_BASE_URL,
        "asrHttpBaseUrl": ASR_HTTP_BASE_URL,
        "n8nBaseUrl": N8N_BASE_URL,
        "cutvideoBaseUrl": CUTVIDEO_BASE_URL,
        "cutvideoUploadTimeoutSec": CUTVIDEO_UPLOAD_TIMEOUT_SEC,
        "huobaoBaseUrl": HUOBAO_BASE_URL,
        "liveRecorderBaseUrl": LIVE_RECORDER_BASE_URL,
    }
    # Merge order keeps the response keys in their established sequence.
    return {"status": "ok", **storage_paths, **integration_settings}
"upload_error": cutvideo_uploads.get("error", ""), + "upload_url": cutvideo_uploads.get("url", ""), + }, + "huobao": { + "base_url": HUOBAO_BASE_URL, + **probe_http(HUOBAO_BASE_URL, "/health"), + }, + "n8n": { + "base_url": N8N_BASE_URL, + **probe_http(N8N_BASE_URL, "/healthz"), + }, + "asr": { + "base_url": ASR_HTTP_BASE_URL, + **probe_tcp(ASR_HTTP_BASE_URL), + }, + "live_recorder": { + "base_url": LIVE_RECORDER_BASE_URL, + **probe_http(LIVE_RECORDER_BASE_URL, "/api/healthz"), + }, + } + + +@app.get("/v2/storage/status") +def storage_status( + project_id: str | None = Query(default=None), + account: dict[str, Any] = Depends(require_approved), +) -> dict[str, Any]: + normalized_project_id = (project_id or "").strip() + if normalized_project_id: + resolve_target_project(account["id"], normalized_project_id, username=account["username"]) + jobs_account_root = job_account_root(account["id"]) + downloads_account_root = download_account_root(account["id"]) + jobs_project_root = job_project_root(account["id"], normalized_project_id or None) + downloads_project_root = tenant_download_root(account["id"], normalized_project_id or None) + return { + "strategy": { + "database": {"mode": storage_mode(Path(DB_PATH)), "path": DB_PATH}, + "jobs": {"mode": storage_mode(JOBS_DIR), "path": str(JOBS_DIR)}, + "downloads": {"mode": storage_mode(DOWNLOADS_DIR), "path": str(DOWNLOADS_DIR)}, + "models": {"mode": storage_mode(MODELS_DIR), "path": str(MODELS_DIR)}, + "live_recorder": {"mode": "nas_service", "base_url": LIVE_RECORDER_BASE_URL}, + }, + "disk": { + "database": disk_usage_payload(Path(DB_PATH)), + "jobs": disk_usage_payload(JOBS_DIR), + "downloads": disk_usage_payload(DOWNLOADS_DIR), + "models": disk_usage_payload(MODELS_DIR), + }, + "tenant_usage": { + "account_jobs": directory_usage_payload(jobs_account_root), + "account_downloads": directory_usage_payload(downloads_account_root), + "project_jobs": directory_usage_payload(jobs_project_root), + "project_downloads": 
@app.get("/v2/storage/artifacts/{file_id}/content")
def stream_storage_artifact_content(
    file_id: str,
    project_id: str | None = Query(default=None),
    account: dict[str, Any] = Depends(require_approved),
) -> FileResponse:
    """Serve a stored artifact owned by the caller for inline display.

    Args:
        file_id: Identifier of the artifact to serve.
        project_id: Optional project scope; validated against the account
            when provided.
        account: Authenticated, approved account.

    Returns:
        A ``FileResponse`` with the artifact's MIME type (falling back to
        ``application/octet-stream``) and an inline content disposition.
    """
    normalized_project_id = (project_id or "").strip()
    if normalized_project_id:
        resolve_target_project(account["id"], normalized_project_id, username=account["username"])
    matched = resolve_owned_storage_artifact(file_id, account["id"], normalized_project_id or None)
    return FileResponse(
        matched["path"],
        media_type=matched.get("mime_type") or "application/octet-stream",
        filename=matched.get("name") or "artifact.bin",
        # Let Starlette build Content-Disposition: it percent-encodes names
        # with non-ASCII or special characters (filename*=utf-8''...). A
        # hand-built header with such a name cannot be latin-1 encoded and
        # fails the request.
        content_disposition_type="inline",
    )
@app.post("/v2/live-recorder/sources")
def create_live_recorder_source(
    request: LiveRecorderSourceCreateRequest,
    account: dict[str, Any] = Depends(require_approved),
) -> dict[str, Any]:
    """Create or update a live-recorder binding for the caller and push the recorder config.

    The platform is taken from the request, or inferred from the source URL
    when the request leaves it blank.

    Returns:
        ``{"item": <binding payload>, "sync": <remote config sync result>}``.
    """
    owner_id = account["id"]
    project = resolve_target_project(owner_id, request.project_id or None, username=account["username"])
    assistant = resolve_target_assistant(owner_id, request.assistant_id or None, project["id"])
    assistant_id = assistant.get("id", "") if assistant else ""

    platform = request.platform
    if not platform:
        platform = infer_platform_from_url(request.source_url)

    binding = upsert_live_recorder_binding(
        user_id=owner_id,
        project_id=project["id"],
        assistant_id=assistant_id,
        platform=platform,
        source_url=request.source_url,
        title=request.title,
        quality=request.quality,
        enabled=request.enabled,
    )
    # Sync first, then render the payload, as the response is built in that order.
    sync_result = sync_live_recorder_remote_config()
    item = live_recorder_binding_payload(binding)
    return {"item": item, "sync": sync_result}
"" + if request.assistant_id is not None: + assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project_id) + assistant_id = (assistant or {}).get("id", "") + db.execute( + """ + UPDATE live_recorder_bindings + SET project_id = ?, assistant_id = ?, title = ?, quality = ?, enabled = ?, updated_at = ? + WHERE id = ? AND user_id = ? + """, + ( + project_id or None, + assistant_id or None, + request.title.strip() if request.title is not None else current.get("binding_title", ""), + normalize_live_recorder_quality(request.quality if request.quality is not None else current.get("quality")), + 1 if (request.enabled if request.enabled is not None else bool(current.get("enabled", 1))) else 0, + utc_now(), + binding_id, + account["id"], + ), + ) + updated = load_owned_live_recorder_binding(binding_id, account["id"]) + sync_result = sync_live_recorder_remote_config() + return {"item": live_recorder_binding_payload(updated), "sync": sync_result} + + +@app.delete("/v2/live-recorder/sources/{binding_id}") +def delete_live_recorder_source(binding_id: str, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + current = load_owned_live_recorder_binding(binding_id, account["id"]) + db.execute("DELETE FROM live_recorder_bindings WHERE id = ? 
@app.get("/v2/live-recorder/status")
def live_recorder_status(
    project_id: str | None = Query(default=None),
    account: dict[str, Any] = Depends(require_approved),
) -> Any:
    """Return recorder runtime state plus the caller's active recordings and recent files.

    Args:
        project_id: Optional project scope; validated against the account
            when provided.
        account: Authenticated, approved account.
    """
    scope = (project_id or "").strip()
    if scope:
        resolve_target_project(account["id"], scope, username=account["username"])

    runtime = live_recorder_runtime_payload()
    recordings = filter_owned_live_recorder_recordings(account["id"], scope)
    recent_files = filter_owned_live_recorder_files(account["id"], scope, limit=12)

    status: dict[str, Any] = {}
    status["backend"] = runtime.get("backend") or {}
    status["running"] = bool(runtime.get("running"))
    status["pid"] = runtime.get("pid")
    status["uptime_seconds"] = runtime.get("uptime_seconds", 0)
    status["started_at"] = runtime.get("started_at")
    status["last_exit_code"] = runtime.get("last_exit_code")
    status["managed"] = bool(runtime.get("managed"))
    status["url_info"] = runtime.get("url_info") or {}
    status["active_recordings"] = recordings
    status["recording_count"] = len(recordings)
    status["files_preview"] = recent_files
    return status
@app.get("/v2/live-recorder/files/{file_id}/content")
def stream_live_recorder_file(
    file_id: str,
    project_id: str | None = Query(default=None),
    account: dict[str, Any] = Depends(require_approved),
) -> StreamingResponse:
    """Proxy a recording owned by the caller from the live recorder service.

    Args:
        file_id: Encoded relative path of the recording.
        project_id: Optional project scope used when listing owned files.
        account: Authenticated, approved account.

    Returns:
        A ``StreamingResponse`` relaying the upstream bytes for inline display.

    Raises:
        HTTPException: 503 when the recorder URL is not configured, 404 when
            the file is not among the caller's recordings, the upstream status
            code when the recorder rejects the download, and 502 when the
            recorder cannot be reached.
    """
    if not LIVE_RECORDER_BASE_URL:
        raise HTTPException(status_code=503, detail="LIVE_RECORDER_BASE_URL is not configured")

    normalized_project_id = (project_id or "").strip()
    relative_path = decode_live_recorder_file_id(file_id)
    owned_items = filter_owned_live_recorder_files(account["id"], normalized_project_id, limit=500)
    matched = next((item for item in owned_items if item.get("relative_path") == relative_path), None)
    if not matched:
        raise HTTPException(status_code=404, detail="Live recorder file not found")
    target_url = urljoin(
        LIVE_RECORDER_BASE_URL if LIVE_RECORDER_BASE_URL.endswith("/") else f"{LIVE_RECORDER_BASE_URL}/",
        f"downloads/{quote(relative_path.lstrip('/'), safe='/')}",
    )

    # Open the upstream stream before building the response. An error raised
    # inside the body iterator arrives after the 200 status line has been sent
    # and can no longer be turned into a proper error response.
    client = httpx.Client(timeout=120.0, follow_redirects=True)
    try:
        upstream = client.send(client.build_request("GET", target_url), stream=True)
        upstream.raise_for_status()
    except httpx.HTTPStatusError as exc:
        upstream_status = exc.response.status_code
        exc.response.close()
        client.close()
        raise HTTPException(status_code=upstream_status, detail="Failed to stream live recorder file")
    except Exception as exc:
        client.close()
        raise HTTPException(status_code=502, detail=f"Failed to proxy live recorder file: {exc}")

    def iterator():
        try:
            for chunk in upstream.iter_bytes():
                if chunk:
                    yield chunk
        finally:
            # Release the upstream connection once streaming ends or aborts.
            upstream.close()
            client.close()

    display_name = matched.get("name") or "recording.bin"
    encoded_name = quote(display_name)
    if encoded_name == display_name:
        disposition = f'inline; filename="{display_name}"'
    else:
        # Header values must be latin-1 encodable; names with non-ASCII or
        # special characters use the RFC 5987 percent-encoded form.
        disposition = f"inline; filename*=utf-8''{encoded_name}"

    media_type = mimetypes.guess_type(relative_path)[0] or "application/octet-stream"
    return StreamingResponse(iterator(), media_type=media_type, headers={"Content-Disposition": disposition})
+ """, + ( + profile_id, + "本机默认模型", + "openai_compat", + LOCAL_OPENAI_BASE_URL, + LOCAL_OPENAI_API_KEY, + LOCAL_OPENAI_MODEL, + now, + now, + ), + ) + if not db.fetch_one("SELECT id FROM accounts WHERE username = ?", ("kris",)): + account_id = make_id("acct") + password_hash, password_salt = create_password_hash("Asd123456.") + now = utc_now() + model_row = db.fetch_one("SELECT id FROM model_profiles WHERE is_default = 1 LIMIT 1") + db.execute( + """ + INSERT INTO accounts ( + id, username, password_hash, password_salt, display_name, role, + approval_status, approved_by, approved_at, preferred_analysis_model_id, + created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + account_id, + "kris", + password_hash, + password_salt, + "Kris", + "super_admin", + "approved", + account_id, + now, + model_row["id"] if model_row else "", + now, + now, + ), + ) + project = ensure_default_project(account_id, username="kris") + kb = ensure_user_kb(account_id, project["id"], username="kris") + assistant_id = make_id("assistant") + db.execute( + """ + INSERT INTO assistants (id, user_id, project_id, name, description, system_prompt, generation_goal, config_json, model_profile_id, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, '{}', ?, ?, ?) 
+ """, + ( + assistant_id, + account_id, + project["id"], + "默认文案助手", + "系统为超级管理员预置", + "你是一个擅长学习短视频文案风格的 AI 助手。", + "为用户生成稳定风格的短视频文案。", + model_row["id"] if model_row else "", + now, + now, + ), + ) + db.execute( + "INSERT INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)", + (assistant_id, kb["id"]), + ) + + +@app.post("/v2/auth/register") +def register(request: RegisterAccountRequest) -> dict[str, Any]: + username = request.username.strip() + password = request.password.strip() + display_name = request.display_name.strip() or username + if not username or not password: + raise HTTPException(status_code=400, detail="username and password are required") + if db.fetch_one("SELECT id FROM accounts WHERE username = ?", (username,)): + raise HTTPException(status_code=409, detail="username already exists") + account_id = make_id("acct") + password_hash, password_salt = create_password_hash(password) + now = utc_now() + default_model = db.fetch_one("SELECT id FROM model_profiles WHERE is_default = 1 LIMIT 1") + db.execute( + """ + INSERT INTO accounts ( + id, username, password_hash, password_salt, display_name, role, + approval_status, approved_by, approved_at, preferred_analysis_model_id, + created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, 'user', 'pending', NULL, NULL, ?, ?, ?) 
+ """, + ( + account_id, + username, + password_hash, + password_salt, + display_name, + default_model["id"] if default_model else "", + now, + now, + ), + ) + ensure_default_project(account_id, username=username) + account = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account_id,)) + return normalize_account(account) + + +@app.post("/v2/auth/login") +def login(request: LoginRequest) -> dict[str, Any]: + account = db.fetch_one("SELECT * FROM accounts WHERE username = ?", (request.username.strip(),)) + if not account or not verify_password(request.password, account["password_hash"], account["password_salt"]): + raise HTTPException(status_code=401, detail="Invalid credentials") + token = secrets.token_urlsafe(32) + db.execute( + "INSERT INTO auth_tokens (token, account_id, created_at) VALUES (?, ?, ?)", + (token, account["id"], utc_now()), + ) + return { + "token": token, + "account": normalize_account(account), + "default_external_base_url": DEFAULT_EXTERNAL_BASE_URL, + } + + +@app.post("/v2/auth/logout") +def logout(account: dict[str, Any] = Depends(require_auth), authorization: str | None = Header(default=None)) -> dict[str, bool]: + token = authorization.split(" ", 1)[1].strip() + db.execute("DELETE FROM auth_tokens WHERE token = ?", (token,)) + return {"saved": True} + + +@app.get("/v2/me") +def me(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]: + return normalize_account(account) + + +@app.get("/v2/me/dashboard") +def dashboard(account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + projects = [project_payload(row) for row in db.fetch_all("SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC", (account["id"],))] + knowledge_bases = [knowledge_base_payload(row) for row in db.fetch_all("SELECT * FROM knowledge_bases WHERE user_id = ? ORDER BY created_at DESC", (account["id"],))] + assistants = [assistant_payload(row) for row in db.fetch_all("SELECT * FROM assistants WHERE user_id = ? 
ORDER BY created_at DESC", (account["id"],))] + jobs = [job_payload(row) for row in db.fetch_all("SELECT * FROM jobs WHERE user_id = ? ORDER BY created_at DESC LIMIT 20", (account["id"],))] + model_profiles = [normalize_model_profile(row) for row in db.fetch_all("SELECT * FROM model_profiles WHERE owner_account_id IS NULL OR owner_account_id = ? ORDER BY is_default DESC, created_at ASC", (account["id"],))] + return { + "account": normalize_account(account), + "projects": projects, + "knowledge_bases": knowledge_bases, + "assistants": assistants, + "recent_jobs": jobs, + "model_profiles": model_profiles, + "supported_platforms": sorted(DOMESTIC_PLATFORMS), + "domestic_build": True, + } + + +@app.get("/v2/projects") +def list_projects(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: + return [project_payload(row) for row in db.fetch_all("SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC", (account["id"],))] + + +@app.post("/v2/projects") +def create_project(request: ProjectCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + project_id = make_id("project") + now = utc_now() + db.execute( + """ + INSERT INTO projects (id, user_id, name, description, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, + ( + project_id, + account["id"], + request.name.strip(), + request.description.strip(), + now, + now, + ), + ) + ensure_user_kb(account["id"], project_id, username=account["username"]) + return project_payload(db.fetch_one("SELECT * FROM projects WHERE id = ?", (project_id,))) + + +@app.get("/v2/content-sources") +def list_content_sources( + project_id: str | None = Query(default=None), + platform: str | None = Query(default=None), + account: dict[str, Any] = Depends(require_approved), +) -> list[dict[str, Any]]: + normalized_platform = ensure_domestic_platform(platform, allow_blank=True) + clauses = ["user_id = ?"] + params: list[Any] = [account["id"]] + if project_id: + resolve_target_project(account["id"], project_id, username=account["username"]) + clauses.append("project_id = ?") + params.append(project_id) + if normalized_platform: + clauses.append("platform = ?") + params.append(normalized_platform) + rows = db.fetch_all( + f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY created_at DESC", + tuple(params), + ) + return [content_source_payload(row) for row in rows] + + +@app.post("/v2/content-sources") +def create_content_source_api(request: ContentSourceCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) + normalized_platform = ensure_domestic_platform(request.platform or infer_platform_from_url(request.source_url), allow_blank=True) + row = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind=request.source_kind.strip(), + platform=normalized_platform, + handle=request.handle.strip(), + source_url=request.source_url.strip(), + title=request.title.strip(), + local_path=request.local_path.strip(), + metadata=request.metadata, + ) + return content_source_payload(row) + + +@app.post("/v2/pipelines/content-source-sync") +async def 
create_content_source_sync_job( + request: ContentSourceSyncRequest, + account: dict[str, Any] = Depends(require_approved), +) -> dict[str, Any]: + source_row = None + if request.content_source_id.strip(): + source_row = load_owned_content_source(request.content_source_id.strip(), account["id"]) + + requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "") + project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + kb = resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"]) + assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + profile = model_profile_for_account(account["id"], request.analysis_model_profile_id or None) + + source_url = (request.source_url or (source_row or {}).get("source_url") or "").strip() + if not source_url: + raise HTTPException(status_code=400, detail="source_url or content_source_id is required") + platform = ensure_domestic_platform( + request.platform or (source_row or {}).get("platform") or infer_platform_from_url(source_url), + allow_blank=False, + ) + handle = (request.handle or (source_row or {}).get("handle") or "").strip() + source_title = ( + request.title.strip() + or (source_row or {}).get("title", "").strip() + or handle + or source_url + ) + + if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]: + raise HTTPException(status_code=400, detail="Content source does not belong to target project") + + if not source_row: + source_row = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind="creator_account", + platform=platform, + handle=handle, + source_url=source_url, + title=source_title, + metadata={ + "sync_mode": "recent_uploads", + "max_items": request.max_items, + "analysis_model_profile_id": profile["id"], + }, + ) + + job_row = create_job_record( 
+ account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="content_source_sync", + line_type="content_source_sync", + workflow_key="content_source_sync_pipeline", + title=f"{source_title} 内容源同步", + language=request.language, + source_url=source_url, + assistant_id=(assistant or {}).get("id"), + content_source_id=source_row["id"], + artifacts={ + "platform": platform, + "handle": handle, + "source_account_url": source_url, + "source_title": source_title, + "max_items": request.max_items, + "skip_existing": request.skip_existing, + "auto_trigger_analysis": request.auto_trigger_analysis, + }, + analysis_model_profile_id=profile["id"], + ) + update_content_source_metadata( + source_row["id"], + { + "sync_mode": "recent_uploads", + "max_items": request.max_items, + "analysis_model_profile_id": profile["id"], + "last_sync_job_id": job_row["id"], + "last_sync_requested_at": utc_now(), + }, + ) + return job_payload(await trigger_orchestrated_job(job_row)) + + +@app.get("/v2/model-profiles") +def list_model_profiles(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: + rows = db.fetch_all( + "SELECT * FROM model_profiles WHERE owner_account_id IS NULL OR owner_account_id = ? ORDER BY is_default DESC, is_system DESC, created_at ASC", + (account["id"],), + ) + return [normalize_model_profile(row) for row in rows] + + +@app.post("/v2/model-profiles") +def create_model_profile(request: ModelProfileRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + model_id = make_id("model") + now = utc_now() + if request.is_default: + db.execute("UPDATE model_profiles SET is_default = 0 WHERE owner_account_id = ?", (account["id"],)) + db.execute( + """ + INSERT INTO model_profiles (id, owner_account_id, name, provider, base_url, api_key, model_name, is_system, is_default, created_at, updated_at) + VALUES (?, ?, ?, 'openai_compat', ?, ?, ?, 0, ?, ?, ?) 
+ """, + (model_id, account["id"], request.name.strip(), request.base_url.strip(), request.api_key.strip(), request.model_name.strip(), 1 if request.is_default else 0, now, now), + ) + row = db.fetch_one("SELECT * FROM model_profiles WHERE id = ?", (model_id,)) + return normalize_model_profile(row) + + +@app.post("/v2/me/preferences/analysis-model") +def set_preferred_analysis_model(request: PreferredModelRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + model = db.fetch_one( + "SELECT * FROM model_profiles WHERE id = ? AND (owner_account_id IS NULL OR owner_account_id = ?)", + (request.model_profile_id, account["id"]), + ) + if not model: + raise HTTPException(status_code=404, detail="Model profile not found") + db.execute( + "UPDATE accounts SET preferred_analysis_model_id = ?, updated_at = ? WHERE id = ?", + (request.model_profile_id, utc_now(), account["id"]), + ) + account = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account["id"],)) + return normalize_account(account) + + +@app.get("/v2/knowledge-bases") +def list_knowledge_bases(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: + return [knowledge_base_payload(row) for row in db.fetch_all("SELECT * FROM knowledge_bases WHERE user_id = ? ORDER BY created_at DESC", (account["id"],))] + + +@app.post("/v2/knowledge-bases") +def create_knowledge_base(request: KnowledgeBaseCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) + kb_id = make_id("kb") + now = utc_now() + db.execute( + """ + INSERT INTO knowledge_bases (id, user_id, project_id, name, description, sync_status, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, 'ready', ?, ?) 
+ """, + (kb_id, account["id"], project["id"], request.name.strip(), request.description.strip(), now, now), + ) + row = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (kb_id,)) + return knowledge_base_payload(row) + + +@app.get("/v2/knowledge-bases/{knowledge_base_id}/documents") +def list_knowledge_documents(knowledge_base_id: str, account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: + kb = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ? AND user_id = ?", (knowledge_base_id, account["id"])) + if not kb: + raise HTTPException(status_code=404, detail="Knowledge base not found") + rows = db.fetch_all("SELECT * FROM knowledge_documents WHERE knowledge_base_id = ? ORDER BY created_at DESC", (knowledge_base_id,)) + return [document_payload(row) for row in rows] + + +@app.get("/v2/reviews") +def list_reviews( + project_id: str | None = Query(default=None), + limit: int = Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(require_approved), +) -> list[dict[str, Any]]: + clauses = ["user_id = ?"] + params: list[Any] = [account["id"]] + if project_id is not None: + normalized_project = project_id.strip() + if normalized_project: + clauses.append("project_id = ?") + params.append(normalized_project) + else: + clauses.append("(project_id IS NULL OR project_id = '')") + sql = f"SELECT * FROM publish_reviews WHERE {' AND '.join(clauses)} ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC LIMIT ?" 
+ params.append(limit) + return [review_payload(row) for row in db.fetch_all(sql, tuple(params))] + + +@app.post("/v2/reviews") +def create_review(request: ReviewCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + source_job = None + if request.source_job_id.strip(): + source_job = load_owned_job(request.source_job_id.strip(), account["id"]) + requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "") + project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + review_id = make_id("review") + title = request.title.strip() or (source_job.get("title", "") if source_job else "") + if not title: + title = f"{project['name']} 复盘" + timestamp = utc_now() + normalized_platform = ensure_domestic_platform(request.platform, allow_blank=False) + db.execute( + """ + INSERT INTO publish_reviews ( + id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, + publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + review_id, + account["id"], + project["id"], + source_job["id"] if source_job else None, + (assistant or {}).get("id") or None, + title, + normalized_platform or "douyin", + request.content_type or "video", + request.publish_url.strip(), + request.published_at.strip(), + json.dumps(request.metrics, ensure_ascii=False), + request.verdict.strip(), + request.highlights.strip(), + request.next_actions.strip(), + request.notes.strip(), + timestamp, + timestamp, + ), + ) + row = db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return review_payload(row) + + +@app.patch("/v2/reviews/{review_id}") +def update_review(review_id: str, request: ReviewUpdateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + current = load_owned_review(review_id, account["id"]) + assistant_id = current.get("assistant_id") or None + if request.assistant_id is not None: + assistant = resolve_target_assistant(account["id"], request.assistant_id or None, current.get("project_id", "")) + assistant_id = (assistant or {}).get("id") or None + normalized_platform = ( + ensure_domestic_platform(request.platform, allow_blank=False) + if request.platform is not None + else current.get("platform", "douyin") + ) + db.execute( + """ + UPDATE publish_reviews + SET title = ?, platform = ?, content_type = ?, publish_url = ?, published_at = ?, + metrics_json = ?, verdict = ?, highlights = ?, next_actions = ?, notes = ?, + assistant_id = ?, updated_at = ? + WHERE id = ? AND user_id = ? 
+ """, + ( + request.title if request.title is not None else current.get("title", ""), + normalized_platform, + request.content_type if request.content_type is not None else current.get("content_type", "video"), + request.publish_url if request.publish_url is not None else current.get("publish_url", ""), + request.published_at if request.published_at is not None else current.get("published_at", ""), + json.dumps(request.metrics if request.metrics is not None else parse_json_object(current.get("metrics_json") or "{}"), ensure_ascii=False), + request.verdict if request.verdict is not None else current.get("verdict", ""), + request.highlights if request.highlights is not None else current.get("highlights", ""), + request.next_actions if request.next_actions is not None else current.get("next_actions", ""), + request.notes if request.notes is not None else current.get("notes", ""), + assistant_id, + utc_now(), + review_id, + account["id"], + ), + ) + row = db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return review_payload(row) + + +@app.get("/v2/explore/jobs") +def list_jobs( + parent_job_id: str | None = Query(default=None), + line_type: str | None = Query(default=None), + account: dict[str, Any] = Depends(require_approved), +) -> list[dict[str, Any]]: + clauses = ["user_id = ?"] + params: list[Any] = [account["id"]] + if parent_job_id is not None: + normalized_parent = parent_job_id.strip() + if normalized_parent: + clauses.append("parent_job_id = ?") + params.append(normalized_parent) + else: + clauses.append("(parent_job_id IS NULL OR parent_job_id = '')") + if line_type: + clauses.append("line_type = ?") + params.append(line_type.strip()) + sql = f"SELECT * FROM jobs WHERE {' AND '.join(clauses)} ORDER BY created_at DESC" + return [job_payload(row) for row in db.fetch_all(sql, tuple(params))] + + +@app.get("/v2/explore/jobs/{job_id}") +def get_job(job_id: str, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + 
row = db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"])) + if not row: + raise HTTPException(status_code=404, detail="Job not found") + return job_payload(row) + + +@app.get("/v2/explore/jobs/{job_id}/events") +def get_job_events(job_id: str, account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: + row = db.fetch_one("SELECT id FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"])) + if not row: + raise HTTPException(status_code=404, detail="Job not found") + return [ + job_event_payload(item) + for item in db.fetch_all("SELECT * FROM job_events WHERE job_id = ? ORDER BY created_at ASC", (job_id,)) + ] + + +def resolve_target_kb(account_id: str, requested_kb_id: str | None, project_id: str = "", username: str = "默认用户") -> dict[str, Any]: + if requested_kb_id: + kb = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ? AND user_id = ?", (requested_kb_id, account_id)) + if kb: + if project_id and kb.get("project_id") and kb.get("project_id") != project_id: + raise HTTPException(status_code=400, detail="Knowledge base does not belong to target project") + return kb + raise HTTPException(status_code=404, detail="Knowledge base not found") + return ensure_user_kb(account_id, project_id, username=username) + + +@app.post("/v2/explore/text") +async def create_text_job(request: ExploreTextRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) + kb = resolve_target_kb(account["id"], request.knowledge_base_id, project["id"], username=account["username"]) + assistant = resolve_target_assistant(account["id"], request.assistant_id, project["id"]) + profile = model_profile_for_account(account["id"], request.analysis_model_profile_id) + source = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind="inline_text", + 
title=request.title.strip(), + metadata={"content_preview": request.content[:280]}, + ) + job_row = create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="text", + line_type="analysis", + workflow_key="analysis_pipeline", + title=request.title.strip(), + language="zh-CN", + assistant_id=(assistant or {}).get("id"), + content_source_id=source["id"], + artifacts={"input_text": request.content}, + analysis_model_profile_id=profile["id"], + ) + return job_payload(await trigger_orchestrated_job(job_row)) + + +@app.post("/v2/explore/video-link") +async def create_video_link_job(request: ExploreVideoLinkRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) + kb = resolve_target_kb(account["id"], request.knowledge_base_id, project["id"], username=account["username"]) + assistant = resolve_target_assistant(account["id"], request.assistant_id, project["id"]) + profile = model_profile_for_account(account["id"], request.analysis_model_profile_id) + source = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind="video_link", + source_url=request.video_url.strip(), + title=(request.title or "短视频素材").strip(), + metadata={"platform": "video_link"}, + ) + job_row = create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="video_link", + line_type="analysis", + workflow_key="analysis_pipeline", + title=(request.title or "短视频素材").strip(), + language=request.language, + source_url=request.video_url.strip(), + assistant_id=(assistant or {}).get("id"), + content_source_id=source["id"], + artifacts={}, + analysis_model_profile_id=profile["id"], + ) + return job_payload(await trigger_orchestrated_job(job_row)) + + +@app.post("/v2/explore/upload-video") +async def upload_video( + file: 
UploadFile = File(...), + title: str = Form(""), + project_id: str = Form(""), + knowledge_base_id: str = Form(""), + assistant_id: str = Form(""), + analysis_model_profile_id: str = Form(""), + account: dict[str, Any] = Depends(require_approved), +) -> dict[str, Any]: + project = resolve_target_project(account["id"], project_id or None, username=account["username"]) + kb = resolve_target_kb(account["id"], knowledge_base_id or None, project["id"], username=account["username"]) + assistant = resolve_target_assistant(account["id"], assistant_id or None, project["id"]) + profile = model_profile_for_account(account["id"], analysis_model_profile_id or None) + job_id = make_id("job_upload") + job_dir = job_storage_dir(account_id=account["id"], project_id=project["id"], job_id=job_id) + job_dir.mkdir(parents=True, exist_ok=True) + suffix = Path(file.filename or "upload.mp4").suffix or ".mp4" + target_path = job_dir / f"source{suffix}" + with target_path.open("wb") as handle: + shutil.copyfileobj(file.file, handle) + source = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind="upload_video", + source_url=file.filename or "", + title=(title or file.filename or "上传视频素材").strip(), + local_path=str(target_path), + metadata={"filename": file.filename or "", "size_bytes": target_path.stat().st_size}, + ) + job_row = create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="upload_video", + line_type="analysis", + workflow_key="analysis_pipeline", + title=(title or file.filename or "上传视频素材").strip(), + source_url=file.filename or "", + assistant_id=(assistant or {}).get("id"), + content_source_id=source["id"], + artifacts={"uploaded_path": str(target_path)}, + analysis_model_profile_id=profile["id"], + ) + return job_payload(await trigger_orchestrated_job(job_row)) + + +@app.post("/v2/pipelines/real-cut") +async def create_real_cut_job(request: RealCutJobRequest, account: 
dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + source_job = None + source_job_id = request.source_job_id.strip() + if source_job_id: + source_job = load_owned_job(source_job_id, account["id"]) + + requested_project_id = request.project_id or (source_job.get("project_id", "") if source_job else "") + project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + if source_job and source_job.get("project_id") and source_job.get("project_id") != project["id"]: + raise HTTPException(status_code=400, detail="Source job does not belong to target project") + + kb = ensure_user_kb(account["id"], project["id"], username=account["username"]) + resolved_input_dir = request.input_dir.strip() + staged_payload: dict[str, Any] = {} + if not resolved_input_dir: + if not source_job: + raise HTTPException(status_code=400, detail="input_dir or source_job_id is required") + staged_payload = await stage_real_cut_source_to_cutvideo(source_job) + resolved_input_dir = staged_payload["input_dir"] + + source_url = resolved_input_dir + source_metadata: dict[str, Any] = {"line_type": "real_cut"} + if source_job: + source_url = source_job.get("source_url") or resolved_input_dir + source_metadata["source_job_id"] = source_job["id"] + source_metadata["source_job_type"] = source_job.get("source_type", "") + if staged_payload: + source_metadata["cutvideo_upload"] = staged_payload.get("upload", {}) + source_metadata["source_media_path"] = staged_payload.get("source_path", "") + + source = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind="real_cut_input", + title=request.title.strip(), + source_url=source_url, + local_path=resolved_input_dir, + metadata=source_metadata, + ) + job_row = create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="real_cut", + line_type="real_cut", + workflow_key="real_cut_pipeline", + 
title=request.title.strip(), + source_url=resolved_input_dir, + content_source_id=source["id"], + artifacts={ + "source_job_id": source_job["id"] if source_job else "", + "source_media_path": staged_payload.get("source_path", ""), + "cutvideo_upload": staged_payload.get("upload", {}), + "cutvideo_request": { + "base_config": request.base_config.strip() or CUTVIDEO_BASE_CONFIG, + "name": request.title.strip(), + "input_dir": resolved_input_dir, + "objective": request.objective, + "target_duration_sec": request.target_duration_sec, + "target_aspect_ratio": request.target_aspect_ratio, + "ideal_segment_duration_sec": request.ideal_segment_duration_sec, + "max_segment_duration_sec": request.max_segment_duration_sec, + "transcript_backend": request.transcript_backend, + "transcript_device": request.transcript_device, + "review_enabled": request.review_enabled, + "dry_run": request.dry_run, + } + }, + ) + return job_payload(await trigger_orchestrated_job(job_row)) + + +@app.post("/v2/pipelines/ai-video") +async def create_ai_video_job(request: AiVideoJobRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: + source_job = None + source_project_id = "" + source_kb_id = "" + if request.source_job_id.strip(): + source_job = db.fetch_one("SELECT * FROM jobs WHERE id = ? 
AND user_id = ?", (request.source_job_id.strip(), account["id"])) + if not source_job: + raise HTTPException(status_code=404, detail="Source job not found") + if source_job["status"] != "completed": + raise HTTPException(status_code=409, detail="Source job must be completed before AI video generation") + source_project_id = source_job.get("project_id", "") + source_kb_id = source_job.get("knowledge_base_id", "") + + requested_project_id = request.project_id or source_project_id + project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + kb = resolve_target_kb(account["id"], request.knowledge_base_id or source_kb_id or None, project["id"], username=account["username"]) + assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + source = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind="ai_video_brief", + title=request.title.strip(), + metadata={"source_job_id": request.source_job_id.strip()}, + ) + job_row = create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="ai_video", + line_type="ai_video", + workflow_key="ai_video_pipeline", + title=request.title.strip(), + assistant_id=(assistant or {}).get("id"), + content_source_id=source["id"], + artifacts={ + "brief": request.brief, + "style": request.style, + "shots": request.shots, + "image_provider": request.image_provider, + "image_model": request.image_model, + "video_provider": request.video_provider, + "video_model": request.video_model, + "aspect_ratio": request.aspect_ratio, + "duration": request.duration, + "source_job_id": request.source_job_id.strip(), + }, + ) + return job_payload(await trigger_orchestrated_job(job_row)) + + +@app.get("/v2/assistants") +def list_assistants(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: + return [assistant_payload(row) for row in 
@app.post("/v2/assistants")
def create_assistant(request: AssistantCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Insert a new assistant for the caller and link the knowledge bases they own.

    Knowledge-base ids in the request that do not belong to the caller are
    skipped silently rather than rejected.
    """
    assistant_id = make_id("assistant")
    now = utc_now()
    owner_id = account["id"]
    project = resolve_target_project(owner_id, request.project_id or None, username=account["username"])
    model_profile = model_profile_for_account(owner_id, request.model_profile_id or None)

    # Order matches the column list of the INSERT below; config_json is the
    # literal '{}' in the SQL, so it has no placeholder.
    values = (
        assistant_id,
        owner_id,
        project["id"],
        request.name.strip(),
        request.description.strip(),
        request.system_prompt.strip(),
        request.generation_goal.strip(),
        model_profile["id"],
        now,
        now,
    )
    db.execute(
        """
        INSERT INTO assistants (id, user_id, project_id, name, description, system_prompt, generation_goal, config_json, model_profile_id, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, '{}', ?, ?, ?)
        """,
        values,
    )

    for kb_id in request.knowledge_base_ids:
        owned = db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? AND user_id = ?", (kb_id, owner_id))
        if not owned:
            continue
        db.execute(
            "INSERT OR IGNORE INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)",
            (assistant_id, kb_id),
        )

    created = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,))
    return assistant_payload(created)
AND user_id = ?", (assistant_id, account["id"])) + if not current: + raise HTTPException(status_code=404, detail="Assistant not found") + project_id = current.get("project_id", "") + if request.project_id is not None: + project_id = resolve_target_project(account["id"], request.project_id, username=account["username"])["id"] + payload = { + "name": request.name if request.name is not None else current["name"], + "description": request.description if request.description is not None else current.get("description", ""), + "system_prompt": request.system_prompt if request.system_prompt is not None else current.get("system_prompt", ""), + "generation_goal": request.generation_goal if request.generation_goal is not None else current.get("generation_goal", ""), + "project_id": project_id, + "model_profile_id": current.get("model_profile_id", ""), + } + if request.model_profile_id is not None: + payload["model_profile_id"] = model_profile_for_account(account["id"], request.model_profile_id)["id"] + db.execute( + """ + UPDATE assistants + SET project_id = ?, name = ?, description = ?, system_prompt = ?, generation_goal = ?, model_profile_id = ?, updated_at = ? + WHERE id = ? + """, + ( + payload["project_id"], + payload["name"], + payload["description"], + payload["system_prompt"], + payload["generation_goal"], + payload["model_profile_id"], + utc_now(), + assistant_id, + ), + ) + if request.knowledge_base_ids is not None: + db.execute("DELETE FROM assistant_knowledge_bases WHERE assistant_id = ?", (assistant_id,)) + for kb_id in request.knowledge_base_ids: + kb = db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? 
@app.post("/v2/assistants/{assistant_id}/generate")
async def generate_copy(assistant_id: str, request: GenerateCopyRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Generate copy with one of the caller's assistants, grounded in their knowledge bases.

    The knowledge bases come from ``request.knowledge_base_ids`` when given,
    otherwise from the assistant's linked knowledge bases. Up to three of the
    most recent documents per knowledge base are excerpted into the prompt.

    Security: every knowledge-base id is checked against the caller's account
    before any document is read. Without that check a caller could pass another
    tenant's id and receive that tenant's documents in ``used_documents``.
    Ids the caller does not own are dropped silently, matching how
    ``create_assistant`` / ``update_assistant`` treat them.

    Raises:
        HTTPException: 404 when the assistant does not belong to the caller.
    """
    assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ? AND user_id = ?", (assistant_id, account["id"]))
    if not assistant:
        raise HTTPException(status_code=404, detail="Assistant not found")

    candidate_kb_ids = request.knowledge_base_ids or [
        row["knowledge_base_id"]
        for row in db.fetch_all(
            "SELECT knowledge_base_id FROM assistant_knowledge_bases WHERE assistant_id = ?",
            (assistant_id,),
        )
    ]
    # Tenant isolation: keep only knowledge bases owned by the caller.
    kb_ids = [
        kb_id
        for kb_id in candidate_kb_ids
        if db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? AND user_id = ?", (kb_id, account["id"]))
    ]

    used_documents: list[dict[str, Any]] = []
    excerpts: list[str] = []
    for kb_id in kb_ids:
        docs = db.fetch_all("SELECT * FROM knowledge_documents WHERE knowledge_base_id = ? ORDER BY created_at DESC LIMIT 3", (kb_id,))
        for doc in docs:
            payload = document_payload(doc)
            used_documents.append(payload)
            # Fall back to "" so a document with no text at all cannot break the slice.
            excerpt = payload["combined_text"] or payload["style_summary"] or payload["transcript_text"] or ""
            excerpts.append(f"[{payload['title']}]\n{excerpt[:1200]}")
    prompt_excerpt = "\n\n".join(excerpts)[:6000]

    system_prompt = assistant.get("system_prompt") or "你是文案助手。"
    generation_goal = assistant.get("generation_goal") or "生成短视频文案。"
    normalized_platform = ensure_domestic_platform(request.platform, allow_blank=False)
    user_prompt = (
        f"任务目标:{generation_goal}\n"
        f"创作需求:{request.brief}\n"
        f"平台:{platform_label(normalized_platform)}\n"
        f"受众:{request.audience}\n"
        f"额外要求:{request.extra_requirements or '无'}\n\n"
        f"参考知识库素材:\n{prompt_excerpt or '暂无参考素材,请按通用短视频结构输出。'}\n\n"
        "请输出完整文案,包含标题、开场钩子、正文结构和结尾行动指令。"
    )
    profile = model_profile_for_account(account["id"], assistant.get("model_profile_id") or None)
    content = await call_model(profile, system_prompt, user_prompt, temperature=0.7)
    return {
        "assistant_id": assistant_id,
        "knowledge_base_ids": kb_ids,
        "content": content,
        # The response carries a shorter excerpt than the one sent to the model.
        "prompt_excerpt": prompt_excerpt[:2000],
        "used_documents": used_documents,
    }
def parse_job_artifacts(row: dict[str, Any]) -> dict[str, Any]:
    """Decode the ``artifacts_json`` column of a job row into a dict.

    Returns an empty dict when the column is missing, empty, not valid JSON,
    or valid JSON that is not an object (e.g. ``null`` or a list). Callers use
    ``.get(...)`` on the result, so anything other than a dict would crash them.
    """
    raw = row.get("artifacts_json") or "{}"
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    return data if isinstance(data, dict) else {}
@app.post("/internal/jobs/steps/content-source-sync")
async def internal_content_source_sync(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Discover videos of a content-source account and spawn one analysis job per new video.

    The step marks the sync job ``processing``, discovers up to ``max_items``
    video links (clamped to 1..20), skips videos that already have a
    ``video_link`` job in the same user/project when ``skip_existing`` is set,
    creates a content source plus child job for the rest, and optionally
    triggers each child. The sync job ends as ``completed`` or ``failed``; in
    both cases the originating content source's metadata is updated.

    Raises:
        HTTPException: 400 when neither the artifacts nor the job row carry a
            source URL. Failures after the job entered ``processing`` are not
            raised; they are recorded on the job and returned in the payload.
    """
    row = load_step_job(request, job_id, "content_source_sync_pipeline")
    artifacts = parse_job_artifacts(row)
    source_url = str(artifacts.get("source_account_url") or row.get("source_url") or "").strip()
    if not source_url:
        raise HTTPException(status_code=400, detail="Content source sync job is missing source URL")
    # A malformed stored value must not abort the step before the job can be
    # marked failed; fall back to the default of 5 instead.
    try:
        requested_max_items = int(artifacts.get("max_items") or 5)
    except (TypeError, ValueError):
        requested_max_items = 5
    max_items = max(1, min(requested_max_items, 20))
    skip_existing = bool(artifacts.get("skip_existing", True))
    auto_trigger_analysis = bool(artifacts.get("auto_trigger_analysis", True))

    update_job_state(
        row["id"],
        status="processing",
        provider_name="collector",
        provider_task_id=row["id"],
        result={"sync_started": True},
    )

    try:
        discovered_items, debug_payload = discover_account_video_links(source_url, max_items)
        child_jobs: list[dict[str, Any]] = []
        queued_jobs: list[dict[str, Any]] = []
        skipped_items: list[dict[str, Any]] = []

        for index, item in enumerate(discovered_items, start=1):
            video_url = str(item.get("video_url") or "").strip()
            if not video_url:
                continue
            existing_row = db.fetch_one(
                """
                SELECT * FROM jobs
                WHERE user_id = ? AND project_id = ? AND source_type = 'video_link' AND source_url = ?
                ORDER BY created_at DESC
                LIMIT 1
                """,
                (row["user_id"], row.get("project_id", ""), video_url),
            )
            if existing_row and skip_existing:
                skipped_items.append(
                    {
                        "video_url": video_url,
                        "title": item.get("title") or existing_row.get("title") or "短视频素材",
                        "existing_job_id": existing_row["id"],
                        "existing_status": existing_row.get("status", ""),
                    }
                )
                continue

            content_source = create_content_source(
                account_id=row["user_id"],
                project_id=row.get("project_id", ""),
                source_kind="video_link",
                platform=str(artifacts.get("platform") or infer_platform_from_url(video_url)),
                handle=str(artifacts.get("handle") or ""),
                source_url=video_url,
                title=str(item.get("title") or f"内容源视频 {index}"),
                metadata={
                    "origin_content_source_id": row.get("content_source_id", ""),
                    "origin_sync_job_id": row["id"],
                    "external_id": str(item.get("external_id") or ""),
                    "source_account_url": source_url,
                },
            )
            child_row = create_job_record(
                account_id=row["user_id"],
                project_id=row.get("project_id", ""),
                parent_job_id=row["id"],
                knowledge_base_id=row["knowledge_base_id"],
                source_type="video_link",
                line_type="analysis",
                workflow_key="analysis_pipeline",
                title=str(item.get("title") or f"内容源视频 {index}"),
                language=row.get("language", "auto"),
                source_url=video_url,
                assistant_id=row.get("assistant_id"),
                content_source_id=content_source["id"],
                artifacts={
                    "origin_content_source_id": row.get("content_source_id", ""),
                    "origin_sync_job_id": row["id"],
                    "source_account_url": source_url,
                },
                analysis_model_profile_id=row.get("analysis_model_profile_id", ""),
            )
            child_jobs.append(job_payload(child_row))
            if auto_trigger_analysis:
                queued_child = await trigger_orchestrated_job(child_row)
                queued_jobs.append(job_payload(queued_child))

        if row.get("content_source_id"):
            update_content_source_metadata(
                row["content_source_id"],
                {
                    "last_sync_job_id": row["id"],
                    "last_sync_completed_at": utc_now(),
                    "last_discovered_count": len(discovered_items),
                    # Prefer the triggered jobs; fall back to the created-only list.
                    "last_enqueued_job_ids": [item["id"] for item in queued_jobs] or [item["id"] for item in child_jobs],
                    "last_skipped_existing_count": len(skipped_items),
                    "last_source_account_url": source_url,
                    "last_sync_error": "",
                },
            )

        updated = update_job_state(
            row["id"],
            status="completed",
            provider_name="collector",
            provider_task_id=row["id"],
            artifacts={
                **debug_payload,
                "discovered_videos": discovered_items,
                "skipped_existing": skipped_items,
                "child_job_ids": [item["id"] for item in child_jobs],
                "queued_job_ids": [item["id"] for item in queued_jobs],
            },
            result={
                "discovered_count": len(discovered_items),
                "queued_count": len(queued_jobs) if auto_trigger_analysis else len(child_jobs),
                "skipped_count": len(skipped_items),
                "child_jobs": queued_jobs or child_jobs,
                "skipped_existing": skipped_items,
            },
        )
        return job_context_payload(updated)
    except HTTPException as exc:
        error = str(exc.detail)
    except Exception as exc:
        # Boundary handler: the failure is recorded on the job below instead of
        # propagating. Exceptions with an empty message still get a usable label.
        error = str(exc) or type(exc).__name__

    if row.get("content_source_id"):
        update_content_source_metadata(
            row["content_source_id"],
            {
                "last_sync_job_id": row["id"],
                "last_sync_completed_at": utc_now(),
                "last_sync_error": error[:500],
                "last_source_account_url": source_url,
            },
        )
    updated = update_job_state(
        row["id"],
        status="failed",
        error=error[:500],
        provider_name="collector",
        provider_task_id=row["id"],
    )
    return job_context_payload(updated)
@app.post("/internal/jobs/steps/real-cut/poll")
async def internal_real_cut_poll(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Poll CutVideo once for a submitted real-cut job and mirror its state onto the job.

    A task status of ``completed`` or ``failed`` is copied to the job; any
    other status keeps the job in ``processing``. When a completed task
    reports a ``run_id``, the run detail is fetched and stored as well.

    Raises:
        HTTPException: 503 when CutVideo is not configured (same guard as the
            submit and run steps), 409 when the job has no provider task id.
    """
    if not cutvideo_client.enabled:
        raise HTTPException(status_code=503, detail="CutVideo is not configured")
    row = load_step_job(request, job_id, "real_cut_pipeline")
    if not row.get("provider_task_id"):
        raise HTTPException(status_code=409, detail="CutVideo task has not been submitted")

    task_payload = await cutvideo_client.get_task(row["provider_task_id"])
    status = str(task_payload.get("status") or "").lower()
    run_payload: dict[str, Any] = {}
    artifacts: dict[str, Any] = {"cutvideo_task": task_payload}
    # The job keeps whatever error it already carried unless the task failed.
    error = row.get("error", "")

    if status == "completed":
        next_status = "completed"
        run_id = str(task_payload.get("run_id") or "")
        if run_id:
            run_payload = await cutvideo_client.get_run(run_id)
            artifacts["cutvideo_run"] = run_payload
    elif status == "failed":
        next_status = "failed"
        error = str(task_payload.get("error") or "CutVideo task failed")
    else:
        next_status = "processing"

    updated = update_job_state(
        row["id"],
        status=next_status,
        error=error,
        provider_name="cutvideo",
        provider_task_id=row["provider_task_id"],
        artifacts=artifacts,
        result={"cutvideo_run": run_payload} if run_payload else {"cutvideo_task": task_payload},
    )
    return job_context_payload(updated)
async def _render_huobao_scenes(
    row: dict[str, Any],
    artifacts: dict[str, Any],
    storyboard_items: list[dict[str, Any]],
    drama_id: str,
) -> list[dict[str, Any]]:
    """Render first frame, last frame and video clip for each storyboard via Huobao.

    Raises:
        RuntimeError: when an image or video does not reach ``completed`` or
            an image result carries neither ``image_url`` nor ``local_path``.
    """
    image_provider = str(artifacts.get("image_provider") or "openai")
    image_model = str(artifacts.get("image_model") or "")
    video_provider = str(artifacts.get("video_provider") or "doubao")
    video_model = str(artifacts.get("video_model") or "")
    aspect_ratio = str(artifacts.get("aspect_ratio") or "9:16")
    image_size = huobao_image_size_for_aspect_ratio(aspect_ratio)
    duration = int(artifacts.get("duration") or 5)
    style = str(artifacts.get("style") or "realistic")

    rendered_scenes: list[dict[str, Any]] = []
    for idx, storyboard in enumerate(storyboard_items, start=1):
        first_prompt = str(storyboard.get("first_frame_prompt") or storyboard.get("visual") or storyboard.get("title") or row["title"])
        last_prompt = str(storyboard.get("last_frame_prompt") or storyboard.get("visual") or storyboard.get("title") or row["title"])
        video_prompt = str(storyboard.get("video_prompt") or storyboard.get("narration") or storyboard.get("title") or row["title"])

        first_image = await huobao_client.generate_image(
            {
                "drama_id": drama_id,
                "image_type": "storyboard",
                "frame_type": "first",
                "prompt": first_prompt,
                "provider": image_provider,
                "model": image_model,
                "size": image_size,
                "style": style,
            }
        )
        last_image = await huobao_client.generate_image(
            {
                "drama_id": drama_id,
                "image_type": "storyboard",
                "frame_type": "last",
                "prompt": last_prompt,
                "provider": image_provider,
                "model": image_model,
                "size": image_size,
                "style": style,
            }
        )

        first_ready = await wait_for_huobao_image(str(first_image.get("id") or ""))
        last_ready = await wait_for_huobao_image(str(last_image.get("id") or ""))
        if str(first_ready.get("status") or "").lower() != "completed":
            raise RuntimeError(f"First frame generation failed for scene {idx}")
        if str(last_ready.get("status") or "").lower() != "completed":
            raise RuntimeError(f"Last frame generation failed for scene {idx}")

        first_frame_url = first_ready.get("image_url") or first_ready.get("local_path")
        last_frame_url = last_ready.get("image_url") or last_ready.get("local_path")
        if not first_frame_url or not last_frame_url:
            raise RuntimeError(f"Huobao image output missing for scene {idx}")

        video_payload = await huobao_client.generate_video(
            {
                "drama_id": drama_id,
                "prompt": video_prompt,
                "provider": video_provider,
                "model": video_model,
                "reference_mode": "first_last",
                "first_frame_url": first_frame_url,
                "last_frame_url": last_frame_url,
                "aspect_ratio": aspect_ratio,
                "duration": duration,
                "style": style,
            }
        )
        video_ready = await wait_for_huobao_video(str(video_payload.get("id") or ""))
        if str(video_ready.get("status") or "").lower() != "completed":
            raise RuntimeError(f"Video generation failed for scene {idx}")

        rendered_scenes.append(
            {
                "shot_index": storyboard.get("shot_index", idx),
                "title": storyboard.get("title", f"镜头{idx}"),
                "narration": storyboard.get("narration", ""),
                "first_frame": first_ready,
                "last_frame": last_ready,
                "video": video_ready,
            }
        )
    return rendered_scenes


@app.post("/internal/jobs/steps/ai-video/render")
async def internal_ai_video_render(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Render an AI-video job through Huobao and store the rendered scenes on the job.

    Storyboards are taken from the completed source job named in the
    artifacts when it has any; otherwise a blueprint is generated from the
    brief. The number of storyboards is capped by the ``shots`` artifact
    (default 4, minimum 1).

    Once the job has been moved to ``processing``, any rendering failure is
    recorded on the job as ``failed`` before the exception is re-raised, so a
    broken render no longer leaves the job stuck in ``processing``.

    Raises:
        HTTPException: 503 when Huobao is not configured, 400 when no
            storyboards are available.
        RuntimeError: when Huobao returns no drama id or a scene fails.
    """
    if not huobao_client.enabled:
        raise HTTPException(status_code=503, detail="Huobao is not configured")

    row = load_step_job(request, job_id, "ai_video_pipeline")
    artifacts = parse_job_artifacts(row)
    assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],)) if row.get("assistant_id") else None
    source_job = None
    source_storyboards: list[dict[str, Any]] = []
    source_job_id = str(artifacts.get("source_job_id") or "").strip()
    if source_job_id:
        # Scoped to the job owner so a job can only reuse its own user's storyboards.
        source_job = db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (source_job_id, row["user_id"]))
        if source_job:
            source_storyboards = extract_source_storyboards(source_job)

    shot_limit = max(int(artifacts.get("shots") or 4), 1)
    if source_storyboards:
        storyboard_items = source_storyboards[:shot_limit]
    else:
        profile = model_profile_for_account(row["user_id"], row.get("analysis_model_profile_id") or None)
        blueprint = await generate_content_blueprint(
            profile,
            title=row["title"],
            transcript_text=str(artifacts.get("brief") or row["title"]),
            style_summary=str(artifacts.get("style") or ""),
            agent_prompt=(assistant or {}).get("system_prompt", ""),
            generation_goal=(assistant or {}).get("generation_goal", "") or "生成适合视频模型的分镜与提示词",
        )
        storyboard_items = coerce_storyboards(blueprint.get("storyboards"))[:shot_limit]

    if not storyboard_items:
        raise HTTPException(status_code=400, detail="No storyboards available for AI video rendering")

    drama_payload = await huobao_client.create_drama(
        {
            "title": row["title"],
            "description": str(artifacts.get("brief") or row["title"]),
            "style": str(artifacts.get("style") or "realistic"),
            "genre": "short_video",
            "tags": "storyforge",
        }
    )
    drama_id = str(drama_payload.get("id") or "")
    if not drama_id:
        raise RuntimeError("Huobao did not return drama id")

    update_job_state(
        row["id"],
        status="processing",
        provider_name="huobao-drama",
        provider_task_id=drama_id,
        result={"huobao_drama": drama_payload},
    )

    try:
        rendered_scenes = await _render_huobao_scenes(row, artifacts, storyboard_items, drama_id)
    except Exception as exc:
        # Record the failure so the job does not stay "processing" forever,
        # then let the error propagate to the orchestrator unchanged.
        detail = exc.detail if isinstance(exc, HTTPException) else exc
        update_job_state(
            row["id"],
            status="failed",
            error=(str(detail) or type(exc).__name__)[:500],
            provider_name="huobao-drama",
            provider_task_id=drama_id,
        )
        raise

    updated = update_job_state(
        row["id"],
        status="completed",
        provider_name="huobao-drama",
        provider_task_id=drama_id,
        artifacts={
            "huobao_drama_id": drama_id,
            "source_job_id": source_job_id,
        },
        result={
            "huobao_drama": drama_payload,
            "rendered_scenes": rendered_scenes,
            "storyboards": storyboard_items,
        },
    )
    return job_context_payload(updated)
@app.post("/v2/admin/accounts/{account_id}/approve")
def approve_account(account_id: str, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]:
    """Approve an account and make sure it has a default project and knowledge base.

    Raises:
        HTTPException: 404 when no account has the given id.
    """
    lookup_sql = "SELECT * FROM accounts WHERE id = ?"
    existing = db.fetch_one(lookup_sql, (account_id,))
    if not existing:
        raise HTTPException(status_code=404, detail="Account not found")

    approved_at = utc_now()
    updated_at = utc_now()
    db.execute(
        "UPDATE accounts SET approval_status = 'approved', approved_by = ?, approved_at = ?, updated_at = ? WHERE id = ?",
        (admin["id"], approved_at, updated_at, account_id),
    )

    # Re-read so the response reflects the stored approval fields.
    approved = db.fetch_one(lookup_sql, (account_id,))
    username = approved["username"]
    default_project = ensure_default_project(account_id, username=username)
    ensure_user_kb(account_id, default_project["id"], username=username)
    return {"saved": True, "account": normalize_account(approved)}
@app.post("/v2/admin/app/update/publish")
def publish_app_update(request: PublishAppUpdateRequest, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]:
    """Store a new app release row for a platform/channel.

    Existing rows of the same platform/channel are deactivated only when the
    new release is itself published as active. Publishing an inactive row used
    to deactivate the live release as well, leaving the channel with no active
    update at all.

    Returns:
        ``{"saved": True, "action": "published", "updateId": <row id or 0>}``.
    """
    if request.isActive:
        db.execute(
            "UPDATE app_updates SET is_active = 0 WHERE platform = ? AND channel = ?",
            (request.platform, request.channel),
        )
    db.execute(
        """
        INSERT INTO app_updates (
            platform, channel, version_code, version_name, min_supported_code,
            apk_url, apk_sha256, notes, force_update, is_active, published_at, created_by
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            request.platform,
            request.channel,
            request.versionCode,
            request.versionName,
            request.minSupportedCode,
            request.apkUrl,
            request.apkSha256,
            request.notes,
            1 if request.forceUpdate else 0,
            1 if request.isActive else 0,
            now_ts(),
            admin["id"],
        ),
    )
    # Look the row up again to report its id; newest row wins if the same
    # version code was published more than once.
    row = db.fetch_one(
        """
        SELECT id
        FROM app_updates
        WHERE platform = ? AND channel = ? AND version_code = ?
        ORDER BY id DESC
        LIMIT 1
        """,
        (request.platform, request.channel, request.versionCode),
    )
    return {"saved": True, "action": "published", "updateId": row["id"] if row else 0}
# Request model for a platform analysis run. The handler that consumes it is
# not visible here, so field meanings below are limited to what the
# declarations themselves state.
# NOTE(review): field names starting with "model_" may trigger pydantic v2's
# protected-namespace warning depending on the installed version — confirm.
class PlatformAnalysisRequest(BaseModel):
    # Both id lists default to a fresh empty list per instance.
    model_profile_ids: list[str] = Field(default_factory=list)
    linked_account_ids: list[str] = Field(default_factory=list)
    include_linked_accounts: bool = True
    include_recent_similar_candidates: bool = True
    # Validated to the range 1..20, default 6.
    max_videos: int = Field(default=6, ge=1, le=20)
    extra_focus: str = ""
    # No range validation is declared for temperature.
    temperature: float = 0.35
    auto_analyze_top_videos: bool = False
    # Validated to the range 1..10, default 4.
    top_video_analysis_count: int = Field(default=4, ge=1, le=10)
PlatformTrackingAccountRequest(BaseModel): + tracked_account_id: str + assistant_id: str = "" + note: str = "" + + +class PlatformTrackingCursorRequest(BaseModel): + last_seen_at: str + + +def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, label: str) -> None: + table_prefix = platform + + def now() -> str: + return legacy.utc_now() + + def make_id(prefix: str) -> str: + return legacy.make_id(prefix) + + def _safe_json_dumps(value: Any) -> str: + return json.dumps(value or {}, ensure_ascii=False) + + def _parse_json(raw: str, fallback: Any) -> Any: + cleaned = str(raw or "").strip() + if not cleaned: + return fallback + try: + value = json.loads(cleaned) + return value + except json.JSONDecodeError: + return fallback + + def ensure_schema() -> None: + schema = f""" + CREATE TABLE IF NOT EXISTS {table_prefix}_analysis_reports ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + account_source_id TEXT NOT NULL, + focus_text TEXT NOT NULL DEFAULT '', + prompt_text TEXT NOT NULL DEFAULT '', + context_json TEXT NOT NULL DEFAULT '{{}}', + created_at TEXT NOT NULL, + FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE, + FOREIGN KEY(account_source_id) REFERENCES content_sources(id) ON DELETE CASCADE + ); + + CREATE INDEX IF NOT EXISTS idx_{table_prefix}_analysis_reports_account_created + ON {table_prefix}_analysis_reports(account_source_id, created_at DESC); + + CREATE TABLE IF NOT EXISTS {table_prefix}_analysis_suggestions ( + id TEXT PRIMARY KEY, + report_id TEXT NOT NULL, + model_profile_id TEXT NOT NULL DEFAULT '', + model_label TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'ok', + suggestion_text TEXT NOT NULL DEFAULT '', + parsed_json TEXT NOT NULL DEFAULT '{{}}', + created_at TEXT NOT NULL, + FOREIGN KEY(report_id) REFERENCES {table_prefix}_analysis_reports(id) ON DELETE CASCADE + ); + + CREATE TABLE IF NOT EXISTS {table_prefix}_similarity_searches ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + source_account_id 
TEXT NOT NULL, + prompt_text TEXT NOT NULL DEFAULT '', + context_json TEXT NOT NULL DEFAULT '{{}}', + created_at TEXT NOT NULL, + FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE, + FOREIGN KEY(source_account_id) REFERENCES content_sources(id) ON DELETE CASCADE + ); + + CREATE TABLE IF NOT EXISTS {table_prefix}_similarity_candidates ( + id TEXT PRIMARY KEY, + search_id TEXT NOT NULL, + candidate_account_id TEXT, + candidate_profile_url TEXT NOT NULL DEFAULT '', + heuristic_score REAL NOT NULL DEFAULT 0, + agent_score REAL NOT NULL DEFAULT 0, + rationale_text TEXT NOT NULL DEFAULT '', + dimensions_json TEXT NOT NULL DEFAULT '{{}}', + raw_output_json TEXT NOT NULL DEFAULT '{{}}', + rank_index INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL, + FOREIGN KEY(search_id) REFERENCES {table_prefix}_similarity_searches(id) ON DELETE CASCADE, + FOREIGN KEY(candidate_account_id) REFERENCES content_sources(id) ON DELETE SET NULL + ); + + CREATE INDEX IF NOT EXISTS idx_{table_prefix}_similarity_candidates_search_rank + ON {table_prefix}_similarity_candidates(search_id, rank_index ASC); + + CREATE TABLE IF NOT EXISTS {table_prefix}_account_relations ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + source_account_id TEXT NOT NULL, + target_account_id TEXT, + target_profile_url TEXT NOT NULL DEFAULT '', + relation_type TEXT NOT NULL DEFAULT 'benchmark', + note TEXT NOT NULL DEFAULT '', + search_id TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL, + FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE, + FOREIGN KEY(source_account_id) REFERENCES content_sources(id) ON DELETE CASCADE, + FOREIGN KEY(target_account_id) REFERENCES content_sources(id) ON DELETE SET NULL + ); + + CREATE INDEX IF NOT EXISTS idx_{table_prefix}_account_relations_source + ON {table_prefix}_account_relations(source_account_id, created_at DESC); + + CREATE TABLE IF NOT EXISTS {table_prefix}_tracked_accounts ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + 
tracked_account_id TEXT NOT NULL, + assistant_id TEXT, + note TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + UNIQUE(user_id, tracked_account_id), + FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE, + FOREIGN KEY(tracked_account_id) REFERENCES content_sources(id) ON DELETE CASCADE, + FOREIGN KEY(assistant_id) REFERENCES assistants(id) ON DELETE SET NULL + ); + + CREATE INDEX IF NOT EXISTS idx_{table_prefix}_tracked_accounts_user_updated + ON {table_prefix}_tracked_accounts(user_id, updated_at DESC); + + CREATE TABLE IF NOT EXISTS {table_prefix}_tracking_cursors ( + user_id TEXT PRIMARY KEY, + last_seen_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE + ); + """ + with legacy.db.session() as conn: + conn.executescript(schema) + + ensure_schema() + + @app.on_event("startup") + def _startup_platform_schema() -> None: + ensure_schema() + + def _content_source_rows(user_id: str, platform_value: str, kind: str = "") -> list[dict[str, Any]]: + rows = legacy.db.fetch_all( + "SELECT * FROM content_sources WHERE user_id = ? AND platform = ? ORDER BY updated_at DESC, created_at DESC", + (user_id, platform_value), + ) + if kind: + rows = [row for row in rows if row.get("source_kind") == kind] + return rows + + def _content_source_payload(row: dict[str, Any]) -> dict[str, Any]: + return legacy.content_source_payload(row) + + def _source_metadata(row: dict[str, Any]) -> dict[str, Any]: + return _content_source_payload(row).get("metadata", {}) + + def _require_account(account_id: str, user_id: str) -> dict[str, Any]: + row = legacy.db.fetch_one( + "SELECT * FROM content_sources WHERE id = ? AND user_id = ? 
AND source_kind = 'creator_account' AND platform = ?", + (account_id, user_id, platform), + ) + if not row: + raise HTTPException(status_code=404, detail=f"{label} account not found") + return row + + def _linked_video_sources(account_row: dict[str, Any]) -> list[dict[str, Any]]: + project_id = account_row.get("project_id", "") + rows = legacy.db.fetch_all( + "SELECT * FROM content_sources WHERE user_id = ? AND project_id = ? AND source_kind = 'video_link' AND platform = ? ORDER BY updated_at DESC, created_at DESC", + (account_row["user_id"], project_id, platform), + ) + account_id = account_row["id"] + source_url = str(account_row.get("source_url") or "").strip() + linked: list[dict[str, Any]] = [] + for row in rows: + metadata = _source_metadata(row) + if metadata.get("origin_content_source_id") == account_id or metadata.get("source_account_url") == source_url: + linked.append(row) + return linked + + def _jobs_for_source(source_id: str) -> list[dict[str, Any]]: + return legacy.db.fetch_all( + "SELECT * FROM jobs WHERE content_source_id = ? ORDER BY created_at DESC", + (source_id,), + ) + + def _latest_job_for_source(source_id: str) -> dict[str, Any] | None: + return legacy.db.fetch_one( + "SELECT * FROM jobs WHERE content_source_id = ? 
ORDER BY created_at DESC LIMIT 1", + (source_id,), + ) + + def _extract_performance_score(job_row: dict[str, Any] | None) -> float: + if not job_row: + return 0.0 + result_map = _parse_json(job_row.get("result_json") or "{}", {}) + artifacts_map = _parse_json(job_row.get("artifacts_json") or "{}", {}) + candidates = [ + result_map.get("performance_score"), + (result_map.get("analysis") or {}).get("performance_score"), + (result_map.get("scores") or {}).get("performance_score"), + artifacts_map.get("performance_score"), + (artifacts_map.get("scores") or {}).get("performance_score"), + ] + for value in candidates: + try: + return float(value) + except (TypeError, ValueError): + continue + return 0.0 + + def _extract_metrics(job_row: dict[str, Any] | None) -> dict[str, Any]: + if not job_row: + return {} + result_map = _parse_json(job_row.get("result_json") or "{}", {}) + artifacts_map = _parse_json(job_row.get("artifacts_json") or "{}", {}) + return ( + result_map.get("metrics") + or artifacts_map.get("metrics") + or result_map.get("stats") + or artifacts_map.get("stats") + or {} + ) + + def _video_payload(source_row: dict[str, Any]) -> dict[str, Any]: + payload = _content_source_payload(source_row) + metadata = payload.get("metadata", {}) + latest_job = _latest_job_for_source(source_row["id"]) + metrics = _extract_metrics(latest_job) + tags = metadata.get("tags") or [] + if not isinstance(tags, list): + tags = [] + return { + "id": source_row["id"], + "aweme_id": str(metadata.get("external_id") or source_row["id"]), + "title": payload.get("title") or "未命名作品", + "description": metadata.get("summary") or metadata.get("description") or (latest_job or {}).get("style_summary", ""), + "share_url": payload.get("source_url", ""), + "cover_url": metadata.get("cover_url") or "", + "duration_sec": float(metadata.get("duration_sec") or 0), + "published_at": metadata.get("published_at") or source_row.get("created_at"), + "tags": tags, + "content_type": 
metadata.get("content_type") or "video", + "stats": { + "play": metrics.get("play_count") or metrics.get("play") or 0, + "like": metrics.get("like_count") or metrics.get("like") or 0, + "comment": metrics.get("comment_count") or metrics.get("comment") or 0, + "share": metrics.get("share_count") or metrics.get("share") or 0, + }, + "score": { + "performance_score": _extract_performance_score(latest_job), + }, + "source": payload, + "latest_job_id": (latest_job or {}).get("id", ""), + } + + def _account_payload(account_row: dict[str, Any]) -> dict[str, Any]: + payload = _content_source_payload(account_row) + metadata = payload.get("metadata", {}) + videos = [_video_payload(item) for item in _linked_video_sources(account_row)] + play_values = [float(video["stats"].get("play") or 0) for video in videos if float(video["stats"].get("play") or 0) > 0] + like_values = [float(video["stats"].get("like") or 0) for video in videos if float(video["stats"].get("like") or 0) > 0] + tags = metadata.get("tags") or [] + if not isinstance(tags, list): + tags = [] + return { + "id": account_row["id"], + "platform": platform, + "profile_url": payload.get("source_url", ""), + "canonical_profile_url": payload.get("source_url", ""), + "handle": payload.get("handle", ""), + "nickname": payload.get("title") or payload.get("handle") or "未命名账号", + "signature": metadata.get("bio") or metadata.get("description") or "", + "avatar_url": metadata.get("avatar_url") or "", + "tags": tags, + "keywords": metadata.get("keywords") or [], + "sync_status": "ready" if payload.get("metadata", {}).get("last_sync_error", "") == "" else "partial", + "video_summary": { + "count": len(videos), + "avg_play": sum(play_values) / len(play_values) if play_values else 0, + "avg_like": sum(like_values) / len(like_values) if like_values else 0, + "videos": videos[:8], + }, + "project_id": payload.get("project_id", ""), + "created_at": payload.get("created_at", ""), + "updated_at": payload.get("updated_at", ""), + } + + 
def _relation_payload(row: dict[str, Any]) -> dict[str, Any]: + target = None + if row.get("target_account_id"): + target = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["target_account_id"],)) + return { + "id": row["id"], + "source_account_id": row["source_account_id"], + "target_account_id": row.get("target_account_id", "") or "", + "target_profile_url": row.get("target_profile_url", ""), + "target_nickname": (_account_payload(target)["nickname"] if target else ""), + "relation_type": row.get("relation_type", "benchmark"), + "note": row.get("note", ""), + "search_id": row.get("search_id", ""), + "created_at": row["created_at"], + } + + def _report_payload(row: dict[str, Any]) -> dict[str, Any]: + suggestions = [ + { + "id": suggestion["id"], + "status": suggestion.get("status", "ok"), + "model_profile_id": suggestion.get("model_profile_id", ""), + "model_label": suggestion.get("model_label", ""), + "suggestion_text": suggestion.get("suggestion_text", ""), + "parsed_json": _parse_json(suggestion.get("parsed_json") or "{}", {}), + "created_at": suggestion.get("created_at", ""), + } + for suggestion in legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_analysis_suggestions WHERE report_id = ? ORDER BY created_at ASC", + (row["id"],), + ) + ] + return { + "id": row["id"], + "focus_text": row.get("focus_text", ""), + "suggestions": suggestions, + "created_at": row["created_at"], + } + + def _workspace_payload(account_row: dict[str, Any]) -> dict[str, Any]: + reports = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_analysis_reports WHERE account_source_id = ? ORDER BY created_at DESC LIMIT 6", + (account_row["id"],), + ) + relations = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? 
ORDER BY created_at DESC", + (account_row["id"],), + ) + return { + "account": _account_payload(account_row), + "recent_reports": [_report_payload(row) for row in reports], + "linked_accounts": [_relation_payload(row) for row in relations], + } + + async def _call_reasoning_model(user_id: str, prompt: str, *, system_prompt: str, model_profile_id: str = "", temperature: float = 0.3) -> tuple[str, dict[str, Any]]: + profile = legacy.model_profile_for_account(user_id, model_profile_id or None) + output = await legacy.call_model(profile, system_prompt=system_prompt, user_prompt=prompt, temperature=temperature) + parsed = legacy.parse_json_object(output) + return output, parsed if isinstance(parsed, dict) else {} + + async def _create_sync_job_for_account(account_row: dict[str, Any], assistant_id: str = "") -> dict[str, Any]: + project_id = account_row.get("project_id") or "" + if not project_id: + raise HTTPException(status_code=400, detail="Account source is not attached to a project") + kb = legacy.resolve_target_kb(account_row["user_id"], None, project_id) + source_payload = _content_source_payload(account_row) + profile = legacy.model_profile_for_account(account_row["user_id"], None) + job_row = legacy.create_job_record( + account_id=account_row["user_id"], + project_id=project_id, + knowledge_base_id=kb["id"], + content_source_id=account_row["id"], + assistant_id=assistant_id or None, + source_type="creator_account", + line_type="analysis", + workflow_key="content_source_sync_pipeline", + title=f"{source_payload.get('title') or source_payload.get('handle') or label} 内容同步", + language="auto", + source_url=source_payload.get("source_url", ""), + artifacts={ + "source_account_url": source_payload.get("source_url", ""), + "platform": platform, + "handle": source_payload.get("handle", ""), + "max_items": int(source_payload.get("metadata", {}).get("max_items") or 5), + "skip_existing": True, + "auto_trigger_analysis": True, + }, + 
analysis_model_profile_id=profile["id"], + ) + queued = await legacy.trigger_orchestrated_job(job_row) + return legacy.job_payload(queued) + + def _tracking_cursor(user_id: str) -> dict[str, Any] | None: + return legacy.db.fetch_one( + f"SELECT * FROM {table_prefix}_tracking_cursors WHERE user_id = ?", + (user_id,), + ) + + def _set_tracking_cursor(user_id: str, last_seen_at: str) -> dict[str, Any]: + existing = _tracking_cursor(user_id) + updated_at = now() + if existing: + legacy.db.execute( + f"UPDATE {table_prefix}_tracking_cursors SET last_seen_at = ?, updated_at = ? WHERE user_id = ?", + (last_seen_at, updated_at, user_id), + ) + else: + legacy.db.execute( + f"INSERT INTO {table_prefix}_tracking_cursors (user_id, last_seen_at, updated_at) VALUES (?, ?, ?)", + (user_id, last_seen_at, updated_at), + ) + return legacy.db.fetch_one( + f"SELECT * FROM {table_prefix}_tracking_cursors WHERE user_id = ?", + (user_id,), + ) + + def _tracking_digest_item(tracked_row: dict[str, Any], video: dict[str, Any]) -> dict[str, Any]: + latest_job = _latest_job_for_source(video["id"]) + summary = (latest_job or {}).get("style_summary") or video.get("description") or "已发现更新内容" + assistant = None + if tracked_row.get("assistant_id"): + assistant_row = legacy.db.fetch_one("SELECT * FROM assistants WHERE id = ?", (tracked_row["assistant_id"],)) + if assistant_row: + assistant = legacy.assistant_payload(assistant_row) + borrowing_points = [point for point in [summary[:36], video.get("title", "")[:36]] if point] + return { + "tracking_id": tracked_row["id"], + "tracked_account_id": tracked_row["tracked_account_id"], + "tracked_account_name": _account_payload(_require_account(tracked_row["tracked_account_id"], tracked_row["user_id"]))["nickname"], + "assistant_id": tracked_row.get("assistant_id", "") or "", + "assistant_name": (assistant or {}).get("name", ""), + "note": tracked_row.get("note", ""), + "video": video, + "summary_text": summary, + "borrowing_points": borrowing_points[:3], 
+ "created_at": video.get("published_at") or now(), + } + + def _tracking_digest(user_id: str, since_value: str = "", limit: int = 24) -> dict[str, Any]: + tracked_rows = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? ORDER BY updated_at DESC", + (user_id,), + ) + cursor = _tracking_cursor(user_id) + threshold = (since_value or (cursor or {}).get("last_seen_at") or "").strip() + items: list[dict[str, Any]] = [] + for tracked in tracked_rows: + account_row = _require_account(tracked["tracked_account_id"], user_id) + for video in _account_payload(account_row)["video_summary"]["videos"]: + published_at = str(video.get("published_at") or "") + if threshold and published_at and published_at <= threshold: + continue + items.append(_tracking_digest_item(tracked, video)) + items.sort(key=lambda item: item.get("created_at", ""), reverse=True) + return { + "items": items[:limit], + "tracked_accounts": [ + { + "id": row["id"], + "tracked_account_id": row["tracked_account_id"], + "assistant_id": row.get("assistant_id", "") or "", + "note": row.get("note", ""), + "updated_at": row["updated_at"], + } + for row in tracked_rows + ], + "cursor_last_seen_at": (cursor or {}).get("last_seen_at", ""), + } + + @app.get(f"/v2/{platform}/accounts") + def list_platform_accounts(account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]: + return [_account_payload(row) for row in _content_source_rows(account["id"], platform, "creator_account")] + + @app.get(f"/v2/{platform}/accounts/{{account_id}}/workspace") + def get_platform_account_workspace(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + account_row = _require_account(account_id, account["id"]) + return _workspace_payload(account_row) + + @app.get(f"/v2/{platform}/accounts/{{account_id}}/videos") + def list_platform_account_videos( + account_id: str, + limit: int = Query(default=80, ge=1, le=200), + account: dict[str, 
Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + account_row = _require_account(account_id, account["id"]) + items = [_video_payload(row) for row in _linked_video_sources(account_row)] + items.sort(key=lambda item: (item["score"]["performance_score"], item.get("published_at") or ""), reverse=True) + top_ids = [item["id"] for item in items if float(item["score"]["performance_score"] or 0) >= 60][:12] + latest_ids = [item["id"] for item in sorted(items, key=lambda item: item.get("published_at") or "", reverse=True)[:12]] + return { + "items": items[:limit], + "count": len(items), + "meta": {"platform": platform, "account_id": account_id}, + "top_scored_video_ids": top_ids, + "latest_video_ids": latest_ids, + "high_score_threshold": 60, + } + + @app.post(f"/v2/{platform}/accounts/{{account_id}}/analysis") + async def analyze_platform_account( + account_id: str, + request: PlatformAnalysisRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + account_row = _require_account(account_id, account["id"]) + workspace = _workspace_payload(account_row) + context = { + "account": workspace["account"], + "top_videos": workspace["account"]["video_summary"]["videos"][: max(1, min(request.max_videos, 8))], + "linked_accounts": workspace["linked_accounts"][:5], + "extra_focus": request.extra_focus, + } + prompt = ( + f"请从新媒体商业化运营视角,分析这个{label}账号,输出执行摘要、可借鉴点、风险提醒和下一步动作。" + f"\n\n输入:\n{json.dumps(context, ensure_ascii=False, indent=2)}" + ) + output, parsed = await _call_reasoning_model( + account["id"], + prompt, + system_prompt="你是新媒体账号分析顾问。尽量输出 JSON,字段包括 executive_summary、borrow_points、risks、next_actions。", + temperature=request.temperature, + ) + report_id = make_id(f"{platform}_report") + legacy.db.execute( + f"INSERT INTO {table_prefix}_analysis_reports (id, user_id, account_source_id, focus_text, prompt_text, context_json, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)", + ( + report_id, + account["id"], + account_row["id"], + 
request.extra_focus or "", + prompt, + _safe_json_dumps(context), + now(), + ), + ) + suggestion_id = make_id(f"{platform}_suggestion") + profile = legacy.model_profile_for_account(account["id"], None) + legacy.db.execute( + f"INSERT INTO {table_prefix}_analysis_suggestions (id, report_id, model_profile_id, model_label, status, suggestion_text, parsed_json, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + ( + suggestion_id, + report_id, + profile["id"], + f"{profile.get('name', '')} · {profile.get('model_name', '')}".strip(" ·"), + "ok", + output[:4000], + _safe_json_dumps(parsed), + now(), + ), + ) + report_row = legacy.db.fetch_one( + f"SELECT * FROM {table_prefix}_analysis_reports WHERE id = ?", + (report_id,), + ) + report_payload = _report_payload(report_row) + return { + "report_id": report_id, + "account_id": account_row["id"], + "suggestions": report_payload["suggestions"], + "context": context, + } + + @app.post(f"/v2/{platform}/accounts/{{account_id}}/videos/analyze-top") + async def analyze_platform_top_videos( + account_id: str, + request: PlatformTopVideoAnalysisRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + account_row = _require_account(account_id, account["id"]) + videos = [_video_payload(row) for row in _linked_video_sources(account_row)] + ranked = [ + video for video in sorted(videos, key=lambda item: item["score"]["performance_score"], reverse=True) + if float(video["score"]["performance_score"] or 0) >= float(request.min_score or 0) + ][: request.top_video_count] + results: list[dict[str, Any]] = [] + for video in ranked: + prompt = ( + f"请拆解这条{label}作品为什么值得关注,输出 summary、borrow_points、risks。" + f"\n\n输入:\n{json.dumps(video, ensure_ascii=False, indent=2)}" + ) + output, parsed = await _call_reasoning_model( + account["id"], + prompt, + system_prompt="你是短视频内容拆解助手。尽量输出 JSON,字段包括 summary、borrow_points、risks。", + model_profile_id=request.model_profile_id, + temperature=request.temperature, + ) + 
summary_text = str(parsed.get("summary") or parsed.get("headline_summary") or output)[:240] + results.append( + { + "id": make_id(f"{platform}_va"), + "video_id": video["id"], + "video_title": video["title"], + "status": "ok", + "summary_text": summary_text, + "parsed_json": parsed, + "performance_score": video["score"]["performance_score"], + "created_at": now(), + } + ) + return { + "account_id": account_row["id"], + "analyzed_count": len(results), + "items": results, + } + + @app.post(f"/v2/{platform}/similar-searches") + async def create_platform_similarity_search( + request: PlatformSimilaritySearchRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + account_row = _require_account(request.source_account_id, account["id"]) + source_payload = _account_payload(account_row) + candidates = [ + row for row in _content_source_rows(account["id"], platform, "creator_account") + if row["id"] != account_row["id"] + ][: max(5, request.max_candidates)] + ranked_candidates: list[dict[str, Any]] = [] + source_tags = set(source_payload.get("tags") or []) + for index, row in enumerate(candidates, start=1): + payload = _account_payload(row) + overlap = len(source_tags.intersection(set(payload.get("tags") or []))) + heuristic = overlap * 10 + max(0, 50 - index) + rationale = f"与源账号同平台,标签重合 {overlap},适合作为{label}对标候选。" + ranked_candidates.append( + { + "candidate_account_id": row["id"], + "candidate_profile_url": payload.get("profile_url", ""), + "candidate_nickname": payload.get("nickname", ""), + "heuristic_score": float(heuristic), + "agent_score": float(heuristic), + "rationale_text": rationale, + "dimensions_json": {"tag_overlap": overlap}, + } + ) + ranked_candidates.sort(key=lambda item: item["agent_score"], reverse=True) + ranked_candidates = ranked_candidates[: request.max_candidates] + search_id = make_id(f"{platform}_search") + legacy.db.execute( + f"INSERT INTO {table_prefix}_similarity_searches (id, user_id, 
source_account_id, prompt_text, context_json, created_at) VALUES (?, ?, ?, ?, ?, ?)", + ( + search_id, + account["id"], + account_row["id"], + request.extra_requirements or "", + _safe_json_dumps({"source_account": source_payload}), + now(), + ), + ) + for idx, item in enumerate(ranked_candidates): + legacy.db.execute( + f"""INSERT INTO {table_prefix}_similarity_candidates + (id, search_id, candidate_account_id, candidate_profile_url, heuristic_score, agent_score, rationale_text, dimensions_json, raw_output_json, rank_index, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + make_id(f"{platform}_candidate"), + search_id, + item.get("candidate_account_id") or None, + item.get("candidate_profile_url", ""), + item.get("heuristic_score", 0), + item.get("agent_score", 0), + item.get("rationale_text", ""), + _safe_json_dumps(item.get("dimensions_json") or {}), + _safe_json_dumps(item), + idx, + now(), + ), + ) + return {"id": search_id, "search_id": search_id} + + @app.get(f"/v2/{platform}/similar-searches/{{search_id}}") + def get_platform_similarity_search(search_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + search_row = legacy.db.fetch_one( + f"SELECT * FROM {table_prefix}_similarity_searches WHERE id = ? AND user_id = ?", + (search_id, account["id"]), + ) + if not search_row: + raise HTTPException(status_code=404, detail="Similarity search not found") + candidate_rows = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_similarity_candidates WHERE search_id = ? 
ORDER BY rank_index ASC", + (search_id,), + ) + candidates = [] + for row in candidate_rows: + payload = _parse_json(row.get("raw_output_json") or "{}", {}) + payload.setdefault("candidate_account_id", row.get("candidate_account_id", "")) + payload.setdefault("candidate_profile_url", row.get("candidate_profile_url", "")) + payload.setdefault("rationale_text", row.get("rationale_text", "")) + payload.setdefault("agent_score", row.get("agent_score", 0)) + payload.setdefault("heuristic_score", row.get("heuristic_score", 0)) + candidates.append(payload) + return { + "id": search_row["id"], + "search_id": search_row["id"], + "source_account_id": search_row["source_account_id"], + "candidates": candidates, + "created_at": search_row["created_at"], + } + + @app.get(f"/v2/{platform}/accounts/{{account_id}}/benchmark-links") + def list_platform_benchmark_links(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]: + _require_account(account_id, account["id"]) + rows = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? 
ORDER BY created_at DESC", + (account_id,), + ) + return [_relation_payload(row) for row in rows] + + @app.post(f"/v2/{platform}/accounts/{{account_id}}/benchmark-links") + def create_platform_benchmark_links( + account_id: str, + request: PlatformBenchmarkLinksRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_account = _require_account(account_id, account["id"]) + created: list[dict[str, Any]] = [] + for target_account_id in request.target_account_ids: + target = _require_account(target_account_id, account["id"]) + relation_id = make_id(f"{platform}_link") + legacy.db.execute( + f"INSERT INTO {table_prefix}_account_relations (id, user_id, source_account_id, target_account_id, target_profile_url, relation_type, note, search_id, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + relation_id, + account["id"], + source_account["id"], + target["id"], + target.get("source_url", ""), + request.relation_type or "benchmark", + request.note or "", + request.search_id or "", + now(), + ), + ) + created.append(_relation_payload(legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_account_relations WHERE id = ?", (relation_id,)))) + for target_profile_url in request.target_profile_urls: + cleaned = str(target_profile_url or "").strip() + if not cleaned: + continue + relation_id = make_id(f"{platform}_link") + legacy.db.execute( + f"INSERT INTO {table_prefix}_account_relations (id, user_id, source_account_id, target_account_id, target_profile_url, relation_type, note, search_id, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + relation_id, + account["id"], + source_account["id"], + None, + cleaned, + request.relation_type or "benchmark", + request.note or "", + request.search_id or "", + now(), + ), + ) + created.append(_relation_payload(legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_account_relations WHERE id = ?", (relation_id,)))) + return {"links": created} + + @app.get(f"/v2/{platform}/tracking/accounts") + 
def list_platform_tracking_accounts(account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + rows = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? ORDER BY updated_at DESC", + (account["id"],), + ) + cursor = _tracking_cursor(account["id"]) + return { + "items": [ + { + "id": row["id"], + "tracked_account_id": row["tracked_account_id"], + "assistant_id": row.get("assistant_id", "") or "", + "note": row.get("note", ""), + "updated_at": row["updated_at"], + } + for row in rows + ], + "cursor_last_seen_at": (cursor or {}).get("last_seen_at", ""), + } + + @app.post(f"/v2/{platform}/tracking/accounts") + def create_platform_tracking_account( + request: PlatformTrackingAccountRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + tracked = _require_account(request.tracked_account_id, account["id"]) + assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, tracked.get("project_id", "")) + existing = legacy.db.fetch_one( + f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? AND tracked_account_id = ?", + (account["id"], tracked["id"]), + ) + if existing: + legacy.db.execute( + f"UPDATE {table_prefix}_tracked_accounts SET assistant_id = ?, note = ?, updated_at = ? 
WHERE id = ?", + (((assistant or {}).get("id") or None), request.note or "", now(), existing["id"]), + ) + row = legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_tracked_accounts WHERE id = ?", (existing["id"],)) + else: + tracking_id = make_id(f"{platform}_track") + legacy.db.execute( + f"INSERT INTO {table_prefix}_tracked_accounts (id, user_id, tracked_account_id, assistant_id, note, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)", + ( + tracking_id, + account["id"], + tracked["id"], + (assistant or {}).get("id") or None, + request.note or "", + now(), + now(), + ), + ) + row = legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_tracked_accounts WHERE id = ?", (tracking_id,)) + return { + "id": row["id"], + "tracked_account_id": row["tracked_account_id"], + "assistant_id": row.get("assistant_id", "") or "", + "note": row.get("note", ""), + "updated_at": row["updated_at"], + } + + @app.post(f"/v2/{platform}/tracking/accounts/{{tracked_account_id}}/refresh") + async def refresh_platform_tracked_account(tracked_account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + tracked_row = legacy.db.fetch_one( + f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? AND tracked_account_id = ?", + (account["id"], tracked_account_id), + ) + if not tracked_row: + raise HTTPException(status_code=404, detail="Tracked account not found") + account_row = _require_account(tracked_account_id, account["id"]) + queued = await _create_sync_job_for_account(account_row, assistant_id=tracked_row.get("assistant_id", "") or "") + legacy.db.execute( + f"UPDATE {table_prefix}_tracked_accounts SET updated_at = ? 
WHERE id = ?", + (now(), tracked_row["id"]), + ) + return {"tracking_id": tracked_row["id"], "tracked_account_id": tracked_account_id, "sync_job_id": queued["id"], "status": queued["status"]} + + @app.post(f"/v2/{platform}/tracking/refresh") + async def refresh_platform_tracking(account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + tracked_rows = legacy.db.fetch_all( + f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? ORDER BY updated_at DESC", + (account["id"],), + ) + refreshed = 0 + failed = 0 + items: list[dict[str, Any]] = [] + for row in tracked_rows: + try: + account_row = _require_account(row["tracked_account_id"], account["id"]) + queued = await _create_sync_job_for_account(account_row, assistant_id=row.get("assistant_id", "") or "") + refreshed += 1 + items.append({"tracking_id": row["id"], "tracked_account_id": row["tracked_account_id"], "sync_job_id": queued["id"], "status": queued["status"]}) + except Exception as exc: + failed += 1 + items.append({"tracking_id": row["id"], "tracked_account_id": row["tracked_account_id"], "error": str(exc)}) + return {"refreshed": refreshed, "failed": failed, "items": items} + + @app.post(f"/v2/{platform}/tracking/cursor") + def update_platform_tracking_cursor(request: PlatformTrackingCursorRequest, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + cursor = _set_tracking_cursor(account["id"], request.last_seen_at) + return {"last_seen_at": cursor["last_seen_at"], "updated_at": cursor["updated_at"]} + + @app.get(f"/v2/{platform}/tracking/digest") + def get_platform_tracking_digest( + since: str = Query(default=""), + limit: int = Query(default=24, ge=1, le=100), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + return _tracking_digest(account["id"], since_value=(since or "").strip(), limit=limit) diff --git a/collector-service/app/douyin_features.py b/collector-service/app/douyin_features.py index 
5ca66b6..8cf30f1 100644 --- a/collector-service/app/douyin_features.py +++ b/collector-service/app/douyin_features.py @@ -2,7 +2,6 @@ from __future__ import annotations import asyncio import json -import math import re from collections import Counter from datetime import datetime, timedelta, timezone @@ -13,7 +12,6 @@ from urllib.parse import quote, unquote import httpx from fastapi import Depends, HTTPException from pydantic import BaseModel, Field -from starlette.concurrency import run_in_threadpool DEFAULT_CREATOR_CENTER_URLS = [ "https://creator.douyin.com/creator-micro/home", @@ -38,8 +36,6 @@ class DouyinAccountSyncRequest(BaseModel): profile_url: str = "" session_cookie: str = "" creator_center_urls: list[str] = Field(default_factory=lambda: list(DEFAULT_CREATOR_CENTER_URLS)) - allow_creator_center_profile_fallback: bool = False - compact_response: bool = False manual_profile_payload: dict[str, Any] | None = None manual_creator_pages: list[ManualPageCapture] = Field(default_factory=list) manual_work_payloads: list[dict[str, Any]] = Field(default_factory=list) @@ -193,20 +189,6 @@ def _normalize_timestamp(value: Any) -> str | None: return None -def _parse_iso_datetime(value: Any) -> datetime | None: - text = str(value or "").strip() - if not text: - return None - normalized = text.replace("Z", "+00:00") - try: - parsed = datetime.fromisoformat(normalized) - except ValueError: - return None - if parsed.tzinfo is None: - parsed = parsed.replace(tzinfo=timezone.utc) - return parsed.astimezone(timezone.utc) - - def _extract_hashtags(*texts: str) -> list[str]: tags: list[str] = [] for text in texts: @@ -241,104 +223,6 @@ def _extract_keywords(*texts: str) -> list[str]: return _dedupe_strings(filtered) -def _video_score_breakdown(video: dict[str, Any]) -> dict[str, Any]: - stats = video.get("stats", {}) or {} - play = float(stats.get("play") or 0) - like = float(stats.get("like") or 0) - comment = float(stats.get("comment") or 0) - share = float(stats.get("share") 
or 0) - collect = float(stats.get("collect") or 0) - - published_dt = _parse_iso_datetime(video.get("published_at")) - if published_dt: - age_days = max(0.0, (datetime.now(timezone.utc) - published_dt).total_seconds() / 86400.0) - else: - age_days = 999.0 - - if play > 0: - rate_denominator = play - else: - rate_denominator = max( - like * 18.0, - comment * 70.0, - share * 95.0, - collect * 55.0, - 1000.0 - ) - - engagement_rate = (like + comment * 2.2 + share * 4.2 + collect * 3.0) / max(rate_denominator, 1.0) - share_rate = share / max(rate_denominator, 1.0) - collect_rate = collect / max(rate_denominator, 1.0) - comment_rate = comment / max(rate_denominator, 1.0) - like_rate = like / max(rate_denominator, 1.0) - - volume_component = min(36.0, math.log10(play + 1.0) * 9.0) - interaction_component = min(28.0, engagement_rate * 100.0) - spread_component = min(18.0, (share_rate * 1200.0) + (collect_rate * 700.0)) - freshness_component = max(0.0, 18.0 - min(age_days, 36.0) * 0.5) - baseline_component = 6.0 if play > 0 or like > 0 else 0.0 - - performance_score = round( - min(100.0, volume_component + interaction_component + spread_component + freshness_component + baseline_component), - 2 - ) - popularity_score = round( - min( - 100.0, - math.log10(play + 1.0) * 24.0 - + math.log10(like + 1.0) * 22.0 - + math.log10(comment + 1.0) * 20.0 - + math.log10(share + 1.0) * 18.0 - + math.log10(collect + 1.0) * 16.0 - ), - 2 - ) - commercial_score = round( - min( - 100.0, - performance_score * 0.58 - + min(24.0, share_rate * 2200.0) - + min(18.0, collect_rate * 2000.0) - + min(12.0, comment_rate * 900.0) - ), - 2 - ) - - signals: list[str] = [] - if share_rate >= 0.01: - signals.append("分享率高,具备扩散和二次传播潜力") - if collect_rate >= 0.008: - signals.append("收藏率高,适合沉淀模板、知识产品或私域承接") - if like_rate >= 0.05: - signals.append("点赞率突出,说明钩子与情绪价值有效") - if comment_rate >= 0.01: - signals.append("评论率较高,适合做互动运营和评论区转化") - if age_days <= 14 and play >= 10_000: - 
signals.append("近期作品仍有较高播放,说明题材仍在窗口期") - if not signals: - signals.append("当前数据中性,需要结合转化目标继续验证") - - return { - "performance_score": performance_score, - "popularity_score": popularity_score, - "commercial_score": commercial_score, - "engagement_rate": round(engagement_rate, 4), - "share_rate": round(share_rate, 4), - "collect_rate": round(collect_rate, 4), - "comment_rate": round(comment_rate, 4), - "like_rate": round(like_rate, 4), - "age_days": round(age_days if age_days < 999 else 0.0, 1) if published_dt else None, - "components": { - "volume": round(volume_component, 2), - "interaction": round(interaction_component, 2), - "spread": round(spread_component, 2), - "freshness": round(freshness_component, 2), - "baseline": round(baseline_component, 2) - }, - "signals": signals[:4] - } - - def _flatten_json(value: Any, prefix: str = "") -> list[tuple[str, str, str]]: rows: list[tuple[str, str, str]] = [] if isinstance(value, dict): @@ -519,79 +403,27 @@ def _pick_best_profile(candidates: list[dict[str, Any]], fallback_url: str = "") def _normalize_video_candidate(candidate: dict[str, Any]) -> dict[str, Any]: - def _collect_image_urls(node: Any) -> list[str]: - urls: list[str] = [] - - def _visit(value: Any) -> None: - if isinstance(value, str): - text = value.strip() - if text.startswith("http"): - urls.append(text) - return - if isinstance(value, list): - for item in value[:20]: - _visit(item) - return - if not isinstance(value, dict): - return - - for key in ("url", "download_url", "origin_url", "display_url", "cover_url"): - target = value.get(key) - if isinstance(target, str) and target.strip().startswith("http"): - urls.append(target.strip()) - - url_list = value.get("url_list") - if isinstance(url_list, list): - for item in url_list[:5]: - _visit(item) - - for key in ("image", "images", "cover", "display_image", "origin_image"): - child = value.get(key) - if child not in (None, "", [], {}): - _visit(child) - - _visit(node) - return _dedupe_strings(urls) - 
stats_source = candidate.get("statistics") if isinstance(candidate.get("statistics"), dict) else {} video_source = candidate.get("video") if isinstance(candidate.get("video"), dict) else {} title = _first_non_empty(candidate.get("title"), candidate.get("desc"), candidate.get("share_title")) description = _first_non_empty(candidate.get("desc"), candidate.get("title"), candidate.get("text")) cover = candidate.get("cover") or video_source.get("cover") - image_urls = _collect_image_urls( - [ - candidate.get("images"), - candidate.get("image_infos"), - candidate.get("image_list"), - candidate.get("slides"), - candidate.get("photos"), - candidate.get("photo"), - candidate.get("image_post_info"), - ] - ) if isinstance(cover, dict): cover = _first_non_empty( cover.get("url_list", [""])[0] if isinstance(cover.get("url_list"), list) else "", cover.get("url") ) - duration_raw = float(candidate.get("duration") or video_source.get("duration") or 0) - duration_sec = duration_raw / 1000.0 if duration_raw > 1000 else duration_raw - has_video_media = bool(video_source) or duration_sec > 0.3 - aweme_type = str(candidate.get("aweme_type") or "") - looks_like_image_text = bool(image_urls) and (not has_video_media or aweme_type in {"51", "55", "61", "68", "122", "150"}) - content_type = "image_text" if looks_like_image_text else "video" return { "aweme_id": _first_non_empty(candidate.get("aweme_id"), candidate.get("item_id"), candidate.get("group_id")), "title": title, "description": description, "share_url": _first_non_empty(candidate.get("share_url")), - "cover_url": _first_non_empty(cover, image_urls[0] if image_urls else ""), - "duration_sec": duration_sec, + "cover_url": _first_non_empty(cover), + "duration_sec": float(candidate.get("duration") or video_source.get("duration") or 0) / 1000.0 + if float(candidate.get("duration") or video_source.get("duration") or 0) > 1000 + else float(candidate.get("duration") or video_source.get("duration") or 0), "published_at": 
_normalize_timestamp(candidate.get("create_time") or candidate.get("publish_time")), "tags": _extract_hashtags(title, description), - "content_type": content_type, - "content_type_label": "图文" if content_type == "image_text" else "视频", - "image_count": len(image_urls), "stats": { "play": _parse_count(stats_source.get("play_count") or candidate.get("play_count")), "like": _parse_count(stats_source.get("digg_count") or candidate.get("digg_count")), @@ -623,88 +455,6 @@ def _extract_videos(payloads: Iterable[Any]) -> list[dict[str, Any]]: return videos -def _merge_profile_payload(base: dict[str, Any], overlay: dict[str, Any]) -> dict[str, Any]: - if not overlay: - return base - if not base or not base.get("nickname"): - return overlay - - merged = dict(base) - merged["nickname"] = base.get("nickname") or overlay.get("nickname", "") - merged["signature"] = base.get("signature") or overlay.get("signature", "") - merged["profile_url"] = base.get("profile_url") or overlay.get("profile_url", "") - merged["canonical_profile_url"] = base.get("canonical_profile_url") or overlay.get("canonical_profile_url", "") - merged["sec_uid"] = base.get("sec_uid") or overlay.get("sec_uid", "") - merged["douyin_uid"] = base.get("douyin_uid") or overlay.get("douyin_uid", "") - merged["douyin_id"] = base.get("douyin_id") or overlay.get("douyin_id", "") - merged["avatar_url"] = base.get("avatar_url") or overlay.get("avatar_url", "") - merged["tags"] = _dedupe_strings(base.get("tags", []) + overlay.get("tags", [])) - merged["stats"] = { - "followers": float(base.get("stats", {}).get("followers") or overlay.get("stats", {}).get("followers") or 0), - "following": float(base.get("stats", {}).get("following") or overlay.get("stats", {}).get("following") or 0), - "likes": float(base.get("stats", {}).get("likes") or overlay.get("stats", {}).get("likes") or 0), - "videos": float(base.get("stats", {}).get("videos") or overlay.get("stats", {}).get("videos") or 0), - } - if not merged.get("raw"): - 
merged["raw"] = overlay.get("raw", {}) - return merged - - -def _extract_creator_payloads(creator_data: dict[str, Any]) -> list[Any]: - payloads: list[Any] = [] - for page in creator_data.get("pages", []): - for blob in page.get("blobs", []): - payload = blob.get("payload") - if payload not in (None, "", [], {}): - payloads.append(payload) - return payloads - - -def _profile_identity_value(profile: dict[str, Any], field_name: str) -> str: - value = str(profile.get(field_name, "") or "").strip() - if not value: - return "" - if field_name in {"profile_url", "canonical_profile_url"}: - return _normalize_profile_url_input(value) - return value - - -def _profiles_appear_same(left: dict[str, Any], right: dict[str, Any]) -> bool: - if not left or not right: - return False - for field_name in ("sec_uid", "douyin_uid", "douyin_id", "canonical_profile_url", "profile_url"): - left_value = _profile_identity_value(left, field_name) - right_value = _profile_identity_value(right, field_name) - if left_value and right_value and left_value == right_value: - return True - return False - - -def _normalize_profile_url_input(value: str) -> str: - text = str(value or "").strip() - if not text: - return "" - - match = re.search(r"https?://[^\s]+", text) - if match: - text = match.group(0) - - text = text.strip().strip(",。;;、,)") - if text.startswith("www.douyin.com/") or text.startswith("douyin.com/"): - text = f"https://{text}" - return text - - -def _looks_like_douyin_anti_bot_page(html: str) -> bool: - markers = ( - "window.byted_acrawler.init", - "__ac_signature", - "__ac_nonce", - "window.location.reload()" - ) - return any(marker in html for marker in markers) - - async def _fetch_html(url: str, cookie: str = "") -> tuple[str, str]: headers = { "User-Agent": DEFAULT_USER_AGENT, @@ -928,37 +678,6 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: CREATE INDEX IF NOT EXISTS idx_douyin_videos_account_aweme ON douyin_videos(account_id, aweme_id); - CREATE TABLE IF NOT 
EXISTS douyin_video_analyses ( - id TEXT PRIMARY KEY, - account_id TEXT NOT NULL, - user_id TEXT NOT NULL, - video_id TEXT NOT NULL, - report_id TEXT NOT NULL DEFAULT '', - model_profile_id TEXT NOT NULL DEFAULT '', - model_label TEXT NOT NULL DEFAULT '', - source_type TEXT NOT NULL DEFAULT 'top_score_auto', - status TEXT NOT NULL DEFAULT 'ok', - performance_score REAL NOT NULL DEFAULT 0, - commercial_score REAL NOT NULL DEFAULT 0, - hook_score REAL NOT NULL DEFAULT 0, - retention_score REAL NOT NULL DEFAULT 0, - conversion_score REAL NOT NULL DEFAULT 0, - summary_text TEXT NOT NULL DEFAULT '', - suggestion_text TEXT NOT NULL DEFAULT '', - parsed_json TEXT NOT NULL DEFAULT '{}', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - FOREIGN KEY(account_id) REFERENCES douyin_accounts(id) ON DELETE CASCADE, - FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE, - FOREIGN KEY(video_id) REFERENCES douyin_videos(id) ON DELETE CASCADE - ); - - CREATE INDEX IF NOT EXISTS idx_douyin_video_analyses_video_created - ON douyin_video_analyses(video_id, created_at DESC); - - CREATE INDEX IF NOT EXISTS idx_douyin_video_analyses_account_created - ON douyin_video_analyses(account_id, created_at DESC); - CREATE TABLE IF NOT EXISTS douyin_analysis_reports ( id TEXT PRIMARY KEY, account_id TEXT NOT NULL, @@ -1110,7 +829,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: return [profile_map[profile_id] for profile_id in requested_ids] async def _collect_public_profile(profile_url: str, manual_payload: dict[str, Any] | None) -> dict[str, Any]: - source_url = _normalize_profile_url_input(profile_url) + source_url = profile_url.strip() blobs: list[dict[str, Any]] = [] errors: list[str] = [] @@ -1121,18 +840,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: try: final_url, html = await _fetch_html(source_url) source_url = final_url - if not html.strip(): - errors.append("public_profile_empty_html") - elif _looks_like_douyin_anti_bot_page(html): - 
errors.append("public_profile_anti_bot_challenge") - elif not blobs: - blobs.extend(_extract_json_blobs_from_html(html)) - if not blobs: - errors.append("public_profile_no_json_blobs") - else: - blobs.extend(_extract_json_blobs_from_html(html)) - if not blobs: - errors.append("public_profile_no_json_blobs") + blobs.extend(_extract_json_blobs_from_html(html)) except Exception as exc: errors.append(f"public_profile_fetch_failed: {exc}") @@ -1142,10 +850,6 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: fallback_url=source_url ) videos = _extract_videos(payloads) - if source_url and not profile.get("nickname") and not videos and not errors: - if not blobs: - errors.append("public_profile_no_json_blobs") - errors.append("public_profile_no_candidates") return { "profile": profile, "videos": videos, @@ -1292,12 +996,11 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: snapshot_type: str, source_url: str, payload: Any, - summary: dict[str, Any], - index_fields: bool = True + summary: dict[str, Any] ) -> str: snapshot_id = make_id("dysnap") collected_at = now() - fields = _flatten_json(payload) if index_fields else [] + fields = _flatten_json(payload) legacy.db.execute( """ INSERT INTO douyin_account_snapshots ( @@ -1349,8 +1052,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "video_count": len(public_data["videos"]), "nickname": public_data["profile"].get("nickname", ""), "tags": public_data["profile"].get("tags", []) - }, - index_fields=not sync_request.compact_response + } ) for page in creator_data["pages"]: @@ -1365,9 +1067,8 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: payload, { "blob_count": len(page["blobs"]), - "field_count": 0 if sync_request.compact_response else len(_flatten_json(payload)) - }, - index_fields=not sync_request.compact_response + "field_count": len(_flatten_json(payload)) + } ) for manual_video in sync_request.manual_work_payloads: @@ -1450,8 +1151,6 @@ def register_douyin_routes(app: 
Any, legacy: Any) -> None: ) payloads: list[dict[str, Any]] = [] for row in rows: - raw_payload = _safe_json_loads(row["raw_json"], {}) - normalized = _normalize_video_candidate(raw_payload) if isinstance(raw_payload, dict) and raw_payload else {} payloads.append({ "id": row["id"], "aweme_id": row["aweme_id"], @@ -1463,212 +1162,10 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "published_at": row["published_at"], "tags": _safe_json_loads(row["tags_json"], []), "stats": _safe_json_loads(row["stats_json"], {}), - "content_type": normalized.get("content_type", "video"), - "content_type_label": normalized.get("content_type_label", "视频"), - "image_count": int(normalized.get("image_count") or 0), - "raw": raw_payload + "raw": _safe_json_loads(row["raw_json"], {}) }) return payloads - def _latest_video_analysis_map(account_id: str) -> dict[str, dict[str, Any]]: - rows = legacy.db.fetch_all( - """ - SELECT analysis.* - FROM douyin_video_analyses analysis - INNER JOIN ( - SELECT video_id, MAX(created_at) AS latest_created_at - FROM douyin_video_analyses - WHERE account_id = ? - GROUP BY video_id - ) latest - ON latest.video_id = analysis.video_id - AND latest.latest_created_at = analysis.created_at - WHERE analysis.account_id = ? 
- """, - (account_id, account_id) - ) - payloads: dict[str, dict[str, Any]] = {} - for row in rows: - parsed = _safe_json_loads(row["parsed_json"], {}) - payloads[row["video_id"]] = { - "id": row["id"], - "video_id": row["video_id"], - "report_id": row["report_id"], - "model_profile_id": row["model_profile_id"], - "model_label": row["model_label"], - "source_type": row["source_type"], - "status": row["status"], - "performance_score": float(row["performance_score"] or 0), - "commercial_score": float(row["commercial_score"] or 0), - "hook_score": float(row["hook_score"] or 0), - "retention_score": float(row["retention_score"] or 0), - "conversion_score": float(row["conversion_score"] or 0), - "summary_text": row["summary_text"], - "suggestion_text": row["suggestion_text"], - "parsed_json": parsed, - "created_at": row["created_at"], - "updated_at": row["updated_at"] - } - return payloads - - def _build_video_payload(video: dict[str, Any], latest_analysis: dict[str, Any] | None = None) -> dict[str, Any]: - score = _video_score_breakdown(video) - payload = { - "id": video["id"], - "aweme_id": video["aweme_id"], - "title": video["title"], - "description": video["description"], - "share_url": video["share_url"], - "cover_url": video["cover_url"], - "duration_sec": video["duration_sec"], - "published_at": video["published_at"], - "tags": video["tags"], - "content_type": video.get("content_type", "video"), - "content_type_label": video.get("content_type_label", "视频"), - "image_count": int(video.get("image_count") or 0), - "stats": video["stats"], - "score": score - } - if latest_analysis: - payload["latest_analysis"] = latest_analysis - return payload - - def _video_sort_key(video: dict[str, Any], sort_by: str) -> tuple[Any, ...]: - if sort_by in {"popular", "popularity"}: - return ( - float(video.get("score", {}).get("popularity_score") or 0), - float(video.get("score", {}).get("performance_score") or 0), - float(video.get("score", {}).get("commercial_score") or 0) - ) - 
if sort_by == "latest": - return ( - _parse_iso_datetime(video.get("published_at")) or datetime.fromtimestamp(0, tz=timezone.utc), - video.get("score", {}).get("performance_score", 0) - ) - if sort_by == "commercial": - return ( - float(video.get("score", {}).get("commercial_score") or 0), - float(video.get("score", {}).get("performance_score") or 0) - ) - if sort_by == "play": - return ( - float((video.get("stats") or {}).get("play") or 0), - float(video.get("score", {}).get("performance_score") or 0) - ) - if sort_by == "like": - return ( - float((video.get("stats") or {}).get("like") or 0), - float(video.get("score", {}).get("performance_score") or 0) - ) - if sort_by == "share": - return ( - float((video.get("stats") or {}).get("share") or 0), - float(video.get("score", {}).get("performance_score") or 0) - ) - if sort_by == "comment": - return ( - float((video.get("stats") or {}).get("comment") or 0), - float(video.get("score", {}).get("performance_score") or 0) - ) - return ( - float(video.get("score", {}).get("performance_score") or 0), - float(video.get("score", {}).get("commercial_score") or 0) - ) - - def _build_video_workspace_payload( - account_row: dict[str, Any], - limit: int = 60 - ) -> dict[str, Any]: - raw_videos = _list_videos(account_row["id"], limit=max(limit, 24)) - latest_analysis_map = _latest_video_analysis_map(account_row["id"]) - videos = [ - _build_video_payload(video, latest_analysis_map.get(video["id"])) - for video in raw_videos - ] - videos_by_score = sorted(videos, key=lambda item: _video_sort_key(item, "score"), reverse=True) - videos_by_latest = sorted(videos, key=lambda item: _video_sort_key(item, "latest"), reverse=True) - high_score_threshold = 60.0 - high_score_videos = [video for video in videos_by_score if float(video["score"]["performance_score"]) >= high_score_threshold] - analyzed_count = sum(1 for video in videos if video.get("latest_analysis")) - video_only_count = sum(1 for video in videos if video.get("content_type") == 
"video") - image_text_count = sum(1 for video in videos if video.get("content_type") == "image_text") - return { - "items": videos, - "top_scored_video_ids": [video["id"] for video in videos_by_score[: min(12, len(videos_by_score))]], - "latest_video_ids": [video["id"] for video in videos_by_latest[: min(12, len(videos_by_latest))]], - "high_score_threshold": high_score_threshold, - "meta": { - "total_count": len(videos), - "analyzed_count": analyzed_count, - "high_score_count": len(high_score_videos), - "video_count": video_only_count, - "image_text_count": image_text_count - } - } - - def _finalize_sync_workspace( - owner: dict[str, Any], - request: DouyinAccountSyncRequest, - public_data: dict[str, Any], - creator_data: dict[str, Any] - ) -> dict[str, Any]: - creator_payloads = _extract_creator_payloads(creator_data) - if creator_payloads: - creator_profile = _pick_best_profile( - [candidate for payload in creator_payloads for candidate in _extract_profile_candidates(payload)] - ) - creator_videos = _extract_videos(creator_payloads) - creator_identity_match = _profiles_appear_same(public_data["profile"], creator_profile) - should_merge_creator = creator_identity_match or request.allow_creator_center_profile_fallback - if should_merge_creator: - if creator_profile.get("nickname"): - public_data["profile"] = _merge_profile_payload(public_data["profile"], creator_profile) - if not public_data["source_url"]: - public_data["source_url"] = creator_profile.get("canonical_profile_url") or request.profile_url - if request.allow_creator_center_profile_fallback and not creator_identity_match: - public_data["errors"].append("creator_center_profile_fallback_used") - elif public_data["profile"].get("nickname") != creator_profile.get("nickname"): - public_data["errors"].append("creator_center_profile_merge_partial") - public_data["videos"].extend(creator_videos) - elif creator_profile.get("nickname") or creator_videos: - 
public_data["errors"].append("creator_center_identity_mismatch_skipped") - if not public_data["profile"].get("nickname") and not public_data["videos"]: - message = "No Douyin profile or creator-center data could be extracted" - if "creator_center_identity_mismatch_skipped" in public_data["errors"]: - message = "Creator-center capture belongs to a different logged-in Douyin account; automatic merge was skipped" - raise HTTPException( - status_code=400, - detail={ - "message": message, - "profile_url": request.profile_url, - "resolved_profile_url": public_data["source_url"], - "public_blob_count": len(public_data["raw_pages"]), - "public_video_count": len(public_data["videos"]), - "public_errors": public_data["errors"], - "creator_page_count": len(creator_data["pages"]), - "creator_errors": creator_data["errors"] - } - ) - account_row = _upsert_account(owner, public_data["profile"], request, public_data, creator_data) - sync_errors = public_data["errors"] + creator_data["errors"] - if request.compact_response: - return { - "account": { - "id": account_row["id"], - "nickname": account_row["nickname"], - "profile_url": account_row["profile_url"], - "douyin_id": account_row["douyin_id"], - "sec_uid": account_row["sec_uid"], - "sync_status": account_row["sync_status"] - }, - "sync_errors": sync_errors, - "public_video_count": len(public_data["videos"]), - "creator_page_count": len(creator_data["pages"]) - } - workspace = _build_workspace_payload(account_row) - workspace["sync_errors"] = sync_errors - return workspace - def _build_account_payload(account_row: dict[str, Any], include_recent_videos: int = 8) -> dict[str, Any]: videos = _list_videos(account_row["id"], limit=max(include_recent_videos, 12)) tags = _safe_json_loads(account_row["tags_json"], []) @@ -1695,6 +1192,60 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "video_summary": video_summary } + def _video_content_type(video: dict[str, Any]) -> str: + raw = video.get("raw") if 
isinstance(video.get("raw"), dict) else {} + if raw.get("images") or raw.get("image_infos") or raw.get("is_multi_content"): + return "image_text" + return "video" + + def _video_performance_score(video: dict[str, Any]) -> float: + stats = video.get("stats") if isinstance(video.get("stats"), dict) else {} + play = float(stats.get("play") or 0) + like = float(stats.get("like") or 0) + comment = float(stats.get("comment") or 0) + share = float(stats.get("share") or 0) + collect = float(stats.get("collect") or 0) + score = ( + min(play / 10000.0, 6.0) * 8.0 + + min(like / 1000.0, 6.0) * 7.0 + + min(comment / 200.0, 6.0) * 4.0 + + min(share / 100.0, 6.0) * 4.0 + + min(collect / 100.0, 6.0) * 3.0 + ) + return round(min(100.0, score), 1) + + def _workspace_video_payload(video: dict[str, Any]) -> dict[str, Any]: + tags = video.get("tags") if isinstance(video.get("tags"), list) else [] + return { + "id": video.get("id") or video.get("aweme_id") or "", + "aweme_id": video.get("aweme_id") or "", + "title": video.get("title") or video.get("description") or "未命名作品", + "description": video.get("description") or video.get("title") or "", + "share_url": video.get("share_url") or "", + "cover_url": video.get("cover_url") or "", + "duration_sec": video.get("duration_sec") or 0, + "published_at": video.get("published_at") or "", + "tags": tags, + "stats": video.get("stats") if isinstance(video.get("stats"), dict) else {}, + "content_type": _video_content_type(video), + "score": { + "performance_score": _video_performance_score(video) + } + } + + def _video_sort_key(video: dict[str, Any], sort_by: str) -> tuple[Any, ...]: + stats = video.get("stats") if isinstance(video.get("stats"), dict) else {} + normalized = (sort_by or "score").strip().lower() + if normalized == "latest": + return (video.get("published_at") or "", video.get("id") or "") + if normalized == "play": + return (float(stats.get("play") or 0), video.get("published_at") or "") + if normalized == "like": + return 
(float(stats.get("like") or 0), video.get("published_at") or "") + if normalized == "comment": + return (float(stats.get("comment") or 0), video.get("published_at") or "") + return (float(video.get("score", {}).get("performance_score") or 0), video.get("published_at") or "") + def _list_linked_accounts(account_row: dict[str, Any]) -> list[dict[str, Any]]: relation_rows = legacy.db.fetch_all( """ @@ -1736,6 +1287,19 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: raise HTTPException(status_code=404, detail="Assistant not found") return row + def _parse_iso_datetime(value: str | None) -> datetime | None: + text = str(value or "").strip() + if not text: + return None + try: + normalized = text.replace("Z", "+00:00") + parsed = datetime.fromisoformat(normalized) + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=timezone.utc) + return parsed.astimezone(timezone.utc) + except Exception: + return None + def _get_tracking_cursor(user_id: str) -> dict[str, Any] | None: return legacy.db.fetch_one( "SELECT * FROM douyin_tracking_cursors WHERE user_id = ?", @@ -1787,37 +1351,23 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: return payloads def _extract_tracking_borrowing_points(video: dict[str, Any]) -> list[str]: - latest_analysis = (video.get("latest_analysis") or {}).get("parsed_json") or {} - candidates: list[str] = [] - - def _collect(value: Any) -> None: - if isinstance(value, list): - for item in value: - if isinstance(item, str) and item.strip(): - candidates.append(item.strip()) - elif isinstance(item, dict): - for inner in item.values(): - if isinstance(inner, str) and inner.strip(): - candidates.append(inner.strip()) - elif isinstance(value, str) and value.strip(): - candidates.append(value.strip()) - - for key in ("winning_patterns", "replicate_plan", "hook_patterns", "content_engine", "offer_directions", "next_actions"): - _collect(latest_analysis.get(key)) - - score = video.get("score", {}) or {} stats = video.get("stats", 
{}) or {} - if float(score.get("hook_score") or 0) >= 70: - candidates.append("开头抓人,适合借前三秒强结论或反常识开场。") - if float(score.get("commercial_score") or 0) >= 65: - candidates.append("转化信号较强,可拆成交句式和行动指令。") - if float(score.get("performance_score") or 0) >= 70: - candidates.append("整体表现高,值得提炼成可复用栏目模板。") - if float(stats.get("comment") or 0) >= 100: - candidates.append("评论互动明显,适合提炼争议点或提问句。") - if str(video.get("content_type") or "") == "image_text": - candidates.append("图文结构清晰,可借分段标题和卡片式表达。") - + tags = video.get("tags", []) or [] + candidates: list[str] = [] + play_count = int(stats.get("play") or 0) + like_count = int(stats.get("like") or 0) + comment_count = int(stats.get("comment") or 0) + share_count = int(stats.get("share") or 0) + if like_count >= 100: + candidates.append("点赞明显更高,适合借标题切口和开头表达。") + if comment_count >= 20: + candidates.append("评论互动活跃,可借提问句和争议点设计。") + if share_count >= 10: + candidates.append("分享意愿较强,可借观点浓度和传播句式。") + if play_count >= 5000: + candidates.append("播放信号较强,值得拆成同题材复用模板。") + if tags: + candidates.append(f"标签集中在 {', '.join(tags[:3])},适合做系列化选题。") deduped: list[str] = [] seen: set[str] = set() for item in candidates: @@ -1829,16 +1379,10 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: return deduped[:4] def _build_tracking_digest_item(tracked_item: dict[str, Any], video: dict[str, Any]) -> dict[str, Any]: - latest_analysis = video.get("latest_analysis") or {} - summary = ( - (latest_analysis.get("parsed_json") or {}).get("executive_summary") - or latest_analysis.get("summary_text") - or latest_analysis.get("suggestion_text") - or video.get("description") - or video.get("title") - or "暂无摘要" - ) + stats = video.get("stats", {}) or {} + summary = video.get("description") or video.get("title") or "暂无摘要" borrowing_points = _extract_tracking_borrowing_points(video) + high_value = int(stats.get("like") or 0) >= 100 or int(stats.get("play") or 0) >= 5000 or bool(borrowing_points) return { "tracking_id": tracked_item["id"], 
"tracked_account_id": tracked_item["tracked_account_id"], @@ -1848,7 +1392,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "video": video, "summary": _compact_text(summary, 160), "borrowing_points": borrowing_points, - "is_high_value": float((video.get("score") or {}).get("performance_score") or 0) >= 70 or bool(borrowing_points), + "is_high_value": high_value, } def _build_tracking_digest(user_id: str, since_value: str = "", limit: int = 24) -> dict[str, Any]: @@ -1859,22 +1403,16 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: since_dt = _parse_iso_datetime(cursor.get("last_seen_at")) if since_dt is None: since_dt = (datetime.now(timezone.utc) - timedelta(days=3)).replace(microsecond=0) - items: list[dict[str, Any]] = [] for tracked in tracked_accounts: - account_row = _require_owned_account(tracked["tracked_account_id"], user_id) - workspace = _build_video_workspace_payload(account_row, limit=36) - for video in workspace.get("items", []): + account_payload = tracked.get("account", {}) or {} + for video in account_payload.get("video_summary", {}).get("videos", []): published_at = _parse_iso_datetime(video.get("published_at")) if published_at is None or published_at <= since_dt: continue items.append(_build_tracking_digest_item(tracked, video)) - items.sort( - key=lambda item: ( - _parse_iso_datetime(item["video"].get("published_at")) or datetime.fromtimestamp(0, tz=timezone.utc), - float((item["video"].get("score") or {}).get("performance_score") or 0) - ), + key=lambda item: _parse_iso_datetime(item["video"].get("published_at")) or datetime.fromtimestamp(0, tz=timezone.utc), reverse=True ) return { @@ -1899,143 +1437,31 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: raise HTTPException(status_code=400, detail="Tracked account has no profile_url to refresh") request = DouyinAccountSyncRequest( profile_url=profile_url, - compact_response=True, discovery_note=discovery_note ) public_data = await 
_collect_public_profile(profile_url, None) - creator_data = {"pages": [], "errors": []} - return await run_in_threadpool( - _finalize_sync_workspace, - owner, - request, - public_data, - creator_data - ) - - def _normalize_report_text(value: Any) -> str: - text = str(value or "").strip() - if not text: - return "" - return re.sub(r"\s+", " ", text) - - def _build_report_payload(report: dict[str, Any]) -> dict[str, Any]: - suggestions = legacy.db.fetch_all( - "SELECT * FROM douyin_analysis_suggestions WHERE report_id = ? ORDER BY created_at ASC", - (report["id"],) - ) - return { - "id": report["id"], - "focus_text": report["focus_text"], - "model_profile_ids": _safe_json_loads(report["model_profile_ids_json"], []), - "linked_account_ids": _safe_json_loads(report["linked_account_ids_json"], []), - "created_at": report["created_at"], - "duplicate_count": 1, - "duplicate_report_ids": [], - "suggestions": [ - { - "id": suggestion["id"], - "model_profile_id": suggestion["model_profile_id"], - "model_label": suggestion["model_label"], - "status": suggestion["status"], - "suggestion_text": suggestion["suggestion_text"], - "parsed_json": _safe_json_loads(suggestion["parsed_json"], {}) - } - for suggestion in suggestions - ] - } - - def _report_signature(report_payload: dict[str, Any]) -> str: - parts = [_normalize_report_text(report_payload.get("focus_text"))] - for suggestion in report_payload.get("suggestions", []): - parsed = suggestion.get("parsed_json") or {} - if isinstance(parsed, dict) and parsed: - normalized_content = json.dumps(parsed, ensure_ascii=False, sort_keys=True) - else: - normalized_content = _normalize_report_text(suggestion.get("suggestion_text")) - parts.append( - "|".join( - [ - suggestion.get("model_profile_id", ""), - suggestion.get("status", ""), - normalized_content - ] - ) + creator_data = await _collect_creator_center_pages([], "", []) + if not public_data.get("profile", {}).get("canonical_profile_url"): + 
public_data["profile"]["canonical_profile_url"] = profile_url + if public_data["errors"]: + raise HTTPException( + status_code=502, + detail={ + "message": "刷新对标账号失败", + "public_errors": public_data["errors"], + "creator_errors": creator_data["errors"], + }, ) - return "\n".join(parts) - - def _list_report_payloads(account_id: str, limit: int = 5, dedupe: bool = True) -> list[dict[str, Any]]: - rows = legacy.db.fetch_all( - """ - SELECT * - FROM douyin_analysis_reports - WHERE account_id = ? - ORDER BY created_at DESC - LIMIT ? - """, - (account_id, max(limit * 4, 20)) - ) - payloads = [_build_report_payload(report) for report in rows] - if not dedupe: - return payloads[:limit] - - unique_payloads: list[dict[str, Any]] = [] - seen: dict[str, dict[str, Any]] = {} - for payload in payloads: - signature = _report_signature(payload) - if signature in seen: - seen_payload = seen[signature] - seen_payload["duplicate_count"] = int(seen_payload.get("duplicate_count") or 1) + 1 - seen_payload.setdefault("duplicate_report_ids", []).append(payload["id"]) - continue - seen[signature] = payload - unique_payloads.append(payload) - focus_filtered: list[dict[str, Any]] = [] - focus_seen: dict[str, dict[str, Any]] = {} - for payload in unique_payloads: - focus_key = _normalize_report_text(payload.get("focus_text") or "__default__") - if focus_key in focus_seen: - seen_payload = focus_seen[focus_key] - seen_payload["duplicate_count"] = int(seen_payload.get("duplicate_count") or 1) + 1 - seen_payload.setdefault("duplicate_report_ids", []).append(payload["id"]) - continue - focus_seen[focus_key] = payload - focus_filtered.append(payload) - return focus_filtered[:limit] - - def _delete_report(report_id: str) -> None: - legacy.db.execute("DELETE FROM douyin_analysis_suggestions WHERE report_id = ?", (report_id,)) - legacy.db.execute("DELETE FROM douyin_analysis_reports WHERE id = ?", (report_id,)) - - def _find_duplicate_report_payload( - account_id: str, - focus_text: str, - 
suggestion_payloads: list[dict[str, Any]], - exclude_report_id: str = "" - ) -> dict[str, Any] | None: - candidate_rows = legacy.db.fetch_all( - """ - SELECT * - FROM douyin_analysis_reports - WHERE account_id = ? AND focus_text = ? AND id != ? - ORDER BY created_at DESC - LIMIT 10 - """, - (account_id, focus_text, exclude_report_id) - ) - probe_payload = { - "focus_text": focus_text, - "suggestions": suggestion_payloads + refreshed_account = _upsert_account(owner, public_data["profile"], request, public_data, creator_data) + return { + "account": _build_account_payload(refreshed_account, include_recent_videos=6), + "sync_errors": public_data["errors"] + creator_data["errors"], + "public_video_count": len(public_data.get("videos", [])), + "creator_page_count": len(creator_data.get("pages", [])), } - probe_signature = _report_signature(probe_payload) - for row in candidate_rows: - candidate_payload = _build_report_payload(row) - if _report_signature(candidate_payload) == probe_signature: - return candidate_payload - return None def _build_workspace_payload(account_row: dict[str, Any]) -> dict[str, Any]: account_payload = _build_account_payload(account_row) - video_workspace = _build_video_workspace_payload(account_row) latest_public_snapshot = legacy.db.fetch_one( """ SELECT * @@ -2056,7 +1482,40 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: """, (account_row["id"],) ) - report_payloads = _list_report_payloads(account_row["id"], limit=5, dedupe=True) + reports = legacy.db.fetch_all( + """ + SELECT * + FROM douyin_analysis_reports + WHERE account_id = ? + ORDER BY created_at DESC + LIMIT 5 + """, + (account_row["id"],) + ) + report_payloads = [] + for report in reports: + suggestions = legacy.db.fetch_all( + "SELECT * FROM douyin_analysis_suggestions WHERE report_id = ? 
ORDER BY created_at ASC", + (report["id"],) + ) + report_payloads.append({ + "id": report["id"], + "focus_text": report["focus_text"], + "model_profile_ids": _safe_json_loads(report["model_profile_ids_json"], []), + "linked_account_ids": _safe_json_loads(report["linked_account_ids_json"], []), + "created_at": report["created_at"], + "suggestions": [ + { + "id": suggestion["id"], + "model_profile_id": suggestion["model_profile_id"], + "model_label": suggestion["model_label"], + "status": suggestion["status"], + "suggestion_text": suggestion["suggestion_text"], + "parsed_json": _safe_json_loads(suggestion["parsed_json"], {}) + } + for suggestion in suggestions + ] + }) recent_searches = legacy.db.fetch_all( """ SELECT * @@ -2085,12 +1544,6 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: } if latest_creator_snapshot else None, "linked_accounts": _list_linked_accounts(account_row), "recent_reports": report_payloads, - "video_workspace": { - "top_scored_video_ids": video_workspace["top_scored_video_ids"], - "latest_video_ids": video_workspace["latest_video_ids"], - "high_score_threshold": video_workspace["high_score_threshold"], - "meta": video_workspace["meta"] - }, "recent_similarity_searches": [ { "id": row["id"], @@ -2166,421 +1619,12 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "fields": fields } - def _build_video_context_items( - video_workspace: dict[str, Any], - max_top_items: int = 6, - max_latest_items: int = 6 - ) -> dict[str, list[dict[str, Any]]]: - items = video_workspace.get("items", []) - item_map = {item["id"]: item for item in items} - top_items = [ - item_map[video_id] - for video_id in video_workspace.get("top_scored_video_ids", [])[:max_top_items] - if video_id in item_map - ] - latest_items = [ - item_map[video_id] - for video_id in video_workspace.get("latest_video_ids", [])[:max_latest_items] - if video_id in item_map - ] - - def _brief(video: dict[str, Any]) -> dict[str, Any]: - return { - "video_id": video["id"], 
- "aweme_id": video["aweme_id"], - "title": video["title"], - "description": video["description"], - "published_at": video["published_at"], - "tags": video["tags"][:6], - "stats": video["stats"], - "score": video["score"], - "latest_analysis": (video.get("latest_analysis") or {}).get("parsed_json") or {} - } - - return { - "top_performing_videos": [_brief(item) for item in top_items], - "latest_videos": [_brief(item) for item in latest_items] - } - - def _bounded_score(value: Any, fallback: float = 0.0) -> float: - parsed = _parse_count(value) - if parsed <= 0 and value not in (0, "0", 0.0): - parsed = fallback - return round(max(0.0, min(100.0, parsed or fallback)), 2) - - def _merge_structured_payload(fallback: Any, parsed: Any) -> Any: - if not isinstance(fallback, dict) or not isinstance(parsed, dict): - return parsed or fallback - merged: dict[str, Any] = {} - for key, fallback_value in fallback.items(): - if key not in parsed: - merged[key] = fallback_value - continue - parsed_value = parsed[key] - if isinstance(fallback_value, dict) and isinstance(parsed_value, dict): - merged[key] = _merge_structured_payload(fallback_value, parsed_value) - else: - merged[key] = parsed_value if parsed_value not in (None, "", [], {}) else fallback_value - for key, parsed_value in parsed.items(): - if key not in merged: - merged[key] = parsed_value - return merged - - def _infer_offer_directions(keywords: list[str]) -> list[str]: - normalized = {item.lower() for item in keywords} - offers: list[str] = [] - if {"创业", "成交", "获客"} & normalized: - offers.append("创业获客咨询、成交训练营或老板 IP 陪跑") - if {"文案", "短视频文案", "口播", "二创"} & normalized: - offers.append("短视频文案模板包、脚本代写或内容陪跑服务") - if {"教育", "教育规划"} & normalized: - offers.append("教育规划咨询、升学产品或高客单咨询服务") - if {"后期", "剪辑", "产品"} & normalized: - offers.append("剪辑优化、内容包装或产品策划服务") - if not offers: - offers.append("内容咨询、账号诊断和主题训练营") - offers.append("以高分作品为样板的复刻栏目和线索承接页") - return _dedupe_strings(offers)[:4] - - def 
_build_video_analysis_fallback(account_payload: dict[str, Any], video: dict[str, Any]) -> dict[str, Any]: - score = video["score"] - tags = video.get("tags") or [] - keywords = _extract_keywords(video.get("title", ""), video.get("description", "")) - hook_patterns: list[str] = [] - title = video.get("title", "") - if any(token in title for token in ("怎么", "如何", "为什么")): - hook_patterns.append("问题解决型开场,先给结果再给方法") - if any(token in title for token in ("坑", "误区", "别", "不要")): - hook_patterns.append("避坑警示型开场,容易拉停留和评论") - if re.search(r"\d", title): - hook_patterns.append("数字型表达能快速建立信息密度和预期") - if not hook_patterns: - hook_patterns.append("强结论或冲突判断先出,适合 3 秒内抢注意力") - - structure_patterns = [ - "开头给结论或反常识观点,中段拆 2-3 个要点,结尾给执行动作", - "用具体场景或常见错误承接,降低理解门槛", - "把方法论压缩成可收藏的清单,利于后续转化" - ] - commercial_judgement = ( - "这条内容适合做高意向线索承接,优先放在咨询、训练营或模板产品前链路。" - if score["commercial_score"] >= 70 - else "这条内容更适合作为流量内容,用来放大覆盖,再通过评论区和私信承接。" - ) - operator_actions = [ - "把标题里的核心钩子沉淀成 3-5 个固定开场模板,持续复用", - "在评论区补一个可执行清单,测试评论区转化和私信承接", - "围绕同主题连续发 3 条变体,验证题材是否可规模化" - ] - if score["collect_rate"] >= 0.008: - operator_actions.append("把这条内容延展成可下载资料或收藏型产品,提高转化效率") - if score["share_rate"] >= 0.01: - operator_actions.append("把传播点提炼成系列化选题,优先投放同类话题") - - return { - "headline_summary": f"《{_compact_text(title, 30)}》属于高可复制内容,核心价值在于{score['signals'][0]}。", - "hook_breakdown": hook_patterns[:3], - "structure_breakdown": structure_patterns, - "commercial_angle": { - "score": score["commercial_score"], - "judgement": commercial_judgement, - "suitable_for": _infer_offer_directions(account_payload.get("keywords", []))[:3] - }, - "replication_plan": [ - f"围绕 {tags[0] if tags else '当前主题'} 再做 3 条不同人群切口", - "保持同类开头结构,但替换成更具体的场景和结果承诺", - "在结尾加入明确的下一步动作,承接评论、私信或表单" - ], - "operator_actions": _dedupe_strings(operator_actions)[:5], - "risk_notes": [ - "如果后续复刻只保留题材、不保留强钩子,数据会明显回落", - "如果评论区没有承接动作,商业化价值会停留在播放层" - ], - "scores": { - "hook": min(100.0, round(score["performance_score"] * 0.92 + 4, 2)), - "retention": 
min(100.0, round(score["performance_score"] * 0.88 + 6, 2)), - "conversion": min(100.0, round(score["commercial_score"] * 0.93 + 3, 2)), - "commercial": score["commercial_score"] - }, - "raw_keywords": keywords[:8] - } - - def _build_account_analysis_fallback( - target_payload: dict[str, Any], - benchmark_payloads: list[dict[str, Any]], - analysis_context: dict[str, Any] - ) -> dict[str, Any]: - video_workspace = analysis_context.get("video_workspace", {}) - top_videos = video_workspace.get("top_performing_videos", []) - latest_videos = video_workspace.get("latest_videos", []) - keywords = _dedupe_strings( - list(target_payload.get("keywords", [])) - + list(target_payload.get("tags", [])) - + list(target_payload.get("video_summary", {}).get("top_tags", [])) - ) - avg_top_score = round( - sum(float(item.get("score", {}).get("performance_score") or 0) for item in top_videos) / max(len(top_videos), 1), - 2 - ) - avg_latest_score = round( - sum(float(item.get("score", {}).get("performance_score") or 0) for item in latest_videos) / max(len(latest_videos), 1), - 2 - ) - monetization_score = round( - min( - 100.0, - avg_top_score * 0.55 - + float(target_payload.get("video_summary", {}).get("avg_share") or 0) / 120.0 - + float(target_payload.get("video_summary", {}).get("avg_comment") or 0) / 80.0 - ), - 2 - ) - - audience = "想提升短视频获客效率、内容转化和账号定位的创业者与内容运营者" - if {"教育", "教育规划"} & {item.lower() for item in keywords}: - audience = "关注教育规划、升学决策和信息差机会的人群" - core_promise = ( - f"用 {_compact_text(target_payload.get('video_summary', {}).get('top_tags', ['内容方法'])[0], 10)} 相关主题," - "快速给用户一个能立刻套用的内容方法或判断。" - ) - hook_patterns = [] - titles = [item.get("title", "") for item in top_videos[:5]] - if any(re.search(r"\d", title) for title in titles): - hook_patterns.append("数字型开头,直接降低理解成本") - if any(any(token in title for token in ("怎么", "如何", "为什么")) for title in titles): - hook_patterns.append("问题解决型开头,先抛问题再给答案") - if any(any(token in title for token in ("坑", "误区", "别", "不要")) for title 
in titles): - hook_patterns.append("避坑警示型开头,容易拉停留和讨论") - hook_patterns = _dedupe_strings(hook_patterns + ["强结论先行,适合 3 秒内抢注意力"])[:4] - - winning_patterns = [] - for video in top_videos[:4]: - winning_patterns.append({ - "video_title": video.get("title", ""), - "score": video.get("score", {}).get("performance_score", 0), - "why": "高分原因主要来自 " + "、".join(video.get("score", {}).get("signals", [])[:2]), - "replication_angle": "保留原题材与开头结构,再改写为更具体的人群场景和结果承诺" - }) - - latest_signal = [] - for video in latest_videos[:4]: - signal = "近期内容仍在有效窗口期" - if float(video.get("score", {}).get("performance_score") or 0) + 8 < avg_top_score: - signal = "最近作品热度弱于历史高分样本,需要回到已验证题材" - latest_signal.append({ - "video_title": video.get("title", ""), - "signal": signal, - "action": "优先做同题材复刻、加强开头结论和结尾承接动作" - }) - - benchmark_insights = [ - f"对标账号 {payload.get('nickname', '未命名账号')} 可借鉴其 {', '.join(payload.get('video_summary', {}).get('top_tags', [])[:3]) or '选题聚焦'},但不要直接照搬口吻。" - for payload in benchmark_payloads[:3] - ] or [ - "当前可用对标账号较少,建议优先围绕高分作品题材扩充对标池。" - ] - - operational_gaps = [ - "高分内容和最近内容之间如果存在明显分差,说明选题复盘还没有形成固定机制", - "如果收藏和评论信号强,但页面承接动作弱,商业化效率会被浪费", - "账号标签较散时,用户对你卖什么、能解决什么问题的认知会不够集中" - ] - if avg_latest_score >= avg_top_score - 5: - operational_gaps[0] = "最近内容与高分内容差距不大,可以开始标准化选题库和周更节奏" - - return { - "executive_summary": ( - f"这个账号当前最值得放大的内容方向是 {', '.join(target_payload.get('video_summary', {}).get('top_tags', [])[:3]) or '已验证高分题材'}。" - f"高分作品平均得分 {avg_top_score},最近作品平均得分 {avg_latest_score}," - "已经具备做商业化内容矩阵和固定转化链路的基础。" - ), - "commercial_positioning": { - "audience": audience, - "core_promise": core_promise, - "monetization_readiness_score": monetization_score, - "offer_directions": _infer_offer_directions(keywords) - }, - "content_engine": { - "pillars": target_payload.get("video_summary", {}).get("top_tags", [])[:6], - "hook_patterns": hook_patterns, - "structure_patterns": [ - "开头先给结论或冲突点,中段拆 2-3 个关键动作,结尾给明确下一步", - "围绕用户熟悉的问题场景切入,降低完播门槛", - "把方法论做成可收藏的清单,提高后续转化机会" - ], - 
"cta_patterns": [ - "高收藏内容结尾引导先收藏再执行", - "高评论内容结尾抛反问,引导评论区互动", - "高分享内容结尾补一句适合谁转发给谁,放大自然传播" - ] - }, - "winning_patterns": winning_patterns, - "latest_signal": latest_signal, - "benchmark_insights": benchmark_insights, - "monetization_plan": [ - "把高分题材拆成免费内容、低门槛产品和咨询服务三级承接", - "优先围绕高收藏内容制作模板、清单或训练营资料", - "让评论区和私信都指向同一个明确转化动作,避免流量浪费" - ], - "operational_gaps": operational_gaps, - "next_30_day_actions": [ - "每周固定复刻 2 条高分题材,再测试 1 条新角度", - "给高分作品统一补评论区承接话术和私信关键词", - "每周复盘高分榜和最新榜,保留题材、更新场景和切口", - "把表现最稳的 3 条内容做成系列化栏目" - ], - "risk_watchlist": [ - "题材过多会稀释账号定位,影响成交效率", - "如果只追求播放而不设计承接动作,商业化会停留在表层流量", - "复制高分标题但不复制结构和场景,容易出现数据回落" - ] - } - - async def _run_top_video_analyses( - account_row: dict[str, Any], - owner: dict[str, Any], - profile: dict[str, Any], - *, - top_video_count: int = 6, - min_score: float = 45.0, - report_id: str = "", - source_type: str = "top_score_auto", - temperature: float = 0.25 - ) -> list[dict[str, Any]]: - video_workspace = _build_video_workspace_payload(account_row, limit=max(top_video_count * 3, 24)) - item_map = {item["id"]: item for item in video_workspace["items"]} - ranked_videos = [ - item_map[video_id] - for video_id in video_workspace["top_scored_video_ids"] - if video_id in item_map and float(item_map[video_id]["score"]["performance_score"]) >= float(min_score) - ][: max(1, min(top_video_count, 12))] - - if not ranked_videos: - return [] - - account_payload = _build_account_payload(account_row, include_recent_videos=8) - system_prompt = ( - "你是商业化短视频拆解顾问。你要针对单条作品给出可用于商业化运营的复盘。" - "请返回严格 JSON 对象,字段必须包含:headline_summary、hook_breakdown、" - "structure_breakdown、commercial_angle、replication_plan、operator_actions、" - "risk_notes、scores。scores 里必须包含 hook、retention、conversion、commercial,范围 0-100。" - ) - - async def _analyze_video(video: dict[str, Any]) -> dict[str, Any]: - prompt_context = { - "account": { - "id": account_payload["id"], - "nickname": account_payload["nickname"], - "signature": account_payload["signature"], - "tags": 
account_payload["tags"][:12] - }, - "video": { - "id": video["id"], - "aweme_id": video["aweme_id"], - "title": video["title"], - "description": video["description"], - "published_at": video["published_at"], - "tags": video["tags"], - "stats": video["stats"], - "score": video["score"] - } - } - user_prompt = ( - "请从商业化运营视角拆解这条作品。重点回答:为什么这条作品值得关注、" - "适合承接什么产品/服务、下一步怎么复刻、运营动作怎么排。" - f"\n\n输入上下文:\n{json.dumps(prompt_context, ensure_ascii=False, indent=2)}" - ) - try: - output = await legacy.call_model( - profile, - system_prompt=system_prompt, - user_prompt=user_prompt, - temperature=temperature - ) - parsed = _try_parse_agent_json(output) - status = "ok" - except Exception as exc: - output = str(exc) - parsed = {} - status = "error" - - score = video["score"] - if not isinstance(parsed, dict): - parsed = {} - parsed = _merge_structured_payload(_build_video_analysis_fallback(account_payload, video), parsed) - parsed_scores = parsed.get("scores", {}) if isinstance(parsed, dict) else {} - analysis_id = make_id("dyva") - summary_text = _first_non_empty( - (parsed.get("headline_summary") if isinstance(parsed, dict) else ""), - (parsed.get("summary") if isinstance(parsed, dict) else ""), - (parsed.get("commercial_angle", {}) or {}).get("judgement") if isinstance(parsed, dict) else "", - output - ) - hook_score = _bounded_score(parsed_scores.get("hook"), fallback=score["performance_score"]) - retention_score = _bounded_score(parsed_scores.get("retention"), fallback=score["performance_score"]) - conversion_score = _bounded_score(parsed_scores.get("conversion"), fallback=score["commercial_score"]) - commercial_score = _bounded_score(parsed_scores.get("commercial"), fallback=score["commercial_score"]) - created_at = now() - legacy.db.execute( - """ - INSERT INTO douyin_video_analyses ( - id, account_id, user_id, video_id, report_id, model_profile_id, model_label, - source_type, status, performance_score, commercial_score, hook_score, - retention_score, conversion_score, 
summary_text, suggestion_text, - parsed_json, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - analysis_id, - account_row["id"], - owner["id"], - video["id"], - report_id, - profile["id"], - _build_model_label(profile), - source_type, - status, - score["performance_score"], - commercial_score, - hook_score, - retention_score, - conversion_score, - _compact_text(summary_text, 240), - output if output.strip() else _safe_json_dumps(parsed), - _safe_json_dumps(parsed), - created_at, - created_at - ) - ) - return { - "id": analysis_id, - "video_id": video["id"], - "video_title": video["title"], - "status": status, - "summary_text": _compact_text(summary_text, 240), - "parsed_json": parsed, - "performance_score": score["performance_score"], - "commercial_score": commercial_score, - "hook_score": hook_score, - "retention_score": retention_score, - "conversion_score": conversion_score, - "created_at": created_at - } - - return await asyncio.gather(*[_analyze_video(video) for video in ranked_videos]) - async def _run_account_analysis( account_row: dict[str, Any], owner: dict[str, Any], request: DouyinAccountAnalysisRequest ) -> dict[str, Any]: target_payload = _build_account_payload(account_row, include_recent_videos=max(4, min(request.max_videos, 12))) - video_workspace = _build_video_workspace_payload(account_row, limit=max(request.max_videos * 3, 24)) - video_context = _build_video_context_items( - video_workspace, - max_top_items=min(max(request.max_videos, 4), 8), - max_latest_items=min(max(request.max_videos, 4), 8) - ) linked_rows = _list_linked_accounts(account_row) linked_account_ids = list(request.linked_account_ids) if request.include_linked_accounts: @@ -2625,25 +1669,15 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: profiles = _resolve_model_profiles(owner, request.model_profile_ids) system_prompt = ( - "你是资深抖音商业化增长顾问。你会基于账号画像、创作者中心字段、作品表现、" - "高分作品样本、最近更新信号和对标账号内容,给出可直接指导运营和商业化的结论。" - 
"请始终返回严格 JSON 对象,包含这些字段:executive_summary、commercial_positioning、" - "content_engine、winning_patterns、latest_signal、benchmark_insights、" - "monetization_plan、operational_gaps、next_30_day_actions、risk_watchlist。" - "commercial_positioning 必须是对象,至少包含 audience、core_promise、monetization_readiness_score、" - "offer_directions。content_engine 必须包含 pillars、hook_patterns、structure_patterns、cta_patterns。" - "winning_patterns、latest_signal、benchmark_insights、offer_directions、next_30_day_actions、" - "risk_watchlist 每个字段请给 3-6 条中文建议。" + "你是资深抖音增长顾问。你会基于账号画像、创作者中心字段、作品表现和对标账号内容," + "给出可执行的优化建议。请始终返回 JSON 对象,包含这些字段:" + "summary、strengths、weaknesses、benchmark_insights、content_plan、" + "growth_actions、deep_search_hypotheses。每个数组字段请给出 3-6 条中文建议。" ) analysis_context = { "target_account": target_payload, "benchmark_accounts": benchmark_payloads[:6], "focus": request.extra_focus, - "video_workspace": { - "high_score_threshold": video_workspace["high_score_threshold"], - "meta": video_workspace["meta"], - **video_context - }, "creator_center_snapshot_summary": _safe_json_loads( (legacy.db.fetch_one( """ @@ -2659,9 +1693,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: ) } user_prompt = ( - "请从商业化运营视角分析以下抖音账号。除了账号定位和内容打法," - "还要明确给出:什么内容最值得继续放大、什么内容已经过时、" - "适合承接什么类型的产品/服务、未来 30 天运营动作如何排优先级。" + "请分析以下抖音账号,并分别给出内容方向、选题结构、互动增长、账号定位和对标拆解建议。" "如果提供了对标账号,要重点指出可借鉴但不应直接照搬的部分。" f"\n\n输入上下文:\n{json.dumps(analysis_context, ensure_ascii=False, indent=2)}" ) @@ -2702,12 +1734,6 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: output = str(exc) parsed = {} status = "error" - if not isinstance(parsed, dict): - parsed = {} - parsed = _merge_structured_payload( - _build_account_analysis_fallback(target_payload, benchmark_payloads, analysis_context), - parsed - ) suggestion_id = make_id("dysady") legacy.db.execute( """ @@ -2722,7 +1748,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: profile["id"], _build_model_label(profile), status, - output if output.strip() 
else _safe_json_dumps(parsed), + output, _safe_json_dumps(parsed), now() ) @@ -2737,34 +1763,16 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: } suggestions = await asyncio.gather(*[_analyze_with_model(profile) for profile in profiles]) - duplicate_report = _find_duplicate_report_payload( - account_row["id"], - request.extra_focus, - suggestions, - exclude_report_id=report_id - ) - if duplicate_report: - _delete_report(report_id) - return { - "report_id": duplicate_report["id"], - "created_at": duplicate_report["created_at"], - "context": analysis_context, - "suggestions": duplicate_report["suggestions"], - "auto_video_analyses": [], - "duplicate_of_report_id": duplicate_report["id"], - "duplicate_count": duplicate_report.get("duplicate_count", 1) - } - auto_video_analyses: list[dict[str, Any]] = [] + top_video_analyses: list[dict[str, Any]] = [] if request.auto_analyze_top_videos and profiles: - auto_video_analyses = await _run_top_video_analyses( + top_video_analyses = await _run_top_video_analyses( account_row, owner, profiles[0], top_video_count=request.top_video_analysis_count, min_score=45.0, - report_id=report_id, - source_type="account_analysis_auto", - temperature=min(request.temperature, 0.3) + source_type="account_analysis_followup", + temperature=min(max(request.temperature, 0.1), 0.4) ) legacy.db.execute( "UPDATE douyin_accounts SET last_analysis_at = ?, updated_at = ? 
WHERE id = ?", @@ -2775,9 +1783,99 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "created_at": created_at, "context": analysis_context, "suggestions": suggestions, - "auto_video_analyses": auto_video_analyses + "top_video_analyses": top_video_analyses } + async def _run_top_video_analyses( + account_row: dict[str, Any], + owner: dict[str, Any], + profile: dict[str, Any], + *, + top_video_count: int = 6, + min_score: float = 45.0, + source_type: str = "top_score_auto", + temperature: float = 0.25 + ) -> list[dict[str, Any]]: + raw_videos = _list_videos(account_row["id"], limit=max(top_video_count * 3, 24)) + ranked_videos = [ + video for video in (_workspace_video_payload(item) for item in raw_videos) + if float(video.get("score", {}).get("performance_score") or 0) >= float(min_score) + ] + ranked_videos.sort(key=lambda item: _video_sort_key(item, "score"), reverse=True) + ranked_videos = ranked_videos[: max(1, min(top_video_count, 12))] + if not ranked_videos: + return [] + + account_payload = _build_account_payload(account_row, include_recent_videos=8) + system_prompt = ( + "你是商业化短视频拆解顾问。你要针对单条作品给出可用于商业化运营的复盘。" + "请返回严格 JSON 对象,字段包含:headline_summary、hook_breakdown、" + "structure_breakdown、commercial_angle、replication_plan、operator_actions、" + "risk_notes、scores。scores 里包含 hook、retention、conversion、commercial,范围 0-100。" + ) + + async def _analyze_video(video: dict[str, Any]) -> dict[str, Any]: + prompt_context = { + "account": { + "id": account_payload["id"], + "nickname": account_payload["nickname"], + "signature": account_payload["signature"], + "tags": account_payload["tags"][:12] + }, + "video": { + "id": video["id"], + "aweme_id": video["aweme_id"], + "title": video["title"], + "description": video["description"], + "published_at": video["published_at"], + "tags": video["tags"], + "stats": video["stats"], + "score": video["score"] + } + } + user_prompt = ( + "请从商业化运营视角拆解这条作品。重点回答:为什么值得关注、" + "适合承接什么产品或服务、下一步怎么复刻、运营动作怎么排。" + 
f"\n\n输入上下文:\n{json.dumps(prompt_context, ensure_ascii=False, indent=2)}" + ) + try: + output = await legacy.call_model( + profile, + system_prompt=system_prompt, + user_prompt=user_prompt, + temperature=temperature + ) + parsed = _try_parse_agent_json(output) + status = "ok" + except Exception as exc: + output = str(exc) + parsed = {} + status = "error" + + if not isinstance(parsed, dict): + parsed = {} + summary = _first_non_empty( + parsed.get("headline_summary"), + parsed.get("summary"), + parsed.get("commercial_angle"), + output + ) + return { + "id": make_id("dyva"), + "account_id": account_row["id"], + "video_id": video["id"], + "model_profile_id": profile["id"], + "model_label": _build_model_label(profile), + "source_type": source_type, + "status": status, + "summary": summary, + "analysis_json": parsed, + "video": video, + "created_at": now() + } + + return await asyncio.gather(*[_analyze_video(video) for video in ranked_videos]) + async def _prepare_similarity_source( owner: dict[str, Any], request: DouyinSimilarSearchRequest @@ -3068,13 +2166,12 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: request.session_cookie, request.manual_creator_pages ) - return await run_in_threadpool( - _finalize_sync_workspace, - account, - request, - public_data, - creator_data - ) + if not public_data["profile"].get("nickname") and not public_data["videos"] and not creator_data["pages"]: + raise HTTPException(status_code=400, detail="No Douyin profile or creator-center data could be extracted") + account_row = _upsert_account(account, public_data["profile"], request, public_data, creator_data) + workspace = _build_workspace_payload(account_row) + workspace["sync_errors"] = public_data["errors"] + creator_data["errors"] + return workspace @app.get("/v2/douyin/accounts/{account_id}") def get_douyin_account( @@ -3141,15 +2238,31 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: account: dict[str, Any] = Depends(legacy.require_approved) ) -> dict[str, 
Any]: account_row = _require_owned_account(account_id, account["id"]) - workspace = _build_video_workspace_payload(account_row, limit=max(limit, 24)) - items = list(workspace["items"]) + raw_videos = _list_videos(account_row["id"], limit=max(limit, 24)) + items = [_workspace_video_payload(video) for video in raw_videos] item_map = {item["id"]: item for item in items} + high_score_threshold = 60.0 + top_scored_video_ids = [ + item["id"] + for item in sorted(items, key=lambda entry: _video_sort_key(entry, "score"), reverse=True) + if float(item.get("score", {}).get("performance_score") or 0) >= high_score_threshold + ] + if not top_scored_video_ids: + top_scored_video_ids = [ + item["id"] + for item in sorted(items, key=lambda entry: _video_sort_key(entry, "score"), reverse=True)[:5] + ] + latest_video_ids = [ + item["id"] + for item in sorted(items, key=lambda entry: _video_sort_key(entry, "latest"), reverse=True)[:12] + ] + normalized_scope = (scope or "all").strip().lower() if normalized_scope == "top": - items = [item_map[video_id] for video_id in workspace["top_scored_video_ids"] if video_id in item_map] + items = [item_map[video_id] for video_id in top_scored_video_ids if video_id in item_map] elif normalized_scope == "latest": - items = [item_map[video_id] for video_id in workspace["latest_video_ids"] if video_id in item_map] + items = [item_map[video_id] for video_id in latest_video_ids if video_id in item_map] normalized_content_type = (content_type or "all").strip().lower() if normalized_content_type in {"video", "image_text"}: @@ -3164,9 +2277,9 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: item for item in items if query_text in " ".join( [ - item.get("title", ""), - item.get("description", ""), - item.get("aweme_id", ""), + str(item.get("title") or ""), + str(item.get("description") or ""), + str(item.get("aweme_id") or ""), *[str(tag_item) for tag_item in item.get("tags", [])] ] ).lower() @@ -3188,10 +2301,14 @@ def 
register_douyin_routes(app: Any, legacy: Any) -> None: "content_type": normalized_content_type, "query": q, "tag": tag, - "high_score_threshold": workspace["high_score_threshold"], - "meta": workspace["meta"], - "top_scored_video_ids": workspace["top_scored_video_ids"], - "latest_video_ids": workspace["latest_video_ids"], + "high_score_threshold": high_score_threshold, + "meta": { + "source": "fastgpt-live-fallback", + "total": len(raw_videos), + "filtered": len(items) + }, + "top_scored_video_ids": top_scored_video_ids, + "latest_video_ids": latest_video_ids, "items": items[: max(1, min(limit, 1000))] } @@ -3220,7 +2337,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: ) -> dict[str, Any]: account_row = _require_owned_account(account_id, account["id"]) profile = legacy.model_profile_for_account(account["id"], request.model_profile_id) - results = await _run_top_video_analyses( + items = await _run_top_video_analyses( account_row, account, profile, @@ -3232,8 +2349,8 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: return { "account_id": account_row["id"], "model_profile_id": profile["id"], - "analyzed_count": len(results), - "items": results + "analyzed_count": len(items), + "items": items } @app.post("/v2/douyin/similar-searches") diff --git a/collector-service/app/kuaishou_features.py b/collector-service/app/kuaishou_features.py new file mode 100644 index 0000000..be081cd --- /dev/null +++ b/collector-service/app/kuaishou_features.py @@ -0,0 +1,381 @@ +from __future__ import annotations + +import json +from typing import Any + +from fastapi import Depends, HTTPException, Query +from pydantic import BaseModel, Field + +from .core_main import ( + content_source_payload, + create_content_source, + create_job_record, + job_payload, + load_owned_content_source, + load_owned_job, + make_id, + parse_json_object, + resolve_target_assistant, + resolve_target_kb, + resolve_target_project, + review_payload, + trigger_orchestrated_job, + utc_now, 
+ model_profile_for_account, + db, +) + +KUAISHOU_PLATFORM = "kuaishou" +KUAISHOU_URL_HINTS = ( + "kuaishou.com", + "v.kuaishou.com", + "chenzhongtech.com", +) +YOUTUBE_URL_HINTS = ( + "youtube.com", + "youtu.be", + "m.youtube.com", + "music.youtube.com", +) + + +class KuaishouContentSourceCreateRequest(BaseModel): + project_id: str = "" + source_kind: str = "creator_account" + handle: str = "" + source_url: str = "" + title: str = "" + local_path: str = "" + metadata: dict[str, Any] = Field(default_factory=dict) + + +class KuaishouContentSourceSyncRequest(BaseModel): + project_id: str = "" + knowledge_base_id: str = "" + assistant_id: str = "" + content_source_id: str = "" + handle: str = "" + source_url: str = "" + title: str = "" + analysis_model_profile_id: str = "" + language: str = "auto" + max_items: int = Field(default=5, ge=1, le=20) + skip_existing: bool = True + auto_trigger_analysis: bool = True + + +class KuaishouReviewCreateRequest(BaseModel): + project_id: str = "" + source_job_id: str = "" + assistant_id: str = "" + title: str = "" + content_type: str = "video" + publish_url: str = "" + published_at: str = "" + metrics: dict[str, Any] = Field(default_factory=dict) + verdict: str = "" + highlights: str = "" + next_actions: str = "" + notes: str = "" + + +def _normalize_text(value: str | None) -> str: + return str(value or "").strip() + + +def _is_youtube_url(value: str) -> bool: + normalized = _normalize_text(value).lower() + return any(hint in normalized for hint in YOUTUBE_URL_HINTS) + + +def _is_kuaishou_url(value: str) -> bool: + normalized = _normalize_text(value).lower() + return any(hint in normalized for hint in KUAISHOU_URL_HINTS) + + +def _ensure_kuaishou_url(value: str) -> str: + normalized = _normalize_text(value) + if not normalized: + return "" + if _is_youtube_url(normalized): + raise HTTPException(status_code=400, detail="YouTube URLs are not supported in the Kuaishou routes") + return normalized + + +def 
_content_source_is_kuaishou(row: dict[str, Any]) -> bool: + if _normalize_text(row.get("platform")).lower() == KUAISHOU_PLATFORM: + return True + return _is_kuaishou_url(row.get("source_url", "")) + + +def _job_is_kuaishou(row: dict[str, Any]) -> bool: + artifacts = parse_json_object(row.get("artifacts_json") or "{}") + source_url = _normalize_text(row.get("source_url")) + if source_url and _is_youtube_url(source_url): + return False + if source_url and _is_kuaishou_url(source_url): + return True + if _normalize_text(artifacts.get("platform")).lower() == KUAISHOU_PLATFORM: + return True + content_source_id = _normalize_text(row.get("content_source_id")) + if content_source_id: + source_row = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (content_source_id,)) + return bool(source_row and _content_source_is_kuaishou(source_row)) + return False + + +def _require_owned_kuaishou_source(source_id: str, account_id: str) -> dict[str, Any]: + row = load_owned_content_source(source_id, account_id) + if not _content_source_is_kuaishou(row): + raise HTTPException(status_code=400, detail="Content source does not belong to the Kuaishou route") + return row + + +def _list_kuaishou_jobs(account_id: str, project_id: str | None = None, limit: int = 50) -> list[dict[str, Any]]: + rows = db.fetch_all( + "SELECT * FROM jobs WHERE user_id = ? 
ORDER BY created_at DESC LIMIT ?", + (account_id, max(limit, 1) * 10), + ) + items: list[dict[str, Any]] = [] + for row in rows: + if project_id and _normalize_text(row.get("project_id")) != project_id: + continue + if _job_is_kuaishou(row): + items.append(job_payload(row)) + if len(items) >= limit: + break + return items + + +def _list_kuaishou_reviews(account_id: str, project_id: str | None = None, limit: int = 50) -> list[dict[str, Any]]: + clauses = ["user_id = ?", "platform = ?"] + params: list[Any] = [account_id, KUAISHOU_PLATFORM] + if project_id is not None: + normalized = project_id.strip() + if normalized: + clauses.append("project_id = ?") + params.append(normalized) + else: + clauses.append("(project_id IS NULL OR project_id = '')") + sql = f""" + SELECT * FROM publish_reviews + WHERE {' AND '.join(clauses)} + ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC + LIMIT ? + """ + params.append(limit) + return [review_payload(row) for row in db.fetch_all(sql, tuple(params))] + + +def register_kuaishou_routes(app: Any, legacy: Any) -> None: + """Register a small Kuaishou route set on top of the shared collector tables.""" + + @app.get("/v2/kuaishou/content-sources") + def list_kuaishou_content_sources( + project_id: str | None = Query(default=None), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + clauses = ["user_id = ?", "platform = ?"] + params: list[Any] = [account["id"], KUAISHOU_PLATFORM] + if project_id: + resolve_target_project(account["id"], project_id, username=account["username"]) + clauses.append("project_id = ?") + params.append(project_id) + rows = legacy.db.fetch_all( + f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY created_at DESC", + tuple(params), + ) + return [content_source_payload(row) for row in rows] + + @app.post("/v2/kuaishou/content-sources") + def create_kuaishou_content_source_api( + request: KuaishouContentSourceCreateRequest, + 
account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) + source_url = _ensure_kuaishou_url(request.source_url) + if source_url and _is_youtube_url(source_url): + raise HTTPException(status_code=400, detail="YouTube URLs are not supported in the Kuaishou routes") + row = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind=_normalize_text(request.source_kind) or "creator_account", + platform=KUAISHOU_PLATFORM, + handle=_normalize_text(request.handle), + source_url=source_url, + title=_normalize_text(request.title) or _normalize_text(request.handle) or source_url, + local_path=_normalize_text(request.local_path), + metadata=request.metadata, + ) + return content_source_payload(row) + + @app.post("/v2/kuaishou/pipelines/content-source-sync") + async def create_kuaishou_content_source_sync_job( + request: KuaishouContentSourceSyncRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_row = None + if request.content_source_id.strip(): + source_row = _require_owned_kuaishou_source(request.content_source_id.strip(), account["id"]) + + requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "") + project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + kb = resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"]) + assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + profile = model_profile_for_account(account["id"], request.analysis_model_profile_id or None) + + source_url = _ensure_kuaishou_url( + request.source_url or (source_row or {}).get("source_url", "") + ) + if not source_url: + raise HTTPException(status_code=400, detail="source_url or 
content_source_id with a Kuaishou URL is required") + + handle = _normalize_text(request.handle or (source_row or {}).get("handle", "")) + source_title = ( + _normalize_text(request.title) + or (source_row or {}).get("title", "").strip() + or handle + or source_url + ) + + if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]: + raise HTTPException(status_code=400, detail="Content source does not belong to target project") + + if not source_row: + source_row = create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind="creator_account", + platform=KUAISHOU_PLATFORM, + handle=handle, + source_url=source_url, + title=source_title, + metadata={ + "sync_mode": "recent_uploads", + "max_items": request.max_items, + "analysis_model_profile_id": profile["id"], + }, + ) + + job_row = create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="content_source_sync", + line_type="content_source_sync", + workflow_key="content_source_sync_pipeline", + title=f"{source_title} 内容源同步", + language=request.language, + source_url=source_url, + assistant_id=(assistant or {}).get("id"), + content_source_id=source_row["id"], + artifacts={ + "platform": KUAISHOU_PLATFORM, + "handle": handle, + "source_account_url": source_url, + "source_title": source_title, + "max_items": request.max_items, + "skip_existing": request.skip_existing, + "auto_trigger_analysis": request.auto_trigger_analysis, + }, + analysis_model_profile_id=profile["id"], + ) + legacy.update_content_source_metadata( + source_row["id"], + { + "sync_mode": "recent_uploads", + "max_items": request.max_items, + "analysis_model_profile_id": profile["id"], + "last_sync_job_id": job_row["id"], + "last_sync_requested_at": utc_now(), + }, + ) + return job_payload(await trigger_orchestrated_job(job_row)) + + @app.get("/v2/kuaishou/jobs") + def list_kuaishou_jobs_api( + project_id: str | None = 
Query(default=None), + limit: int = Query(default=20, ge=1, le=100), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + return _list_kuaishou_jobs(account["id"], project_id=project_id, limit=limit) + + @app.get("/v2/kuaishou/workspace") + def get_kuaishou_workspace( + project_id: str | None = Query(default=None), + limit: int = Query(default=10, ge=1, le=50), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + content_sources = list_kuaishou_content_sources(project_id=project_id, account=account) + reviews = _list_kuaishou_reviews(account["id"], project_id=project_id, limit=limit) + jobs = _list_kuaishou_jobs(account["id"], project_id=project_id, limit=limit) + return { + "platform": KUAISHOU_PLATFORM, + "project_id": project_id or "", + "content_sources": content_sources, + "recent_jobs": jobs, + "recent_reviews": reviews, + "counts": { + "content_sources": len(content_sources), + "jobs": len(jobs), + "reviews": len(reviews), + }, + } + + @app.get("/v2/kuaishou/reviews") + def list_kuaishou_reviews_api( + project_id: str | None = Query(default=None), + limit: int = Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + return _list_kuaishou_reviews(account["id"], project_id=project_id, limit=limit) + + @app.post("/v2/kuaishou/reviews") + def create_kuaishou_review( + request: KuaishouReviewCreateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_job = None + if request.source_job_id.strip(): + source_job = load_owned_job(request.source_job_id.strip(), account["id"]) + if not _job_is_kuaishou(source_job): + raise HTTPException(status_code=400, detail="Source job does not belong to the Kuaishou route") + + requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "") + project = resolve_target_project(account["id"], 
requested_project_id or None, username=account["username"]) + assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + review_id = make_id("review") + title = request.title.strip() or (source_job.get("title", "") if source_job else "") + if not title: + title = f"{project['name']} 快手复盘" + timestamp = utc_now() + db.execute( + """ + INSERT INTO publish_reviews ( + id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, + publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + review_id, + account["id"], + project["id"], + source_job["id"] if source_job else None, + (assistant or {}).get("id") or None, + title, + KUAISHOU_PLATFORM, + request.content_type or "video", + _normalize_text(request.publish_url), + _normalize_text(request.published_at), + json.dumps(request.metrics, ensure_ascii=False), + _normalize_text(request.verdict), + _normalize_text(request.highlights), + _normalize_text(request.next_actions), + _normalize_text(request.notes), + timestamp, + timestamp, + ), + ) + row = db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return review_payload(row) diff --git a/collector-service/app/legacy_runtime.py b/collector-service/app/legacy_runtime.py new file mode 100644 index 0000000..dcb1e84 --- /dev/null +++ b/collector-service/app/legacy_runtime.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import importlib.machinery +import importlib.util +import sys +import types +from pathlib import Path +from typing import Any + +BASE_DIR = Path(__file__).resolve().parent +PYCACHE_DIR = BASE_DIR / "__pycache__" +LEGACY_PYC_DIR = BASE_DIR / "_legacy_pyc" +SUPPORTED_PYTHON = (3, 11) + +_LEGACY_MODULE: Any | None = None + + +def _ensure_supported_runtime() -> None: + if sys.version_info[:2] != SUPPORTED_PYTHON: + version = ".".join(map(str, 
sys.version_info[:3])) + required = ".".join(map(str, SUPPORTED_PYTHON)) + raise RuntimeError( + f"Legacy collector bytecode requires Python {required}. Current runtime: {version}." + ) + + +def _ensure_package() -> None: + package = sys.modules.get("app") + if package is None: + package = types.ModuleType("app") + package.__path__ = [str(BASE_DIR)] + sys.modules["app"] = package + + +def _load_sourceless_module(module_name: str, pyc_path: Path) -> Any: + loader = importlib.machinery.SourcelessFileLoader(module_name, str(pyc_path)) + spec = importlib.util.spec_from_loader(module_name, loader) + if spec is None: + raise RuntimeError(f"Unable to create spec for {module_name}") + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + loader.exec_module(module) + return module + + +def load_legacy_main() -> Any: + global _LEGACY_MODULE + if _LEGACY_MODULE is not None: + return _LEGACY_MODULE + + _ensure_supported_runtime() + _ensure_package() + + for name in ("database", "fastgpt", "openai_compat"): + full_name = f"app.{name}" + if full_name not in sys.modules: + pyc_dir = LEGACY_PYC_DIR if (LEGACY_PYC_DIR / f"{name}.cpython-311.pyc").exists() else PYCACHE_DIR + _load_sourceless_module(full_name, pyc_dir / f"{name}.cpython-311.pyc") + + legacy_name = "app.main_legacy" + if legacy_name in sys.modules: + _LEGACY_MODULE = sys.modules[legacy_name] + return _LEGACY_MODULE + + main_pyc_dir = LEGACY_PYC_DIR if (LEGACY_PYC_DIR / "main.cpython-311.pyc").exists() else PYCACHE_DIR + _LEGACY_MODULE = _load_sourceless_module(legacy_name, main_pyc_dir / "main.cpython-311.pyc") + _LEGACY_MODULE.__package__ = "app" + return _LEGACY_MODULE diff --git a/collector-service/app/main.py b/collector-service/app/main.py index 537f6ec..b686fe8 100644 --- a/collector-service/app/main.py +++ b/collector-service/app/main.py @@ -1,3325 +1,24 @@ from __future__ import annotations -import asyncio -import httpx -import json -import os -import re -import secrets -import 
shutil -import socket -import subprocess -import sys -import uuid -from datetime import datetime, timezone -from pathlib import Path -from typing import Any -from urllib.parse import urljoin, urlparse - -from fastapi import Body, Depends, FastAPI, File, Form, Header, HTTPException, Query, UploadFile -from fastapi.middleware.cors import CORSMiddleware -from fastapi.staticfiles import StaticFiles -from pydantic import BaseModel, Field - -from .database import Database, utc_now +from .domestic_platform_features import register_domestic_platform_routes from .douyin_features import register_douyin_routes -from .integrations import AsrHttpClient, CutVideoClient, HuobaoDramaClient, N8NClient from .oneliner_features import register_oneliner_routes -from .openai_compat import OpenAICompatClient -BASE_DIR = Path(__file__).resolve().parents[2] -DATA_DIR = Path(os.getenv("DATA_DIR", BASE_DIR / "data" / "collector")) -DOWNLOADS_DIR = DATA_DIR / "downloads" -JOBS_DIR = DATA_DIR / "jobs" -MODELS_DIR = DATA_DIR / "models" -DB_PATH = os.getenv("DATABASE_PATH", str(DATA_DIR / "storyforge.db")) -DEFAULT_EXTERNAL_BASE_URL = os.getenv("DEFAULT_EXTERNAL_BASE_URL", "https://test.hyzq.net/storyforge") -LOCAL_OPENAI_BASE_URL = os.getenv("LOCAL_OPENAI_BASE_URL", "http://127.0.0.1:8317/v1") -LOCAL_OPENAI_MODEL = os.getenv("LOCAL_OPENAI_MODEL", "GLM-5") -LOCAL_OPENAI_API_KEY = os.getenv("LOCAL_OPENAI_API_KEY", "") -YTDLP_BIN = os.getenv("YTDLP_BIN", "yt-dlp") -FFMPEG_BIN = os.getenv("FFMPEG_BIN", "ffmpeg") -WHISPER_BIN = os.getenv("WHISPER_BIN", "") -WHISPER_MODEL = os.getenv("WHISPER_MODEL", str(MODELS_DIR / "ggml-base.en.bin")) -ASR_HTTP_BASE_URL = os.getenv("ASR_HTTP_BASE_URL", "") -ASR_HTTP_TRANSCRIBE_PATH = os.getenv("ASR_HTTP_TRANSCRIBE_PATH", "/transcribe") -ASR_HTTP_FIELD_NAME = os.getenv("ASR_HTTP_FIELD_NAME", "wav") -ASR_HTTP_TIMEOUT_SEC = float(os.getenv("ASR_HTTP_TIMEOUT_SEC", "120")) -N8N_BASE_URL = os.getenv("N8N_BASE_URL", "http://127.0.0.1:5670") -N8N_ANALYSIS_WEBHOOK_PATH = 
os.getenv("N8N_ANALYSIS_WEBHOOK_PATH", "/webhook/storyforge-analysis") -N8N_REAL_CUT_WEBHOOK_PATH = os.getenv("N8N_REAL_CUT_WEBHOOK_PATH", "/webhook/storyforge-real-cut") -N8N_AI_VIDEO_WEBHOOK_PATH = os.getenv("N8N_AI_VIDEO_WEBHOOK_PATH", "/webhook/storyforge-ai-video") -N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH = os.getenv("N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH", "/webhook/storyforge-content-source-sync") -ORCHESTRATOR_SHARED_SECRET = os.getenv("ORCHESTRATOR_SHARED_SECRET", "") -CUTVIDEO_BASE_URL = os.getenv("CUTVIDEO_BASE_URL", "") -CUTVIDEO_API_KEY = os.getenv("CUTVIDEO_API_KEY", "") -HUOBAO_BASE_URL = os.getenv("HUOBAO_BASE_URL", "http://127.0.0.1:5678") -CUTVIDEO_BASE_CONFIG = os.getenv("CUTVIDEO_BASE_CONFIG", "example.job.yaml") -CUTVIDEO_POLL_INTERVAL_SEC = int(os.getenv("CUTVIDEO_POLL_INTERVAL_SEC", "10")) -CUTVIDEO_MAX_WAIT_SEC = int(os.getenv("CUTVIDEO_MAX_WAIT_SEC", "1800")) -CUTVIDEO_UPLOAD_TIMEOUT_SEC = int(os.getenv("CUTVIDEO_UPLOAD_TIMEOUT_SEC", "1800")) -HUOBAO_POLL_INTERVAL_SEC = int(os.getenv("HUOBAO_POLL_INTERVAL_SEC", "10")) -HUOBAO_MAX_WAIT_SEC = int(os.getenv("HUOBAO_MAX_WAIT_SEC", "900")) +try: + from . import core_main as core +except Exception: + # Keep a bytecode-backed fallback so the app can still boot if the + # recovered source baseline is incomplete in this workspace. 
+ from .legacy_runtime import load_legacy_main -DOMESTIC_PLATFORMS = {"douyin", "xiaohongshu", "bilibili", "kuaishou", "wechat_video"} + core = load_legacy_main() -for path in (DATA_DIR, DOWNLOADS_DIR, JOBS_DIR, MODELS_DIR): - path.mkdir(parents=True, exist_ok=True) +app = core.app -db = Database(DB_PATH) -openai_client = OpenAICompatClient() -asr_http_client = AsrHttpClient( - base_url=ASR_HTTP_BASE_URL, - transcribe_path=ASR_HTTP_TRANSCRIBE_PATH, - field_name=ASR_HTTP_FIELD_NAME, - timeout=ASR_HTTP_TIMEOUT_SEC, -) -n8n_client = N8NClient( - base_url=N8N_BASE_URL, - workflow_paths={ - "analysis_pipeline": N8N_ANALYSIS_WEBHOOK_PATH, - "real_cut_pipeline": N8N_REAL_CUT_WEBHOOK_PATH, - "ai_video_pipeline": N8N_AI_VIDEO_WEBHOOK_PATH, - "content_source_sync_pipeline": N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH, - }, - shared_secret=ORCHESTRATOR_SHARED_SECRET, -) -cutvideo_client = CutVideoClient( - base_url=CUTVIDEO_BASE_URL, - api_key=CUTVIDEO_API_KEY, - upload_timeout=CUTVIDEO_UPLOAD_TIMEOUT_SEC, -) -huobao_client = HuobaoDramaClient(base_url=HUOBAO_BASE_URL) - -app = FastAPI(title="StoryForge Collector Service", version="0.2.0") -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.mount("/downloads", StaticFiles(directory=str(DOWNLOADS_DIR)), name="downloads") - - -class RegisterAccountRequest(BaseModel): - username: str - password: str - display_name: str = "" - - -class LoginRequest(BaseModel): - username: str - password: str - - -class ModelProfileRequest(BaseModel): - name: str - base_url: str - api_key: str = "" - model_name: str - is_default: bool = False - - -class PreferredModelRequest(BaseModel): - model_profile_id: str - - -class KnowledgeBaseCreateRequest(BaseModel): - name: str - description: str = "" - project_id: str = "" - - -class ExploreVideoLinkRequest(BaseModel): - video_url: str - title: str | None = None - project_id: str | None = None - knowledge_base_id: str | 
None = None - assistant_id: str | None = None - analysis_model_profile_id: str | None = None - language: str = "auto" - - -class ExploreTextRequest(BaseModel): - title: str - content: str - project_id: str | None = None - knowledge_base_id: str | None = None - assistant_id: str | None = None - analysis_model_profile_id: str | None = None - - -class AssistantCreateRequest(BaseModel): - name: str - description: str = "" - system_prompt: str = "" - generation_goal: str = "" - knowledge_base_ids: list[str] = Field(default_factory=list) - project_id: str = "" - model_profile_id: str = "" - - -class AssistantUpdateRequest(BaseModel): - name: str | None = None - description: str | None = None - system_prompt: str | None = None - generation_goal: str | None = None - knowledge_base_ids: list[str] | None = None - project_id: str | None = None - model_profile_id: str | None = None - - -class GenerateCopyRequest(BaseModel): - brief: str - platform: str = "抖音" - audience: str = "创业者" - extra_requirements: str = "" - knowledge_base_ids: list[str] = Field(default_factory=list) - - -class PublishAppUpdateRequest(BaseModel): - platform: str = "android" - channel: str = "stable" - versionCode: int - versionName: str - minSupportedCode: int - apkUrl: str - apkSha256: str = "" - notes: str = "" - forceUpdate: bool = False - isActive: bool = True - - -class ProjectCreateRequest(BaseModel): - name: str - description: str = "" - - -class ContentSourceCreateRequest(BaseModel): - project_id: str = "" - source_kind: str - platform: str = "" - handle: str = "" - source_url: str = "" - title: str = "" - local_path: str = "" - metadata: dict[str, Any] = Field(default_factory=dict) - - -class ContentSourceSyncRequest(BaseModel): - project_id: str = "" - knowledge_base_id: str = "" - assistant_id: str = "" - content_source_id: str = "" - platform: str = "" - handle: str = "" - source_url: str = "" - title: str = "" - analysis_model_profile_id: str = "" - language: str = "auto" - max_items: int = 
Field(default=5, ge=1, le=20) - skip_existing: bool = True - auto_trigger_analysis: bool = True - - -class RealCutJobRequest(BaseModel): - project_id: str = "" - title: str - input_dir: str = "" - source_job_id: str = "" - base_config: str = "" - objective: str = "保留高信息密度片段,输出适合短视频平台的粗剪结果" - target_duration_sec: int = 60 - target_aspect_ratio: str = "9:16" - ideal_segment_duration_sec: int = 8 - max_segment_duration_sec: int = 18 - transcript_backend: str = "auto" - transcript_device: str = "cuda" - review_enabled: bool = False - dry_run: bool = False - - -class AiVideoJobRequest(BaseModel): - project_id: str = "" - assistant_id: str = "" - knowledge_base_id: str = "" - source_job_id: str = "" - title: str - brief: str - style: str = "realistic" - shots: int = 4 - image_provider: str = "openai" - image_model: str = "" - video_provider: str = "doubao" - video_model: str = "" - aspect_ratio: str = "9:16" - duration: int = 5 - - -class ReviewCreateRequest(BaseModel): - project_id: str = "" - source_job_id: str = "" - assistant_id: str = "" - title: str = "" - platform: str = "douyin" - content_type: str = "video" - publish_url: str = "" - published_at: str = "" - metrics: dict[str, Any] = Field(default_factory=dict) - verdict: str = "" - highlights: str = "" - next_actions: str = "" - notes: str = "" - - -class ReviewUpdateRequest(BaseModel): - title: str | None = None - platform: str | None = None - content_type: str | None = None - publish_url: str | None = None - published_at: str | None = None - metrics: dict[str, Any] | None = None - verdict: str | None = None - highlights: str | None = None - next_actions: str | None = None - notes: str | None = None - assistant_id: str | None = None - - -class InternalStepRequest(BaseModel): - job_id: str = "" - jobId: str = "" - payload: dict[str, Any] = Field(default_factory=dict) - - -class JobStatusUpdateRequest(BaseModel): - status: str - error: str = "" - provider_name: str = "" - provider_task_id: str = "" - artifacts: 
dict[str, Any] = Field(default_factory=dict) - result: dict[str, Any] = Field(default_factory=dict) - - -def now_ts() -> int: - return int(datetime.now(timezone.utc).timestamp()) - - -def make_id(prefix: str) -> str: - return f"{prefix}_{uuid.uuid4().hex}" - - -def hash_password(password: str, salt: str) -> str: - import hashlib - - return hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt.encode("utf-8"), 120_000).hex() - - -def create_password_hash(password: str) -> tuple[str, str]: - salt = secrets.token_hex(16) - return hash_password(password, salt), salt - - -def verify_password(password: str, hashed: str, salt: str) -> bool: - return secrets.compare_digest(hash_password(password, salt), hashed) - - -def mask_api_key(value: str) -> str: - if not value: - return "" - if len(value) <= 8: - return "*" * len(value) - return f"{value[:4]}***{value[-4:]}" - - -def normalize_model_profile(row: dict[str, Any]) -> dict[str, Any]: - return { - "id": row["id"], - "owner_account_id": row.get("owner_account_id"), - "name": row["name"], - "provider": row["provider"], - "base_url": row["base_url"], - "api_key_masked": mask_api_key(row.get("api_key", "")), - "model_name": row["model_name"], - "is_system": bool(row.get("is_system", 0)), - "is_default": bool(row.get("is_default", 0)), - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def normalize_account(row: dict[str, Any]) -> dict[str, Any]: - return { - "id": row["id"], - "username": row["username"], - "display_name": row["display_name"], - "role": row["role"], - "approval_status": row["approval_status"], - "approved_by": row.get("approved_by"), - "approved_at": row.get("approved_at"), - "preferred_analysis_model_id": row.get("preferred_analysis_model_id") or "", - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def model_profile_for_account(account_id: str, requested_id: str | None) -> dict[str, Any]: - if requested_id: - row = db.fetch_one( - "SELECT * 
FROM model_profiles WHERE id = ? AND (owner_account_id IS NULL OR owner_account_id = ?)", - (requested_id, account_id), - ) - if row: - return row - account = db.fetch_one("SELECT preferred_analysis_model_id FROM accounts WHERE id = ?", (account_id,)) - preferred_id = (account or {}).get("preferred_analysis_model_id") or "" - if preferred_id: - row = db.fetch_one( - "SELECT * FROM model_profiles WHERE id = ? AND (owner_account_id IS NULL OR owner_account_id = ?)", - (preferred_id, account_id), - ) - if row: - return row - row = db.fetch_one("SELECT * FROM model_profiles WHERE is_default = 1 ORDER BY is_system DESC, created_at ASC LIMIT 1") - if not row: - raise HTTPException(status_code=500, detail="No model profile configured") - return row - - -def project_payload(row: dict[str, Any]) -> dict[str, Any]: - return { - "id": row["id"], - "user_id": row["user_id"], - "name": row["name"], - "description": row.get("description", ""), - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def content_source_payload(row: dict[str, Any]) -> dict[str, Any]: - metadata = row.get("metadata_json") or "{}" - try: - metadata_map = json.loads(metadata) - except json.JSONDecodeError: - metadata_map = {} - return { - "id": row["id"], - "user_id": row["user_id"], - "project_id": row.get("project_id", ""), - "source_kind": row["source_kind"], - "platform": row.get("platform", ""), - "handle": row.get("handle", ""), - "source_url": row.get("source_url", ""), - "title": row.get("title", ""), - "local_path": row.get("local_path", ""), - "metadata": metadata_map, - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def job_event_payload(row: dict[str, Any]) -> dict[str, Any]: - return { - "id": row["id"], - "job_id": row["job_id"], - "event_type": row["event_type"], - "payload": parse_json_object(row.get("payload_json") or "{}"), - "created_at": row["created_at"], - } - - -def ensure_default_project(account_id: str, username: str = 
"默认用户") -> dict[str, Any]: - project = db.fetch_one( - "SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC LIMIT 1", - (account_id,), - ) - if project: - return project - now = utc_now() - project_id = make_id("project") - db.execute( - """ - INSERT INTO projects (id, user_id, name, description, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?) - """, - ( - project_id, - account_id, - f"{username} 默认项目", - "系统自动创建", - now, - now, - ), - ) - return db.fetch_one("SELECT * FROM projects WHERE id = ?", (project_id,)) - - -def resolve_target_project(account_id: str, requested_project_id: str | None, username: str = "默认用户") -> dict[str, Any]: - if requested_project_id: - project = db.fetch_one( - "SELECT * FROM projects WHERE id = ? AND user_id = ?", - (requested_project_id, account_id), - ) - if project: - return project - raise HTTPException(status_code=404, detail="Project not found") - return ensure_default_project(account_id, username=username) - - -def resolve_target_assistant(account_id: str, requested_assistant_id: str | None, project_id: str = "") -> dict[str, Any] | None: - if not requested_assistant_id: - return None - assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ? AND user_id = ?", (requested_assistant_id, account_id)) - if not assistant: - raise HTTPException(status_code=404, detail="Assistant not found") - if project_id and assistant.get("project_id") and assistant.get("project_id") != project_id: - raise HTTPException(status_code=400, detail="Assistant does not belong to target project") - return assistant - - -def append_job_event(job_id: str, event_type: str, payload: dict[str, Any] | None = None) -> None: - db.execute( - """ - INSERT INTO job_events (id, job_id, event_type, payload_json, created_at) - VALUES (?, ?, ?, ?, ?) 
- """, - ( - make_id("evt"), - job_id, - event_type, - json.dumps(payload or {}, ensure_ascii=False), - utc_now(), - ), - ) - - -def parse_json_object(raw_text: str) -> dict[str, Any]: - cleaned = raw_text.strip() - if not cleaned: - return {} - try: - data = json.loads(cleaned) - return data if isinstance(data, dict) else {} - except json.JSONDecodeError: - match = re.search(r"\{.*\}", cleaned, re.S) - if not match: - return {} - try: - data = json.loads(match.group(0)) - return data if isinstance(data, dict) else {} - except json.JSONDecodeError: - return {} - - -def knowledge_base_payload(row: dict[str, Any]) -> dict[str, Any]: - document_count = db.fetch_one( - "SELECT COUNT(*) AS count FROM knowledge_documents WHERE knowledge_base_id = ?", - (row["id"],), - )["count"] - linked_count = db.fetch_one( - "SELECT COUNT(*) AS count FROM assistant_knowledge_bases WHERE knowledge_base_id = ?", - (row["id"],), - )["count"] - return { - "id": row["id"], - "user_id": row["user_id"], - "project_id": row.get("project_id", ""), - "name": row["name"], - "description": row.get("description", ""), - "sync_status": row.get("sync_status", "ready"), - "document_count": document_count, - "linked_assistant_count": linked_count, - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def assistant_payload(row: dict[str, Any]) -> dict[str, Any]: - kb_rows = db.fetch_all( - "SELECT knowledge_base_id FROM assistant_knowledge_bases WHERE assistant_id = ? 
ORDER BY knowledge_base_id ASC", - (row["id"],), - ) - return { - "id": row["id"], - "user_id": row["user_id"], - "project_id": row.get("project_id", ""), - "name": row["name"], - "description": row.get("description", ""), - "system_prompt": row.get("system_prompt", ""), - "generation_goal": row.get("generation_goal", ""), - "knowledge_base_ids": [item["knowledge_base_id"] for item in kb_rows], - "config": parse_json_object(row.get("config_json") or "{}"), - "model_profile_id": row.get("model_profile_id", ""), - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def review_payload(row: dict[str, Any]) -> dict[str, Any]: - metrics = parse_json_object(row.get("metrics_json") or "{}") - source_job = None - assistant = None - if row.get("source_job_id"): - source_job_row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (row["source_job_id"],)) - if source_job_row: - source_job = job_payload(source_job_row) - if row.get("assistant_id"): - assistant_row = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],)) - if assistant_row: - assistant = assistant_payload(assistant_row) - return { - "id": row["id"], - "user_id": row["user_id"], - "project_id": row.get("project_id", ""), - "source_job_id": row.get("source_job_id", ""), - "assistant_id": row.get("assistant_id", ""), - "title": row.get("title", ""), - "platform": row.get("platform", "douyin"), - "content_type": row.get("content_type", "video"), - "publish_url": row.get("publish_url", ""), - "published_at": row.get("published_at", ""), - "metrics": metrics, - "verdict": row.get("verdict", ""), - "highlights": row.get("highlights", ""), - "next_actions": row.get("next_actions", ""), - "notes": row.get("notes", ""), - "source_job": source_job, - "assistant": assistant, - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def document_payload(row: dict[str, Any]) -> dict[str, Any]: - analysis_map = parse_json_object(row.get("analysis_json") or 
"{}") - source_artifacts = parse_json_object(row.get("source_artifact_json") or "{}") - storyboard_raw = row.get("storyboard_json") or "[]" - try: - storyboard_items = json.loads(storyboard_raw) - except json.JSONDecodeError: - storyboard_items = [] - return { - "id": row["id"], - "knowledge_base_id": row["knowledge_base_id"], - "title": row["title"], - "source_type": row["source_type"], - "source_url": row.get("source_url", ""), - "transcript_text": row.get("transcript_text", ""), - "style_summary": row.get("style_summary", ""), - "combined_text": row.get("combined_text", ""), - "analysis": analysis_map, - "storyboards": storyboard_items, - "source_artifacts": source_artifacts, - "analysis_model_profile_id": row.get("analysis_model_profile_id", ""), - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def job_payload(row: dict[str, Any]) -> dict[str, Any]: - artifacts = row.get("artifacts_json") or "{}" - result = row.get("result_json") or "{}" - try: - artifacts_map = json.loads(artifacts) - except json.JSONDecodeError: - artifacts_map = {} - try: - result_map = json.loads(result) - except json.JSONDecodeError: - result_map = {} - return { - "id": row["id"], - "user_id": row["user_id"], - "project_id": row.get("project_id", ""), - "parent_job_id": row.get("parent_job_id", ""), - "assistant_id": row.get("assistant_id"), - "knowledge_base_id": row["knowledge_base_id"], - "content_source_id": row.get("content_source_id", ""), - "source_type": row["source_type"], - "line_type": row.get("line_type", "analysis"), - "workflow_key": row.get("workflow_key", ""), - "orchestrator": row.get("orchestrator", "n8n"), - "provider_name": row.get("provider_name", ""), - "provider_task_id": row.get("provider_task_id", ""), - "source_url": row.get("source_url"), - "title": row["title"], - "language": row.get("language", "auto"), - "status": row["status"], - "transcript_text": row.get("transcript_text", ""), - "style_summary": row.get("style_summary", ""), - 
"upload_status": row.get("upload_status", "pending"), - "error": row.get("error", ""), - "artifacts": artifacts_map, - "result": result_map, - "analysis_model_profile_id": row.get("analysis_model_profile_id", ""), - "created_at": row["created_at"], - "updated_at": row["updated_at"], - } - - -def require_auth(authorization: str | None = Header(default=None)) -> dict[str, Any]: - if not authorization or not authorization.startswith("Bearer "): - raise HTTPException(status_code=401, detail="Missing bearer token") - token = authorization.split(" ", 1)[1].strip() - token_row = db.fetch_one("SELECT * FROM auth_tokens WHERE token = ?", (token,)) - if not token_row: - raise HTTPException(status_code=401, detail="Invalid token") - account = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (token_row["account_id"],)) - if not account: - raise HTTPException(status_code=401, detail="Account not found") - return account - - -def require_approved(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]: - if account["approval_status"] != "approved": - raise HTTPException(status_code=403, detail="Account pending approval") - return account - - -def require_super_admin(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]: - if account["role"] != "super_admin": - raise HTTPException(status_code=403, detail="Super admin required") - return account - - -def require_orchestrator(x_orchestrator_secret: str | None = Header(default=None)) -> bool: - if ORCHESTRATOR_SHARED_SECRET and x_orchestrator_secret != ORCHESTRATOR_SHARED_SECRET: - raise HTTPException(status_code=401, detail="Invalid orchestrator secret") - return True - - -def create_content_source( - *, - account_id: str, - project_id: str, - source_kind: str, - platform: str = "", - handle: str = "", - source_url: str = "", - title: str = "", - local_path: str = "", - metadata: dict[str, Any] | None = None, -) -> dict[str, Any]: - source_id = make_id("source") - now = utc_now() - db.execute( - """ - 
INSERT INTO content_sources ( - id, user_id, project_id, source_kind, platform, handle, - source_url, title, local_path, metadata_json, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - source_id, - account_id, - project_id, - source_kind, - platform, - handle, - source_url, - title, - local_path, - json.dumps(metadata or {}, ensure_ascii=False), - now, - now, - ), - ) - return db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,)) - - -def merge_json_field(current_raw: str | None, updates: dict[str, Any]) -> str: - current = parse_json_object(current_raw or "{}") - current.update(updates) - return json.dumps(current, ensure_ascii=False) - - -def update_content_source_metadata(source_id: str, updates: dict[str, Any]) -> dict[str, Any]: - row = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,)) - if not row: - raise HTTPException(status_code=404, detail="Content source not found") - db.execute( - "UPDATE content_sources SET metadata_json = ?, updated_at = ? WHERE id = ?", - (merge_json_field(row.get("metadata_json") or "{}", updates), utc_now(), source_id), - ) - return db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,)) - - -def update_job_state( - job_id: str, - *, - status: str, - error: str = "", - provider_name: str | None = None, - provider_task_id: str | None = None, - artifacts: dict[str, Any] | None = None, - result: dict[str, Any] | None = None, -) -> dict[str, Any]: - row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)) - if not row: - raise HTTPException(status_code=404, detail="Job not found") - merged_artifacts = merge_json_field(row.get("artifacts_json") or "{}", artifacts or {}) - merged_result = merge_json_field(row.get("result_json") or "{}", result or {}) - db.execute( - """ - UPDATE jobs - SET status = ?, error = ?, provider_name = ?, provider_task_id = ?, - artifacts_json = ?, result_json = ?, updated_at = ? - WHERE id = ? 
- """, - ( - status, - error, - provider_name if provider_name is not None else row.get("provider_name", ""), - provider_task_id if provider_task_id is not None else row.get("provider_task_id", ""), - merged_artifacts, - merged_result, - utc_now(), - job_id, - ), - ) - append_job_event( - job_id, - f"job.{status}", - { - "provider_name": provider_name if provider_name is not None else row.get("provider_name", ""), - "provider_task_id": provider_task_id if provider_task_id is not None else row.get("provider_task_id", ""), - "error": error, - "artifacts": artifacts or {}, - "result": result or {}, - }, - ) - return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)) - - -def job_context_payload(row: dict[str, Any]) -> dict[str, Any]: - payload = job_payload(row) - payload["parent_job"] = None - payload["child_jobs"] = [] - payload["project"] = None - payload["assistant"] = None - payload["knowledge_base"] = None - payload["content_source"] = None - payload["events"] = [] - - if row.get("project_id"): - project = db.fetch_one("SELECT * FROM projects WHERE id = ?", (row["project_id"],)) - if project: - payload["project"] = project_payload(project) - - if row.get("assistant_id"): - assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],)) - if assistant: - payload["assistant"] = assistant_payload(assistant) - - kb = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (row["knowledge_base_id"],)) - if kb: - payload["knowledge_base"] = knowledge_base_payload(kb) - - if row.get("content_source_id"): - source = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["content_source_id"],)) - if source: - payload["content_source"] = content_source_payload(source) - - if row.get("parent_job_id"): - parent = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (row["parent_job_id"],)) - if parent: - payload["parent_job"] = job_payload(parent) - - payload["child_jobs"] = [ - job_payload(item) - for item in db.fetch_all("SELECT * 
FROM jobs WHERE parent_job_id = ? ORDER BY created_at ASC", (row["id"],)) - ] - - payload["events"] = [ - job_event_payload(item) - for item in db.fetch_all("SELECT * FROM job_events WHERE job_id = ? ORDER BY created_at ASC", (row["id"],)) - ] - - return payload - - -async def run_local_orchestrated_job(job_id: str, workflow_key: str) -> None: - try: - if workflow_key == "analysis_pipeline": - await internal_run_analysis(None, job_id, True) - return - if workflow_key == "content_source_sync_pipeline": - await internal_content_source_sync(None, job_id, True) - return - if workflow_key == "real_cut_pipeline": - await internal_real_cut_run(None, job_id, True) - return - if workflow_key == "ai_video_pipeline": - await internal_ai_video_render(None, job_id, True) - return - raise HTTPException(status_code=400, detail=f"Unsupported local workflow fallback: {workflow_key}") - except HTTPException as exc: - row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)) - if row: - update_job_state( - job_id, - status="failed", - provider_name="collector-local", - provider_task_id="", - error=str(exc.detail), - result=merge_json_field(row.get("result_json") or "{}", {"local_orchestrator": {"error": str(exc.detail)}}), - ) - append_job_event(job_id, "workflow.local.failed", {"workflow_key": workflow_key, "error": str(exc.detail)}) - except Exception as exc: - row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)) - if row: - update_job_state( - job_id, - status="failed", - provider_name="collector-local", - provider_task_id="", - error=f"Local orchestrator failed: {exc}", - result=merge_json_field(row.get("result_json") or "{}", {"local_orchestrator": {"error": str(exc)}}), - ) - append_job_event(job_id, "workflow.local.failed", {"workflow_key": workflow_key, "error": str(exc)}) - - -async def trigger_orchestrated_job(job_row: dict[str, Any]) -> dict[str, Any]: - workflow_key = job_row.get("workflow_key") or "analysis_pipeline" - append_job_event(job_row["id"], 
"workflow.trigger.requested", {"workflow_key": workflow_key}) - update_job_state( - job_row["id"], - status="queued", - provider_name="n8n", - provider_task_id="", - result={"n8n_trigger": {"requested": True}}, - ) - payload = { - "jobId": job_row["id"], - "job_id": job_row["id"], - "workflowKey": workflow_key, - "workflow_key": workflow_key, - "lineType": job_row.get("line_type", "analysis"), - "line_type": job_row.get("line_type", "analysis"), - } - if not n8n_client.enabled: - append_job_event(job_row["id"], "workflow.trigger.fallback", {"workflow_key": workflow_key, "reason": "n8n is not configured"}) - asyncio.create_task(run_local_orchestrated_job(job_row["id"], workflow_key)) - db.execute( - """ - UPDATE jobs - SET provider_name = ?, provider_task_id = ?, result_json = ?, updated_at = ? - WHERE id = ? - """, - ( - "collector-local", - "", - merge_json_field( - db.fetch_one("SELECT result_json FROM jobs WHERE id = ?", (job_row["id"],)).get("result_json") or "{}", - {"n8n_trigger": {"requested": True, "fallback": "local", "reason": "n8n is not configured"}}, - ), - utc_now(), - job_row["id"], - ), - ) - return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_row["id"],)) - - try: - trigger_result = await n8n_client.trigger(workflow_key, payload) - except Exception as exc: - append_job_event(job_row["id"], "workflow.trigger.fallback", {"workflow_key": workflow_key, "reason": str(exc)}) - asyncio.create_task(run_local_orchestrated_job(job_row["id"], workflow_key)) - db.execute( - """ - UPDATE jobs - SET provider_name = ?, provider_task_id = ?, result_json = ?, updated_at = ? - WHERE id = ? 
- """, - ( - "collector-local", - "", - merge_json_field( - db.fetch_one("SELECT result_json FROM jobs WHERE id = ?", (job_row["id"],)).get("result_json") or "{}", - {"n8n_trigger": {"requested": True, "fallback": "local", "reason": str(exc)}}, - ), - utc_now(), - job_row["id"], - ), - ) - return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_row["id"],)) - provider_task_id = str(trigger_result.get("executionId") or "") - db.execute( - """ - UPDATE jobs - SET provider_name = ?, provider_task_id = ?, result_json = ?, updated_at = ? - WHERE id = ? - """, - ( - "n8n", - provider_task_id, - merge_json_field( - db.fetch_one("SELECT result_json FROM jobs WHERE id = ?", (job_row["id"],)).get("result_json") or "{}", - {"n8n_trigger": trigger_result}, - ), - utc_now(), - job_row["id"], - ), - ) - append_job_event( - job_row["id"], - "workflow.trigger.accepted", - {"provider_task_id": provider_task_id, "trigger_result": trigger_result}, - ) - return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_row["id"],)) - - -async def call_model(profile: dict[str, Any], system_prompt: str, user_prompt: str, temperature: float = 0.4) -> str: - try: - content = await openai_client.chat_completion( - base_url=profile["base_url"], - api_key=profile.get("api_key", ""), - model=profile["model_name"], - system_prompt=system_prompt, - user_prompt=user_prompt, - temperature=temperature, - ) - if content: - return content.strip() - except Exception: - pass - excerpt = user_prompt.strip().replace("\n", " ")[:220] - return f"风格摘要:内容以强结论开头,节奏偏短句,强调冲突转折和行动指令。素材摘要:{excerpt}" - - -async def summarize_style(profile: dict[str, Any], transcript_text: str, title: str) -> str: - prompt = ( - f"标题:{title}\n\n" - f"素材全文:\n{transcript_text}\n\n" - "请提炼这段素材的文案风格、结构节奏、开头钩子、情绪推进、收尾 CTA,并给出可复用的学习结论。" - ) - system_prompt = "你是短视频文案拆解师,输出简洁、结构化、适合沉淀进知识库。" - return await call_model(profile, system_prompt, prompt, temperature=0.3) - - -async def generate_content_blueprint( - profile: dict[str, Any], - *, - 
title: str, - transcript_text: str, - style_summary: str, - agent_prompt: str = "", - generation_goal: str = "", -) -> dict[str, Any]: - system_prompt = ( - "你是短视频内容策略师。" - "必须输出 JSON 对象,不要输出 Markdown,不要输出多余解释。" - ) - user_prompt = ( - f"标题:{title}\n\n" - f"素材转写:\n{transcript_text}\n\n" - f"风格拆解:\n{style_summary}\n\n" - f"智能体补充约束:\n{agent_prompt or '无'}\n\n" - f"生成目标:\n{generation_goal or '围绕原素材做二创短视频'}\n\n" - "请输出如下 JSON 结构:" - "{" - '"analysis":{"hook":"","structure":[],"style_tags":[],"cta":""},' - '"rewrite":{"title":"","script":"","summary":""},' - '"storyboards":[' - '{"shot_index":1,"title":"","narration":"","visual":"","first_frame_prompt":"","last_frame_prompt":"","video_prompt":"","duration_sec":5}' - "]" - "}" - ) - raw = await call_model(profile, system_prompt, user_prompt, temperature=0.5) - parsed = parse_json_object(raw) - if parsed.get("storyboards"): - return parsed - - fallback_storyboards: list[dict[str, Any]] = [] - paragraphs = [part.strip() for part in transcript_text.split("\n") if part.strip()] - seed_segments = paragraphs[:4] or [transcript_text[:1200]] - for idx, segment in enumerate(seed_segments, start=1): - snippet = segment[:180] - fallback_storyboards.append( - { - "shot_index": idx, - "title": f"镜头{idx}", - "narration": snippet, - "visual": f"围绕这段内容构建具象画面:{snippet}", - "first_frame_prompt": f"短视频首帧,突出主题:{snippet}", - "last_frame_prompt": f"短视频尾帧,强化结论和行动指令:{snippet}", - "video_prompt": f"基于首尾帧生成连贯镜头,内容是:{snippet}", - "duration_sec": 5, - } - ) - - return { - "analysis": { - "hook": title, - "structure": ["结论开场", "核心论点", "例证推进", "收尾行动"], - "style_tags": ["短句", "结论先行", "强 CTA"], - "cta": "引导用户采取下一步行动", - }, - "rewrite": { - "title": title, - "script": transcript_text[:3000], - "summary": style_summary[:500], - }, - "storyboards": fallback_storyboards, - } - - -def fallback_transcript_from_text(title: str, content: str) -> str: - return f"标题:{title}\n\n正文:\n{content.strip()}" - - -def infer_platform_from_url(source_url: str) -> str: - 
normalized = source_url.strip().lower() - if "bilibili.com" in normalized or "b23.tv" in normalized: - return "bilibili" - if "douyin.com" in normalized or "iesdouyin.com" in normalized: - return "douyin" - if "xiaohongshu.com" in normalized or "xhslink.com" in normalized: - return "xiaohongshu" - if "youtube.com" in normalized or "youtu.be" in normalized: - return "youtube" - return "" - - -def command_exists(name: str) -> bool: - return shutil.which(name) is not None - - -def run_command(command: list[str], cwd: Path | None = None, timeout: float | None = None) -> tuple[int, str, str]: - try: - proc = subprocess.run( - command, - cwd=str(cwd) if cwd else None, - capture_output=True, - text=True, - timeout=timeout, - ) - return proc.returncode, proc.stdout, proc.stderr - except subprocess.TimeoutExpired as exc: - stdout = exc.stdout if isinstance(exc.stdout, str) else (exc.stdout or b"").decode("utf-8", errors="ignore") - stderr = exc.stderr if isinstance(exc.stderr, str) else (exc.stderr or b"").decode("utf-8", errors="ignore") - detail = stderr or f"Command timed out after {timeout} seconds" - return 124, stdout, detail - - -def discover_account_video_links(source_url: str, max_items: int) -> tuple[list[dict[str, Any]], dict[str, Any]]: - if not command_exists(YTDLP_BIN): - raise HTTPException(status_code=503, detail="yt-dlp is not configured") - - discovery_cmd = [ - YTDLP_BIN, - "--flat-playlist", - "--playlist-end", - str(max_items), - "--print", - "%(webpage_url)s\t%(title)s\t%(id)s", - source_url, - ] - code, stdout, stderr = run_command(discovery_cmd, timeout=180) - raw_lines = [line.strip() for line in stdout.splitlines() if line.strip()] - items: list[dict[str, Any]] = [] - seen_urls: set[str] = set() - for line in raw_lines: - parts = line.split("\t") - video_url = parts[0].strip() if parts else "" - raw_title = parts[1].strip() if len(parts) > 1 else "" - raw_external_id = parts[2].strip() if len(parts) > 2 else "" - if not video_url or video_url == 
"NA" or video_url in seen_urls: - continue - seen_urls.add(video_url) - items.append( - { - "video_url": video_url, - "title": raw_title if raw_title and raw_title != "NA" else "短视频素材", - "external_id": raw_external_id if raw_external_id != "NA" else "", - } - ) - - debug_payload = { - "discovery_command": discovery_cmd, - "discovery_stdout_preview": raw_lines[: min(len(raw_lines), max_items)], - "discovery_stderr": stderr.strip()[:1000], - "discovery_exit_code": code, - } - if code != 0: - raise HTTPException(status_code=502, detail=f"Failed to inspect content source: {stderr.strip()[:200] or 'yt-dlp error'}") - return items, debug_payload - - -def validate_real_cut_source_job(source_job: dict[str, Any]) -> None: - source_type = source_job.get("source_type", "") - if source_type not in {"upload_video", "video_link"}: - raise HTTPException(status_code=400, detail="Real-cut source job must come from upload_video or video_link") - if source_type == "video_link" and source_job.get("status") != "completed": - raise HTTPException(status_code=409, detail="Video link source job must be completed before real-cut staging") - - -def resolve_real_cut_source_file(source_job: dict[str, Any]) -> tuple[Path, dict[str, Any] | None]: - validate_real_cut_source_job(source_job) - artifacts = parse_job_artifacts(source_job) - candidates: list[Path] = [] - - if artifacts.get("uploaded_path"): - candidates.append(Path(str(artifacts["uploaded_path"]))) - if artifacts.get("source_path"): - candidates.append(Path(str(artifacts["source_path"]))) - if source_job.get("content_source_id"): - source_row = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_job["content_source_id"],)) - if source_row and source_row.get("local_path"): - candidates.append(Path(str(source_row["local_path"]))) - if source_job.get("source_type") == "video_link": - candidates.append(JOBS_DIR / source_job["id"] / "source.mp4") - - seen: set[str] = set() - for candidate in candidates: - candidate_str = 
str(candidate) - if not candidate_str or candidate_str in seen: - continue - seen.add(candidate_str) - if candidate.exists() and candidate.is_file(): - return candidate, artifacts - - raise HTTPException(status_code=409, detail="Source job media file is not available for real-cut staging") - - -async def stage_real_cut_source_to_cutvideo(source_job: dict[str, Any]) -> dict[str, Any]: - if not cutvideo_client.enabled: - raise HTTPException(status_code=503, detail="CutVideo is not configured") - - source_path, source_artifacts = resolve_real_cut_source_file(source_job) - folder_name = f"storyforge-{source_job['id']}" - upload_payload = await cutvideo_client.upload_source_file(source_path, folder_name=folder_name) - input_dir = str(upload_payload.get("input_dir") or "").strip() - if not input_dir: - raise HTTPException(status_code=502, detail="CutVideo upload did not return input_dir") - return { - "input_dir": input_dir, - "source_path": str(source_path), - "upload": upload_payload, - "source_artifacts": source_artifacts, - } - - -def cutvideo_run_has_materialized_outputs(run_payload: dict[str, Any]) -> bool: - for key in ( - "manifest", - "assets", - "segments", - "top_segments", - "tool_report", - "llm_review_summary", - "exports", - "timeline", - "summary_markdown", - "clips", - "downloads", - "transcripts", - "files", - ): - value = run_payload.get(key) - if value not in (None, "", [], {}, 0): - return True - return bool(str(run_payload.get("generated_at") or "").strip()) - - -async def find_cutvideo_run_for_job(row: dict[str, Any]) -> dict[str, Any] | None: - result_payload = parse_job_result(row) - submit_payload = result_payload.get("cutvideo_submit") or {} - if not isinstance(submit_payload, dict): - submit_payload = {} - request_payload = submit_payload.get("request") or {} - if not isinstance(request_payload, dict): - request_payload = {} - expected_name = str(request_payload.get("name") or row.get("title") or "").strip() - if not expected_name: - return 
None - - runs_payload = await cutvideo_client.list_runs() - items = runs_payload.get("items") - if not isinstance(items, list): - return None - - normalized_expected = expected_name.casefold() - for item in items: - if not isinstance(item, dict): - continue - run_id = str(item.get("run_id") or item.get("id") or "").strip() - job_name = str(item.get("job_name") or item.get("name") or "").strip() - normalized_job_name = job_name.casefold() - normalized_run_id = run_id.casefold() - if ( - normalized_job_name == normalized_expected - or normalized_run_id == normalized_expected - or normalized_job_name.endswith(normalized_expected) - or normalized_run_id.endswith(normalized_expected) - ): - detail = await cutvideo_client.get_run(run_id or job_name) - return { - "run_id": run_id, - "summary": item, - "detail": detail, - } - return None - - -def create_job_record( - *, - account_id: str, - project_id: str, - knowledge_base_id: str, - parent_job_id: str | None = None, - source_type: str, - line_type: str, - workflow_key: str, - title: str, - language: str = "auto", - source_url: str = "", - assistant_id: str | None = None, - content_source_id: str | None = None, - artifacts: dict[str, Any] | None = None, - analysis_model_profile_id: str = "", -) -> dict[str, Any]: - job_id = make_id("job") - now = utc_now() - db.execute( - """ - INSERT INTO jobs ( - id, user_id, project_id, parent_job_id, assistant_id, knowledge_base_id, content_source_id, - source_type, line_type, workflow_key, orchestrator, provider_name, provider_task_id, - source_url, title, language, status, transcript_text, style_summary, upload_status, - error, artifacts_json, result_json, analysis_model_profile_id, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'n8n', '', '', ?, ?, ?, 'pending', '', '', 'pending', '', ?, '{}', ?, ?, ?) 
- """, - ( - job_id, - account_id, - project_id, - parent_job_id, - assistant_id, - knowledge_base_id, - content_source_id, - source_type, - line_type, - workflow_key, - source_url or None, - title, - language, - json.dumps(artifacts or {}, ensure_ascii=False), - analysis_model_profile_id, - now, - now, - ), - ) - return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)) - - -async def wait_for_huobao_image(image_id: str | int) -> dict[str, Any]: - deadline = now_ts() + HUOBAO_MAX_WAIT_SEC - last_payload: dict[str, Any] = {} - while True: - last_payload = await huobao_client.get_image(str(image_id)) - status = str(last_payload.get("status") or "").lower() - if status in {"completed", "failed"}: - return last_payload - if now_ts() >= deadline: - raise RuntimeError(f"Huobao image task timed out: {image_id}") - await asyncio.sleep(HUOBAO_POLL_INTERVAL_SEC) - - -async def wait_for_huobao_video(video_id: str | int) -> dict[str, Any]: - deadline = now_ts() + HUOBAO_MAX_WAIT_SEC - last_payload: dict[str, Any] = {} - while True: - last_payload = await huobao_client.get_video(str(video_id)) - status = str(last_payload.get("status") or "").lower() - if status in {"completed", "failed"}: - return last_payload - if now_ts() >= deadline: - raise RuntimeError(f"Huobao video task timed out: {video_id}") - await asyncio.sleep(HUOBAO_POLL_INTERVAL_SEC) - - -def coerce_storyboards(items: Any) -> list[dict[str, Any]]: - if not isinstance(items, list): - return [] - return [item for item in items if isinstance(item, dict)] - - -def huobao_image_size_for_aspect_ratio(aspect_ratio: str) -> str: - normalized = str(aspect_ratio or "").strip() - if normalized == "9:16": - return "1024x1536" - if normalized == "16:9": - return "1536x1024" - if normalized == "1:1": - return "1024x1024" - return "1024x1536" - - -async def transcribe_media(job_dir: Path, source_path: Path, title: str, source_url: str = "") -> tuple[str, dict[str, Any]]: - artifacts: dict[str, Any] = {} - transcript = 
"" - media_path = source_path - artifacts["source_path"] = str(media_path) - - if not source_path.exists(): - transcript = ( - f"素材标题:{title}\n" - f"素材来源:{source_url or source_path.name}\n\n" - "当前环境未找到可直接处理的本地视频文件,已记录来源信息并进入降级学习流程。" - ) - return transcript, artifacts - - audio_path = job_dir / "audio.wav" - if command_exists(FFMPEG_BIN): - code, _, err = run_command([FFMPEG_BIN, "-y", "-i", str(source_path), "-ar", "16000", "-ac", "1", str(audio_path)]) - if code == 0 and audio_path.exists(): - artifacts["audio_path"] = str(audio_path) - media_path = audio_path - elif err: - artifacts["ffmpeg_error"] = err.strip()[:500] - - if asr_http_client.enabled and media_path.exists(): - try: - asr_payload = await asr_http_client.transcribe_audio(media_path) - artifacts["asr_http_payload"] = { - "success": bool(asr_payload.get("success", True)), - "duration_ms": asr_payload.get("duration_ms"), - "error_message": str(asr_payload.get("error_message") or "")[:500], - } - transcript = str(asr_payload.get("text") or "").strip() - if transcript: - artifacts["asr_backend"] = "http" - except Exception as exc: - error_detail = str(exc).strip() or exc.__class__.__name__ - artifacts["asr_http_error"] = error_detail[:500] - - if WHISPER_BIN and Path(WHISPER_BIN).exists() and Path(WHISPER_MODEL).exists(): - out_prefix = job_dir / "whisper" - code, stdout, stderr = run_command([ - WHISPER_BIN, - "-m", - WHISPER_MODEL, - "-f", - str(media_path), - "-otxt", - "-of", - str(out_prefix), - ]) - txt_path = Path(str(out_prefix) + ".txt") - if code == 0 and txt_path.exists(): - cli_transcript = txt_path.read_text(encoding="utf-8", errors="ignore").strip() - if cli_transcript: - transcript = cli_transcript - artifacts["transcript_path"] = str(txt_path) - artifacts["asr_backend"] = artifacts.get("asr_backend") or "whisper_cli" - else: - artifacts["whisper_stdout"] = stdout.strip()[:500] - artifacts["whisper_error"] = stderr.strip()[:500] - - if not transcript: - transcript = ( - f"素材标题:{title}\n" - 
def ensure_user_kb(account_id: str, project_id: str = "", username: str = "默认用户") -> dict[str, Any]:
    """Return the account's oldest knowledge base in the resolved project.

    The project is resolved through ``resolve_target_project``. When the
    project has no knowledge base yet, a default one is inserted with
    ``sync_status`` ``ready`` and returned.
    """
    project = resolve_target_project(account_id, project_id or None, username=username)
    row = db.fetch_one(
        "SELECT * FROM knowledge_bases WHERE user_id = ? AND project_id = ? ORDER BY created_at ASC LIMIT 1",
        (account_id, project["id"]),
    )
    if row:
        return row
    kb_id = make_id("kb")
    now = utc_now()
    db.execute(
        """
        INSERT INTO knowledge_bases (id, user_id, project_id, name, description, sync_status, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (kb_id, account_id, project["id"], "默认知识库", "系统为新用户自动创建", "ready", now, now),
    )
    return db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (kb_id,))


async def process_job(job_id: str) -> None:
    """Run the analysis pipeline for one job and persist the outcome.

    Steps: mark the job ``processing``; obtain a transcript according to
    ``source_type`` (``text``, ``video_link`` or ``upload_video``); summarize
    the style; generate the content blueprint; write a knowledge document;
    mark the job ``completed``. Any exception marks the job ``failed`` and
    stores the error text. Unknown job ids are ignored.
    """
    row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
    if not row:
        return
    now = utc_now()
    db.execute("UPDATE jobs SET status = ?, updated_at = ? WHERE id = ?", ("processing", now, job_id))
    append_job_event(job_id, "job.processing", {})

    try:
        artifacts = json.loads(row.get("artifacts_json") or "{}")
        transcript_text = row.get("transcript_text", "")
        job_dir = JOBS_DIR / job_id
        job_dir.mkdir(parents=True, exist_ok=True)

        if row["source_type"] == "text":
            transcript_text = fallback_transcript_from_text(row["title"], artifacts.get("input_text", ""))
        elif row["source_type"] == "video_link":
            downloaded = job_dir / "source.mp4"
            if command_exists(YTDLP_BIN):
                code, stdout, stderr = run_command([
                    YTDLP_BIN,
                    "--no-playlist",
                    "-o",
                    str(downloaded),
                    row.get("source_url") or "",
                ], cwd=job_dir)
                if code == 0 and downloaded.exists():
                    artifacts["download_stdout"] = stdout.strip()[:500]
                else:
                    artifacts["download_error"] = stderr.strip()[:500]
            transcript_text, extra = await transcribe_media(
                job_dir,
                downloaded if downloaded.exists() else job_dir / "placeholder.mp4",
                row["title"],
                row.get("source_url") or "",
            )
            artifacts.update(extra)
        elif row["source_type"] == "upload_video":
            # Path("") is the current directory, which always "exists"; use a
            # non-existent placeholder so transcription takes its degraded path
            # instead of feeding a directory to ffmpeg/ASR.
            uploaded_path = str(artifacts.get("uploaded_path") or "").strip()
            source_path = Path(uploaded_path) if uploaded_path else job_dir / "placeholder.mp4"
            transcript_text, extra = await transcribe_media(job_dir, source_path, row["title"], row.get("source_url") or "")
            artifacts.update(extra)

        profile = model_profile_for_account(row["user_id"], row.get("analysis_model_profile_id") or None)
        style_summary = await summarize_style(profile, transcript_text, row["title"])
        assistant = None
        if row.get("assistant_id"):
            assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],))
        content_blueprint = await generate_content_blueprint(
            profile,
            title=row["title"],
            transcript_text=transcript_text,
            style_summary=style_summary,
            agent_prompt=(assistant or {}).get("system_prompt", ""),
            generation_goal=(assistant or {}).get("generation_goal", ""),
        )
        combined_text = (
            f"{transcript_text}\n\n"
            "------\n"
            f"风格学习结论:\n{style_summary}\n\n"
            "------\n"
            f"二创文案:\n{(content_blueprint.get('rewrite') or {}).get('script', '')}\n\n"
            "------\n"
            f"分镜:\n{json.dumps(content_blueprint.get('storyboards') or [], ensure_ascii=False, indent=2)}"
        )
        kb_row = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (row["knowledge_base_id"],))
        if not kb_row:
            raise RuntimeError("Knowledge base not found")
        document_id = make_id("doc")
        timestamp = utc_now()
        db.execute(
            """
            INSERT INTO knowledge_documents (
                id, knowledge_base_id, title, source_type, source_url, transcript_text,
                style_summary, combined_text, analysis_json, storyboard_json, source_artifact_json,
                analysis_model_profile_id, created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                document_id,
                row["knowledge_base_id"],
                row["title"],
                row["source_type"],
                row.get("source_url") or "",
                transcript_text,
                style_summary,
                combined_text,
                json.dumps(content_blueprint.get("analysis") or {}, ensure_ascii=False),
                json.dumps(content_blueprint.get("storyboards") or [], ensure_ascii=False),
                json.dumps(artifacts, ensure_ascii=False),
                profile["id"],
                timestamp,
                timestamp,
            ),
        )
        update_job_state(
            job_id,
            status="completed",
            artifacts={
                "document_id": document_id,
                "project_job_dir": str(job_dir),
                **artifacts,
            },
            result={
                "analysis": content_blueprint.get("analysis") or {},
                "rewrite": content_blueprint.get("rewrite") or {},
                "storyboards": content_blueprint.get("storyboards") or [],
                "document_id": document_id,
            },
        )
        db.execute(
            """
            UPDATE jobs
            SET transcript_text = ?, style_summary = ?, upload_status = ?, updated_at = ?
            WHERE id = ?
            """,
            (transcript_text, style_summary, "ready", timestamp, job_id),
        )
        db.execute(
            "UPDATE knowledge_bases SET sync_status = ?, updated_at = ? WHERE id = ?",
            ("ready", timestamp, kb_row["id"]),
        )
    except Exception as exc:
        # Some exceptions stringify to "", which would leave a failed job with
        # no explanation; fall back to the exception class name.
        error_detail = str(exc).strip() or exc.__class__.__name__
        update_job_state(job_id, status="failed", error=error_detail)


def probe_tcp(url: str, timeout: float = 3.0) -> dict[str, Any]:
    """Check whether the host/port of ``url`` accepts a TCP connection.

    Returns a dict with ``configured``, ``reachable``, ``status_code``
    (always 0 here), ``error`` and ``url``. An empty URL reports
    ``not_configured``; a URL without a host, or with a malformed or
    out-of-range port, reports ``invalid_url``. The port defaults to 443
    for ``https`` and 80 otherwise.
    """
    if not url:
        return {"configured": False, "reachable": False, "status_code": 0, "error": "not_configured", "url": ""}
    try:
        # urlparse() and the .port property raise ValueError on malformed input.
        parsed = urlparse(url)
        host = parsed.hostname
        port = parsed.port or (443 if parsed.scheme == "https" else 80)
    except ValueError:
        return {"configured": True, "reachable": False, "status_code": 0, "error": "invalid_url", "url": url}
    if not host:
        return {"configured": True, "reachable": False, "status_code": 0, "error": "invalid_url", "url": url}
    try:
        # create_connection resolves the host and tries every returned address
        # (IPv4 and IPv6); the context manager closes the socket.
        with socket.create_connection((host, port), timeout=timeout):
            pass
    except Exception as exc:  # pragma: no cover - operational probe
        return {"configured": True, "reachable": False, "status_code": 0, "error": str(exc), "url": url}
    return {"configured": True, "reachable": True, "status_code": 0, "error": "", "url": url}


def probe_http(url: str, path: str = "", timeout: float = 3.0) -> dict[str, Any]:
    """Probe ``url`` + ``path`` over HTTP after a successful TCP probe.

    The TCP probe result is reused and updated in place. A response with a
    status below 500 counts as reachable; otherwise ``error`` is
    ``http_<status>``. Request exceptions mark the target unreachable and
    store the exception text.
    """
    tcp = probe_tcp(url, timeout=timeout)
    target_url = urljoin(url if url.endswith("/") else f"{url}/", path.lstrip("/")) if url else ""
    if not tcp["configured"] or not tcp["reachable"]:
        if target_url:
            tcp["url"] = target_url
        return tcp
    try:
        response = httpx.get(target_url or url, timeout=timeout, follow_redirects=True)
        tcp["status_code"] = response.status_code
        tcp["reachable"] = response.status_code < 500
        tcp["error"] = "" if response.status_code < 500 else f"http_{response.status_code}"
    except Exception as exc:  # pragma: no cover - operational probe
        tcp["reachable"] = False
        tcp["error"] = str(exc)
    tcp["url"] = target_url or url
    return tcp
def fetch_local_model_catalog(timeout: float = 8.0) -> dict[str, Any]:
    """Describe the local OpenAI-compatible endpoint and list its models.

    The result always carries the probe outcome (``configured``,
    ``reachable``, ``status_code``, ``error``, ``url``) together with the
    configured base URL, the public base URL, the management page URL and
    the default model. ``models`` is filled from ``<base>/models`` only when
    the probe succeeded; a failure while fetching it marks the endpoint
    unreachable and records the error.
    """
    probe = probe_http(LOCAL_OPENAI_BASE_URL, "/models", timeout=timeout)
    public_root = local_model_public_base_url()
    catalog: dict[str, Any] = {
        "configured": probe.get("configured", False),
        "reachable": probe.get("reachable", False),
        "base_url": LOCAL_OPENAI_BASE_URL,
        "public_base_url": public_root,
        "management_url": f"{public_root}/management.html" if public_root else "",
        "default_model": LOCAL_OPENAI_MODEL,
        "models": [],
        "status_code": probe.get("status_code", 0),
        "error": probe.get("error", ""),
        "url": probe.get("url", ""),
    }
    if not (probe.get("configured") and probe.get("reachable")):
        return catalog
    try:
        base = LOCAL_OPENAI_BASE_URL
        if not base.endswith("/"):
            base = f"{base}/"
        response = httpx.get(urljoin(base, "models"), timeout=timeout)
        response.raise_for_status()
        body = response.json()
        models = []
        for entry in body.get("data") or []:
            if not isinstance(entry, dict):
                continue
            models.append(
                {
                    "id": entry.get("id", ""),
                    "owned_by": entry.get("owned_by", ""),
                    "created": entry.get("created", 0),
                }
            )
        catalog["models"] = models
    except Exception as exc:  # pragma: no cover - operational probe
        catalog["reachable"] = False
        catalog["error"] = str(exc)
    return catalog


@app.on_event("startup")
def on_startup() -> None:
    """Create the database schema, then seed the default records."""
    db.init_schema()
    seed_defaults()


@app.get("/healthz")
def healthz() -> dict[str, Any]:
    """Report liveness together with the configured integration endpoints."""
    report: dict[str, Any] = {"status": "ok"}
    report["dbPath"] = DB_PATH
    report["defaultExternalBaseUrl"] = DEFAULT_EXTERNAL_BASE_URL
    report["localModelBaseUrl"] = LOCAL_OPENAI_BASE_URL
    report["asrHttpBaseUrl"] = ASR_HTTP_BASE_URL
    report["n8nBaseUrl"] = N8N_BASE_URL
    report["cutvideoBaseUrl"] = CUTVIDEO_BASE_URL
    report["cutvideoUploadTimeoutSec"] = CUTVIDEO_UPLOAD_TIMEOUT_SEC
    report["huobaoBaseUrl"] = HUOBAO_BASE_URL
    return report
@app.get("/v2/integrations/local-models")
def integrations_local_models(account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Return the local model catalog; requires an approved account."""
    _ = account  # dependency is evaluated only to enforce approval
    return fetch_local_model_catalog()


def seed_defaults() -> None:
    """Seed the default model profile and the bootstrap super-admin account.

    * When no model profile is flagged as default, a system profile pointing
      at the local OpenAI-compatible endpoint is inserted.
    * When the ``kris`` account is missing, it is created as an approved
      ``super_admin`` together with its default project, knowledge base and
      a preset assistant linked to that knowledge base.

    The bootstrap password is read from the ``STORYFORGE_SEED_ADMIN_PASSWORD``
    environment variable. It falls back to the legacy built-in value so
    existing deployments keep working.
    """
    import os  # local import: only needed for the bootstrap password override

    if not db.fetch_one("SELECT id FROM model_profiles WHERE is_default = 1 LIMIT 1"):
        profile_id = make_id("model")
        now = utc_now()
        db.execute(
            """
            INSERT INTO model_profiles (id, owner_account_id, name, provider, base_url, api_key, model_name, is_system, is_default, created_at, updated_at)
            VALUES (?, NULL, ?, ?, ?, ?, ?, 1, 1, ?, ?)
            """,
            (
                profile_id,
                "本机默认模型",
                "openai_compat",
                LOCAL_OPENAI_BASE_URL,
                LOCAL_OPENAI_API_KEY,
                LOCAL_OPENAI_MODEL,
                now,
                now,
            ),
        )
    if not db.fetch_one("SELECT id FROM accounts WHERE username = ?", ("kris",)):
        account_id = make_id("acct")
        # SECURITY: a credential baked into source is known to anyone who can
        # read the repository. Operators should set the environment variable;
        # the literal is kept only as a backward-compatible fallback.
        admin_password = os.environ.get("STORYFORGE_SEED_ADMIN_PASSWORD") or "Asd123456."
        password_hash, password_salt = create_password_hash(admin_password)
        now = utc_now()
        model_row = db.fetch_one("SELECT id FROM model_profiles WHERE is_default = 1 LIMIT 1")
        db.execute(
            """
            INSERT INTO accounts (
                id, username, password_hash, password_salt, display_name, role,
                approval_status, approved_by, approved_at, preferred_analysis_model_id,
                created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                account_id,
                "kris",
                password_hash,
                password_salt,
                "Kris",
                "super_admin",
                "approved",
                account_id,  # the bootstrap admin is recorded as self-approved
                now,
                model_row["id"] if model_row else "",
                now,
                now,
            ),
        )
        project = ensure_default_project(account_id, username="kris")
        kb = ensure_user_kb(account_id, project["id"], username="kris")
        assistant_id = make_id("assistant")
        db.execute(
            """
            INSERT INTO assistants (id, user_id, project_id, name, description, system_prompt, generation_goal, config_json, model_profile_id, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, '{}', ?, ?, ?)
            """,
            (
                assistant_id,
                account_id,
                project["id"],
                "默认文案助手",
                "系统为超级管理员预置",
                "你是一个擅长学习短视频文案风格的 AI 助手。",
                "为用户生成稳定风格的短视频文案。",
                model_row["id"] if model_row else "",
                now,
                now,
            ),
        )
        db.execute(
            "INSERT INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)",
            (assistant_id, kb["id"]),
        )
@app.post("/v2/auth/login")
def login(request: LoginRequest) -> dict[str, Any]:
    """Verify credentials and issue a new bearer token.

    Registration hashes the whitespace-stripped password, so the stripped
    form is also accepted here. The raw form is tried first, so accounts
    whose stored hash was made from an unstripped password still work.

    Raises:
        HTTPException: 401 when the account is unknown or the password is wrong.
    """
    account = db.fetch_one("SELECT * FROM accounts WHERE username = ?", (request.username.strip(),))
    authenticated = False
    if account:
        candidates = [request.password]
        stripped = request.password.strip()
        if stripped != request.password:
            candidates.append(stripped)
        authenticated = any(
            verify_password(candidate, account["password_hash"], account["password_salt"])
            for candidate in candidates
        )
    if not authenticated:
        raise HTTPException(status_code=401, detail="Invalid credentials")
    token = secrets.token_urlsafe(32)
    db.execute(
        "INSERT INTO auth_tokens (token, account_id, created_at) VALUES (?, ?, ?)",
        (token, account["id"], utc_now()),
    )
    return {
        "token": token,
        "account": normalize_account(account),
        "default_external_base_url": DEFAULT_EXTERNAL_BASE_URL,
    }


@app.post("/v2/auth/logout")
def logout(account: dict[str, Any] = Depends(require_auth), authorization: str | None = Header(default=None)) -> dict[str, bool]:
    """Delete the bearer token carried in the Authorization header.

    The token is whatever follows the first space of the header value. A
    missing header, or one without a token part, is treated as nothing to
    delete rather than crashing with an IndexError/AttributeError.
    """
    _, _, token = (authorization or "").partition(" ")
    token = token.strip()
    if token:
        db.execute("DELETE FROM auth_tokens WHERE token = ?", (token,))
    return {"saved": True}


@app.get("/v2/me")
def me(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]:
    """Return the authenticated account in its normalized form."""
    return normalize_account(account)
@app.get("/v2/projects")
def list_projects(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """List the caller's projects, oldest first."""
    rows = db.fetch_all("SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC", (account["id"],))
    return [project_payload(row) for row in rows]


@app.post("/v2/projects")
def create_project(request: ProjectCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a project for the caller and make sure it has a knowledge base.

    Raises:
        HTTPException: 400 when the name is empty after trimming.
    """
    name = request.name.strip()
    if not name:
        # A blank name would create an unnamed project that cannot be told
        # apart in listings and yields titles such as " 复盘" downstream.
        raise HTTPException(status_code=400, detail="project name is required")
    project_id = make_id("project")
    now = utc_now()
    db.execute(
        """
        INSERT INTO projects (id, user_id, name, description, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        (
            project_id,
            account["id"],
            name,
            request.description.strip(),
            now,
            now,
        ),
    )
    ensure_user_kb(account["id"], project_id, username=account["username"])
    return project_payload(db.fetch_one("SELECT * FROM projects WHERE id = ?", (project_id,)))
@app.post("/v2/content-sources")
def create_content_source_api(request: ContentSourceCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a content source in the caller's target project.

    All string fields are trimmed before being stored; ``metadata`` is passed
    through unchanged.
    """
    project = resolve_target_project(account["id"], request.project_id or None, username=account["username"])
    row = create_content_source(
        account_id=account["id"],
        project_id=project["id"],
        source_kind=request.source_kind.strip(),
        platform=request.platform.strip(),
        handle=request.handle.strip(),
        source_url=request.source_url.strip(),
        title=request.title.strip(),
        local_path=request.local_path.strip(),
        metadata=request.metadata,
    )
    return content_source_payload(row)


@app.post("/v2/pipelines/content-source-sync")
async def create_content_source_sync_job(
    request: ContentSourceSyncRequest,
    account: dict[str, Any] = Depends(require_approved),
) -> dict[str, Any]:
    """Create and trigger a ``content_source_sync`` job.

    The source is either an existing content source owned by the caller
    (``content_source_id``) or a new ``creator_account`` source built from the
    request. Request fields take precedence over the stored source's values.

    Raises:
        HTTPException: 400 when no source URL can be determined, or when the
            existing source belongs to a different project than the target.
    """
    source_row = None
    if request.content_source_id.strip():
        source_row = load_owned_content_source(request.content_source_id.strip(), account["id"])

    requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "")
    project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"])
    kb = resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"])
    assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"])
    profile = model_profile_for_account(account["id"], request.analysis_model_profile_id or None)

    source_url = (request.source_url or (source_row or {}).get("source_url") or "").strip()
    if not source_url:
        raise HTTPException(status_code=400, detail="source_url or content_source_id is required")
    platform = (request.platform or (source_row or {}).get("platform") or infer_platform_from_url(source_url)).strip()
    handle = (request.handle or (source_row or {}).get("handle") or "").strip()
    # `.get("title", "")` still returns None when the stored title is NULL,
    # so coalesce before stripping.
    source_title = (
        request.title.strip()
        or ((source_row or {}).get("title") or "").strip()
        or handle
        or source_url
    )

    if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]:
        raise HTTPException(status_code=400, detail="Content source does not belong to target project")

    if not source_row:
        source_row = create_content_source(
            account_id=account["id"],
            project_id=project["id"],
            source_kind="creator_account",
            platform=platform,
            handle=handle,
            source_url=source_url,
            title=source_title,
            metadata={
                "sync_mode": "recent_uploads",
                "max_items": request.max_items,
                "analysis_model_profile_id": profile["id"],
            },
        )

    job_row = create_job_record(
        account_id=account["id"],
        project_id=project["id"],
        knowledge_base_id=kb["id"],
        source_type="content_source_sync",
        line_type="content_source_sync",
        workflow_key="content_source_sync_pipeline",
        title=f"{source_title} 内容源同步",
        language=request.language,
        source_url=source_url,
        assistant_id=(assistant or {}).get("id"),
        content_source_id=source_row["id"],
        artifacts={
            "platform": platform,
            "handle": handle,
            "source_account_url": source_url,
            "source_title": source_title,
            "max_items": request.max_items,
            "skip_existing": request.skip_existing,
            "auto_trigger_analysis": request.auto_trigger_analysis,
        },
        analysis_model_profile_id=profile["id"],
    )
    update_content_source_metadata(
        source_row["id"],
        {
            "sync_mode": "recent_uploads",
            "max_items": request.max_items,
            "analysis_model_profile_id": profile["id"],
            "last_sync_job_id": job_row["id"],
            "last_sync_requested_at": utc_now(),
        },
    )
    return job_payload(await trigger_orchestrated_job(job_row))
@app.post("/v2/model-profiles")
def create_model_profile(request: ModelProfileRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a caller-owned ``openai_compat`` model profile.

    When the new profile is requested as default, the default flag is first
    cleared on every other profile owned by the caller. String fields are
    trimmed before being stored.
    """
    profile_id = make_id("model")
    created_at = utc_now()
    owner_id = account["id"]
    wants_default = bool(request.is_default)
    if wants_default:
        db.execute("UPDATE model_profiles SET is_default = 0 WHERE owner_account_id = ?", (owner_id,))
    values = (
        profile_id,
        owner_id,
        request.name.strip(),
        request.base_url.strip(),
        request.api_key.strip(),
        request.model_name.strip(),
        1 if wants_default else 0,
        created_at,
        created_at,
    )
    db.execute(
        """
        INSERT INTO model_profiles (id, owner_account_id, name, provider, base_url, api_key, model_name, is_system, is_default, created_at, updated_at)
        VALUES (?, ?, ?, 'openai_compat', ?, ?, ?, 0, ?, ?, ?)
        """,
        values,
    )
    stored = db.fetch_one("SELECT * FROM model_profiles WHERE id = ?", (profile_id,))
    return normalize_model_profile(stored)
@app.get("/v2/knowledge-bases")
def list_knowledge_bases(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """List the caller's knowledge bases, newest first."""
    rows = db.fetch_all("SELECT * FROM knowledge_bases WHERE user_id = ? ORDER BY created_at DESC", (account["id"],))
    payloads = []
    for row in rows:
        payloads.append(knowledge_base_payload(row))
    return payloads


@app.post("/v2/knowledge-bases")
def create_knowledge_base(request: KnowledgeBaseCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a knowledge base in the caller's target project.

    The row is stored with ``sync_status`` ``ready``; name and description
    are trimmed.
    """
    owner_id = account["id"]
    target_project = resolve_target_project(owner_id, request.project_id or None, username=account["username"])
    new_id = make_id("kb")
    stamp = utc_now()
    values = (
        new_id,
        owner_id,
        target_project["id"],
        request.name.strip(),
        request.description.strip(),
        stamp,
        stamp,
    )
    db.execute(
        """
        INSERT INTO knowledge_bases (id, user_id, project_id, name, description, sync_status, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, 'ready', ?, ?)
        """,
        values,
    )
    return knowledge_base_payload(db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (new_id,)))
@app.get("/v2/reviews")
def list_reviews(
    project_id: str | None = Query(default=None),
    limit: int = Query(default=50, ge=1, le=200),
    account: dict[str, Any] = Depends(require_approved),
) -> list[dict[str, Any]]:
    """List the caller's publish reviews, most recently published first.

    ``project_id`` omitted means all projects; a blank value selects reviews
    without a project; any other value filters on that project. Rows are
    ordered by ``published_at`` (falling back to ``created_at`` when empty).
    """
    where = ["user_id = ?"]
    args: list[Any] = [account["id"]]
    if project_id is not None:
        wanted = project_id.strip()
        if not wanted:
            where.append("(project_id IS NULL OR project_id = '')")
        else:
            where.append("project_id = ?")
            args.append(wanted)
    # Only fixed clause fragments are interpolated; all values are bound.
    sql = f"SELECT * FROM publish_reviews WHERE {' AND '.join(where)} ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC LIMIT ?"
    args.append(limit)
    rows = db.fetch_all(sql, tuple(args))
    return [review_payload(row) for row in rows]


@app.post("/v2/reviews")
def create_review(request: ReviewCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Record a publish review, optionally linked to one of the caller's jobs.

    Project and title fall back to the source job's values when the request
    leaves them blank; if there is still no title, ``"<project name> 复盘"``
    is used. Platform defaults to ``douyin`` and content type to ``video``.
    """
    owner_id = account["id"]
    job_ref = request.source_job_id.strip()
    source_job = load_owned_job(job_ref, owner_id) if job_ref else None

    wanted_project = request.project_id.strip()
    if not wanted_project and source_job:
        wanted_project = source_job.get("project_id", "")
    project = resolve_target_project(owner_id, wanted_project or None, username=account["username"])
    assistant = resolve_target_assistant(owner_id, request.assistant_id or None, project["id"])
    review_id = make_id("review")

    title = request.title.strip()
    if not title and source_job:
        title = source_job.get("title", "")
    if not title:
        title = f"{project['name']} 复盘"

    stamp = utc_now()
    values = (
        review_id,
        owner_id,
        project["id"],
        source_job["id"] if source_job else None,
        (assistant or {}).get("id") or None,
        title,
        request.platform or "douyin",
        request.content_type or "video",
        request.publish_url.strip(),
        request.published_at.strip(),
        json.dumps(request.metrics, ensure_ascii=False),
        request.verdict.strip(),
        request.highlights.strip(),
        request.next_actions.strip(),
        request.notes.strip(),
        stamp,
        stamp,
    )
    db.execute(
        """
        INSERT INTO publish_reviews (
            id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type,
            publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
    return review_payload(db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)))
@app.get("/v2/explore/jobs")
def list_jobs(
    parent_job_id: str | None = Query(default=None),
    line_type: str | None = Query(default=None),
    account: dict[str, Any] = Depends(require_approved),
) -> list[dict[str, Any]]:
    """List the caller's jobs, newest first, with optional filters.

    ``parent_job_id`` omitted means no parent filter; a blank value selects
    top-level jobs (NULL or empty parent); any other value selects children
    of that job. ``line_type`` filters on the trimmed value and is ignored
    when missing or blank.
    """
    clauses = ["user_id = ?"]
    params: list[Any] = [account["id"]]
    if parent_job_id is not None:
        normalized_parent = parent_job_id.strip()
        if normalized_parent:
            clauses.append("parent_job_id = ?")
            params.append(normalized_parent)
        else:
            clauses.append("(parent_job_id IS NULL OR parent_job_id = '')")
    # Trim before testing, so a whitespace-only value is ignored like an empty
    # one instead of becoming a `line_type = ''` filter that matches nothing.
    normalized_line_type = (line_type or "").strip()
    if normalized_line_type:
        clauses.append("line_type = ?")
        params.append(normalized_line_type)
    sql = f"SELECT * FROM jobs WHERE {' AND '.join(clauses)} ORDER BY created_at DESC"
    return [job_payload(row) for row in db.fetch_all(sql, tuple(params))]
@app.get("/v2/explore/jobs/{job_id}/events")
def get_job_events(job_id: str, account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """Return the events of one of the caller's jobs, oldest first.

    Raises:
        HTTPException: 404 when the job does not exist or is not the caller's.
    """
    owned = db.fetch_one("SELECT id FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"]))
    if not owned:
        raise HTTPException(status_code=404, detail="Job not found")
    events = db.fetch_all("SELECT * FROM job_events WHERE job_id = ? ORDER BY created_at ASC", (job_id,))
    return [job_event_payload(event) for event in events]


def resolve_target_kb(account_id: str, requested_kb_id: str | None, project_id: str = "", username: str = "默认用户") -> dict[str, Any]:
    """Pick the knowledge base a request should write to.

    Without a requested id, the account's default knowledge base for the
    project is returned (and created if needed). With one, it must belong to
    the account and, when both sides carry a project id, to that project.

    Raises:
        HTTPException: 404 when the requested knowledge base is not found for
            the account; 400 when it belongs to a different project.
    """
    if not requested_kb_id:
        return ensure_user_kb(account_id, project_id, username=username)
    kb = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ? AND user_id = ?", (requested_kb_id, account_id))
    if not kb:
        raise HTTPException(status_code=404, detail="Knowledge base not found")
    if project_id and kb.get("project_id") and kb.get("project_id") != project_id:
        raise HTTPException(status_code=400, detail="Knowledge base does not belong to target project")
    return kb
project_id=project["id"], - source_kind="inline_text", - title=request.title.strip(), - metadata={"content_preview": request.content[:280]}, - ) - job_row = create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="text", - line_type="analysis", - workflow_key="analysis_pipeline", - title=request.title.strip(), - language="zh-CN", - assistant_id=(assistant or {}).get("id"), - content_source_id=source["id"], - artifacts={"input_text": request.content}, - analysis_model_profile_id=profile["id"], - ) - return job_payload(await trigger_orchestrated_job(job_row)) - - -@app.post("/v2/explore/video-link") -async def create_video_link_job(request: ExploreVideoLinkRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) - kb = resolve_target_kb(account["id"], request.knowledge_base_id, project["id"], username=account["username"]) - assistant = resolve_target_assistant(account["id"], request.assistant_id, project["id"]) - profile = model_profile_for_account(account["id"], request.analysis_model_profile_id) - source = create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind="video_link", - source_url=request.video_url.strip(), - title=(request.title or "短视频素材").strip(), - metadata={"platform": "video_link"}, - ) - job_row = create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="video_link", - line_type="analysis", - workflow_key="analysis_pipeline", - title=(request.title or "短视频素材").strip(), - language=request.language, - source_url=request.video_url.strip(), - assistant_id=(assistant or {}).get("id"), - content_source_id=source["id"], - artifacts={}, - analysis_model_profile_id=profile["id"], - ) - return job_payload(await trigger_orchestrated_job(job_row)) - - 
-@app.post("/v2/explore/upload-video") -async def upload_video( - file: UploadFile = File(...), - title: str = Form(""), - project_id: str = Form(""), - knowledge_base_id: str = Form(""), - assistant_id: str = Form(""), - analysis_model_profile_id: str = Form(""), - account: dict[str, Any] = Depends(require_approved), -) -> dict[str, Any]: - project = resolve_target_project(account["id"], project_id or None, username=account["username"]) - kb = resolve_target_kb(account["id"], knowledge_base_id or None, project["id"], username=account["username"]) - assistant = resolve_target_assistant(account["id"], assistant_id or None, project["id"]) - profile = model_profile_for_account(account["id"], analysis_model_profile_id or None) - job_id = make_id("job_upload") - job_dir = JOBS_DIR / job_id - job_dir.mkdir(parents=True, exist_ok=True) - suffix = Path(file.filename or "upload.mp4").suffix or ".mp4" - target_path = job_dir / f"source{suffix}" - with target_path.open("wb") as handle: - shutil.copyfileobj(file.file, handle) - source = create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind="upload_video", - source_url=file.filename or "", - title=(title or file.filename or "上传视频素材").strip(), - local_path=str(target_path), - metadata={"filename": file.filename or "", "size_bytes": target_path.stat().st_size}, - ) - job_row = create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="upload_video", - line_type="analysis", - workflow_key="analysis_pipeline", - title=(title or file.filename or "上传视频素材").strip(), - source_url=file.filename or "", - assistant_id=(assistant or {}).get("id"), - content_source_id=source["id"], - artifacts={"uploaded_path": str(target_path)}, - analysis_model_profile_id=profile["id"], - ) - return job_payload(await trigger_orchestrated_job(job_row)) - - -@app.post("/v2/pipelines/real-cut") -async def create_real_cut_job(request: RealCutJobRequest, 
account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - source_job = None - source_job_id = request.source_job_id.strip() - if source_job_id: - source_job = load_owned_job(source_job_id, account["id"]) - - requested_project_id = request.project_id or (source_job.get("project_id", "") if source_job else "") - project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - if source_job and source_job.get("project_id") and source_job.get("project_id") != project["id"]: - raise HTTPException(status_code=400, detail="Source job does not belong to target project") - - kb = ensure_user_kb(account["id"], project["id"], username=account["username"]) - resolved_input_dir = request.input_dir.strip() - staged_payload: dict[str, Any] = {} - if not resolved_input_dir: - if not source_job: - raise HTTPException(status_code=400, detail="input_dir or source_job_id is required") - staged_payload = await stage_real_cut_source_to_cutvideo(source_job) - resolved_input_dir = staged_payload["input_dir"] - - source_url = resolved_input_dir - source_metadata: dict[str, Any] = {"line_type": "real_cut"} - if source_job: - source_url = source_job.get("source_url") or resolved_input_dir - source_metadata["source_job_id"] = source_job["id"] - source_metadata["source_job_type"] = source_job.get("source_type", "") - if staged_payload: - source_metadata["cutvideo_upload"] = staged_payload.get("upload", {}) - source_metadata["source_media_path"] = staged_payload.get("source_path", "") - - source = create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind="real_cut_input", - title=request.title.strip(), - source_url=source_url, - local_path=resolved_input_dir, - metadata=source_metadata, - ) - job_row = create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="real_cut", - line_type="real_cut", - workflow_key="real_cut_pipeline", - 
title=request.title.strip(), - source_url=resolved_input_dir, - content_source_id=source["id"], - artifacts={ - "source_job_id": source_job["id"] if source_job else "", - "source_media_path": staged_payload.get("source_path", ""), - "cutvideo_upload": staged_payload.get("upload", {}), - "cutvideo_request": { - "base_config": request.base_config.strip() or CUTVIDEO_BASE_CONFIG, - "name": request.title.strip(), - "input_dir": resolved_input_dir, - "objective": request.objective, - "target_duration_sec": request.target_duration_sec, - "target_aspect_ratio": request.target_aspect_ratio, - "ideal_segment_duration_sec": request.ideal_segment_duration_sec, - "max_segment_duration_sec": request.max_segment_duration_sec, - "transcript_backend": request.transcript_backend, - "transcript_device": request.transcript_device, - "review_enabled": request.review_enabled, - "dry_run": request.dry_run, - } - }, - ) - return job_payload(await trigger_orchestrated_job(job_row)) - - -@app.post("/v2/pipelines/ai-video") -async def create_ai_video_job(request: AiVideoJobRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - source_job = None - source_project_id = "" - source_kb_id = "" - if request.source_job_id.strip(): - source_job = db.fetch_one("SELECT * FROM jobs WHERE id = ? 
AND user_id = ?", (request.source_job_id.strip(), account["id"])) - if not source_job: - raise HTTPException(status_code=404, detail="Source job not found") - if source_job["status"] != "completed": - raise HTTPException(status_code=409, detail="Source job must be completed before AI video generation") - source_project_id = source_job.get("project_id", "") - source_kb_id = source_job.get("knowledge_base_id", "") - - requested_project_id = request.project_id or source_project_id - project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) - kb = resolve_target_kb(account["id"], request.knowledge_base_id or source_kb_id or None, project["id"], username=account["username"]) - assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) - source = create_content_source( - account_id=account["id"], - project_id=project["id"], - source_kind="ai_video_brief", - title=request.title.strip(), - metadata={"source_job_id": request.source_job_id.strip()}, - ) - job_row = create_job_record( - account_id=account["id"], - project_id=project["id"], - knowledge_base_id=kb["id"], - source_type="ai_video", - line_type="ai_video", - workflow_key="ai_video_pipeline", - title=request.title.strip(), - assistant_id=(assistant or {}).get("id"), - content_source_id=source["id"], - artifacts={ - "brief": request.brief, - "style": request.style, - "shots": request.shots, - "image_provider": request.image_provider, - "image_model": request.image_model, - "video_provider": request.video_provider, - "video_model": request.video_model, - "aspect_ratio": request.aspect_ratio, - "duration": request.duration, - "source_job_id": request.source_job_id.strip(), - }, - ) - return job_payload(await trigger_orchestrated_job(job_row)) - - -@app.get("/v2/assistants") -def list_assistants(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: - return [assistant_payload(row) for row in 
db.fetch_all("SELECT * FROM assistants WHERE user_id = ? ORDER BY created_at DESC", (account["id"],))] - - -@app.post("/v2/assistants") -def create_assistant(request: AssistantCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - assistant_id = make_id("assistant") - now = utc_now() - project = resolve_target_project(account["id"], request.project_id or None, username=account["username"]) - model_profile = model_profile_for_account(account["id"], request.model_profile_id or None) - db.execute( - """ - INSERT INTO assistants (id, user_id, project_id, name, description, system_prompt, generation_goal, config_json, model_profile_id, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, '{}', ?, ?, ?) - """, - ( - assistant_id, - account["id"], - project["id"], - request.name.strip(), - request.description.strip(), - request.system_prompt.strip(), - request.generation_goal.strip(), - model_profile["id"], - now, - now, - ), - ) - for kb_id in request.knowledge_base_ids: - kb = db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? AND user_id = ?", (kb_id, account["id"])) - if kb: - db.execute("INSERT OR IGNORE INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)", (assistant_id, kb_id)) - return assistant_payload(db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,))) - - -@app.patch("/v2/assistants/{assistant_id}") -def update_assistant(assistant_id: str, request: AssistantUpdateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - current = db.fetch_one("SELECT * FROM assistants WHERE id = ? 
AND user_id = ?", (assistant_id, account["id"])) - if not current: - raise HTTPException(status_code=404, detail="Assistant not found") - project_id = current.get("project_id", "") - if request.project_id is not None: - project_id = resolve_target_project(account["id"], request.project_id, username=account["username"])["id"] - payload = { - "name": request.name if request.name is not None else current["name"], - "description": request.description if request.description is not None else current.get("description", ""), - "system_prompt": request.system_prompt if request.system_prompt is not None else current.get("system_prompt", ""), - "generation_goal": request.generation_goal if request.generation_goal is not None else current.get("generation_goal", ""), - "project_id": project_id, - "model_profile_id": current.get("model_profile_id", ""), - } - if request.model_profile_id is not None: - payload["model_profile_id"] = model_profile_for_account(account["id"], request.model_profile_id)["id"] - db.execute( - """ - UPDATE assistants - SET project_id = ?, name = ?, description = ?, system_prompt = ?, generation_goal = ?, model_profile_id = ?, updated_at = ? - WHERE id = ? - """, - ( - payload["project_id"], - payload["name"], - payload["description"], - payload["system_prompt"], - payload["generation_goal"], - payload["model_profile_id"], - utc_now(), - assistant_id, - ), - ) - if request.knowledge_base_ids is not None: - db.execute("DELETE FROM assistant_knowledge_bases WHERE assistant_id = ?", (assistant_id,)) - for kb_id in request.knowledge_base_ids: - kb = db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? 
AND user_id = ?", (kb_id, account["id"])) - if kb: - db.execute("INSERT OR IGNORE INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)", (assistant_id, kb_id)) - return assistant_payload(db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,))) - - -@app.get("/v2/agents") -def list_agents(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]: - return list_assistants(account) - - -@app.post("/v2/agents") -def create_agent(request: AssistantCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - return create_assistant(request, account) - - -@app.patch("/v2/agents/{assistant_id}") -def update_agent(assistant_id: str, request: AssistantUpdateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - return update_assistant(assistant_id, request, account) - - -@app.post("/v2/assistants/{assistant_id}/generate") -async def generate_copy(assistant_id: str, request: GenerateCopyRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: - assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ? AND user_id = ?", (assistant_id, account["id"])) - if not assistant: - raise HTTPException(status_code=404, detail="Assistant not found") - kb_ids = request.knowledge_base_ids or [row["knowledge_base_id"] for row in db.fetch_all("SELECT knowledge_base_id FROM assistant_knowledge_bases WHERE assistant_id = ?", (assistant_id,))] - used_documents: list[dict[str, Any]] = [] - excerpts: list[str] = [] - for kb_id in kb_ids: - docs = db.fetch_all("SELECT * FROM knowledge_documents WHERE knowledge_base_id = ? 
ORDER BY created_at DESC LIMIT 3", (kb_id,)) - for doc in docs: - payload = document_payload(doc) - used_documents.append(payload) - excerpt = payload["combined_text"] or payload["style_summary"] or payload["transcript_text"] - excerpts.append(f"[{payload['title']}]\n{excerpt[:1200]}") - prompt_excerpt = "\n\n".join(excerpts)[:6000] - system_prompt = assistant.get("system_prompt") or "你是文案助手。" - generation_goal = assistant.get("generation_goal") or "生成短视频文案。" - user_prompt = ( - f"任务目标:{generation_goal}\n" - f"创作需求:{request.brief}\n" - f"平台:{request.platform}\n" - f"受众:{request.audience}\n" - f"额外要求:{request.extra_requirements or '无'}\n\n" - f"参考知识库素材:\n{prompt_excerpt or '暂无参考素材,请按通用短视频结构输出。'}\n\n" - "请输出完整文案,包含标题、开场钩子、正文结构和结尾行动指令。" - ) - profile = model_profile_for_account(account["id"], assistant.get("model_profile_id") or None) - content = await call_model(profile, system_prompt, user_prompt, temperature=0.7) - return { - "assistant_id": assistant_id, - "knowledge_base_ids": kb_ids, - "content": content, - "prompt_excerpt": prompt_excerpt[:2000], - "used_documents": used_documents, - } - - -def load_owned_job(job_id: str, account_id: str) -> dict[str, Any]: - row = db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account_id)) - if not row: - raise HTTPException(status_code=404, detail="Job not found") - return row - - -def load_owned_content_source(source_id: str, account_id: str) -> dict[str, Any]: - row = db.fetch_one("SELECT * FROM content_sources WHERE id = ? AND user_id = ?", (source_id, account_id)) - if not row: - raise HTTPException(status_code=404, detail="Content source not found") - return row - - -def load_owned_review(review_id: str, account_id: str) -> dict[str, Any]: - row = db.fetch_one("SELECT * FROM publish_reviews WHERE id = ? 
AND user_id = ?", (review_id, account_id)) - if not row: - raise HTTPException(status_code=404, detail="Review not found") - return row - - -def load_internal_job(job_id: str) -> dict[str, Any]: - row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,)) - if not row: - raise HTTPException(status_code=404, detail="Job not found") - return row - - -def parse_job_artifacts(row: dict[str, Any]) -> dict[str, Any]: - raw = row.get("artifacts_json") or "{}" - try: - return json.loads(raw) - except json.JSONDecodeError: - return {} - - -def parse_job_result(row: dict[str, Any]) -> dict[str, Any]: - raw = row.get("result_json") or "{}" - try: - data = json.loads(raw) - return data if isinstance(data, dict) else {} - except json.JSONDecodeError: - return {} - - -def extract_source_storyboards(source_job: dict[str, Any] | None) -> list[dict[str, Any]]: - if not source_job: - return [] - return coerce_storyboards(parse_job_result(source_job).get("storyboards")) - - -def resolve_internal_job_id(request: InternalStepRequest | None, query_job_id: str = "") -> str: - resolved = (query_job_id or "").strip() - if not resolved and request is not None: - resolved = ( - request.job_id - or request.jobId - or str(request.payload.get("job_id") or request.payload.get("jobId") or "") - ).strip() - return resolved - - -def load_step_job(request: InternalStepRequest | None, query_job_id: str, workflow_key: str) -> dict[str, Any]: - resolved_job_id = resolve_internal_job_id(request, query_job_id) - if resolved_job_id: - return load_internal_job(resolved_job_id) - row = db.fetch_one( - """ - SELECT * FROM jobs - WHERE workflow_key = ? 
AND status IN ('pending', 'queued') - ORDER BY created_at ASC - LIMIT 1 - """, - (workflow_key,), - ) - if not row: - raise HTTPException(status_code=400, detail="job_id is required") - return row - - -@app.get("/internal/jobs/{job_id}/context") -def internal_job_context(job_id: str, _: bool = Depends(require_orchestrator)) -> dict[str, Any]: - return job_context_payload(load_internal_job(job_id)) - - -@app.post("/internal/jobs/steps/analyze") -async def internal_run_analysis( - request: InternalStepRequest | None = Body(default=None), - job_id: str = Query(default=""), - _: bool = Depends(require_orchestrator), -) -> dict[str, Any]: - row = load_step_job(request, job_id, "analysis_pipeline") - await process_job(row["id"]) - return job_context_payload(load_internal_job(row["id"])) - - -@app.post("/internal/jobs/steps/content-source-sync") -async def internal_content_source_sync( - request: InternalStepRequest | None = Body(default=None), - job_id: str = Query(default=""), - _: bool = Depends(require_orchestrator), -) -> dict[str, Any]: - row = load_step_job(request, job_id, "content_source_sync_pipeline") - artifacts = parse_job_artifacts(row) - source_url = str(artifacts.get("source_account_url") or row.get("source_url") or "").strip() - if not source_url: - raise HTTPException(status_code=400, detail="Content source sync job is missing source URL") - max_items = max(1, min(int(artifacts.get("max_items") or 5), 20)) - skip_existing = bool(artifacts.get("skip_existing", True)) - auto_trigger_analysis = bool(artifacts.get("auto_trigger_analysis", True)) - - update_job_state( - row["id"], - status="processing", - provider_name="collector", - provider_task_id=row["id"], - result={"sync_started": True}, - ) - - try: - discovered_items, debug_payload = discover_account_video_links(source_url, max_items) - child_jobs: list[dict[str, Any]] = [] - queued_jobs: list[dict[str, Any]] = [] - skipped_items: list[dict[str, Any]] = [] - - for index, item in 
enumerate(discovered_items, start=1): - video_url = str(item.get("video_url") or "").strip() - if not video_url: - continue - existing_row = db.fetch_one( - """ - SELECT * FROM jobs - WHERE user_id = ? AND project_id = ? AND source_type = 'video_link' AND source_url = ? - ORDER BY created_at DESC - LIMIT 1 - """, - (row["user_id"], row.get("project_id", ""), video_url), - ) - if existing_row and skip_existing: - skipped_items.append( - { - "video_url": video_url, - "title": item.get("title") or existing_row.get("title") or "短视频素材", - "existing_job_id": existing_row["id"], - "existing_status": existing_row.get("status", ""), - } - ) - continue - - content_source = create_content_source( - account_id=row["user_id"], - project_id=row.get("project_id", ""), - source_kind="video_link", - platform=str(artifacts.get("platform") or infer_platform_from_url(video_url)), - handle=str(artifacts.get("handle") or ""), - source_url=video_url, - title=str(item.get("title") or f"内容源视频 {index}"), - metadata={ - "origin_content_source_id": row.get("content_source_id", ""), - "origin_sync_job_id": row["id"], - "external_id": str(item.get("external_id") or ""), - "source_account_url": source_url, - }, - ) - child_row = create_job_record( - account_id=row["user_id"], - project_id=row.get("project_id", ""), - parent_job_id=row["id"], - knowledge_base_id=row["knowledge_base_id"], - source_type="video_link", - line_type="analysis", - workflow_key="analysis_pipeline", - title=str(item.get("title") or f"内容源视频 {index}"), - language=row.get("language", "auto"), - source_url=video_url, - assistant_id=row.get("assistant_id"), - content_source_id=content_source["id"], - artifacts={ - "origin_content_source_id": row.get("content_source_id", ""), - "origin_sync_job_id": row["id"], - "source_account_url": source_url, - }, - analysis_model_profile_id=row.get("analysis_model_profile_id", ""), - ) - child_jobs.append(job_payload(child_row)) - if auto_trigger_analysis: - queued_child = await 
trigger_orchestrated_job(child_row) - queued_jobs.append(job_payload(queued_child)) - - if row.get("content_source_id"): - update_content_source_metadata( - row["content_source_id"], - { - "last_sync_job_id": row["id"], - "last_sync_completed_at": utc_now(), - "last_discovered_count": len(discovered_items), - "last_enqueued_job_ids": [item["id"] for item in queued_jobs] or [item["id"] for item in child_jobs], - "last_skipped_existing_count": len(skipped_items), - "last_source_account_url": source_url, - "last_sync_error": "", - }, - ) - - updated = update_job_state( - row["id"], - status="completed", - provider_name="collector", - provider_task_id=row["id"], - artifacts={ - **debug_payload, - "discovered_videos": discovered_items, - "skipped_existing": skipped_items, - "child_job_ids": [item["id"] for item in child_jobs], - "queued_job_ids": [item["id"] for item in queued_jobs], - }, - result={ - "discovered_count": len(discovered_items), - "queued_count": len(queued_jobs) if auto_trigger_analysis else len(child_jobs), - "skipped_count": len(skipped_items), - "child_jobs": queued_jobs or child_jobs, - "skipped_existing": skipped_items, - }, - ) - return job_context_payload(updated) - except HTTPException as exc: - error = str(exc.detail) - except Exception as exc: - error = str(exc) - - if row.get("content_source_id"): - update_content_source_metadata( - row["content_source_id"], - { - "last_sync_job_id": row["id"], - "last_sync_completed_at": utc_now(), - "last_sync_error": error[:500], - "last_source_account_url": source_url, - }, - ) - updated = update_job_state( - row["id"], - status="failed", - error=error[:500], - provider_name="collector", - provider_task_id=row["id"], - ) - return job_context_payload(updated) - - -@app.post("/internal/jobs/steps/real-cut/submit") -async def internal_real_cut_submit( - request: InternalStepRequest | None = Body(default=None), - job_id: str = Query(default=""), - _: bool = Depends(require_orchestrator), -) -> dict[str, Any]: 
- if not cutvideo_client.enabled: - raise HTTPException(status_code=503, detail="CutVideo is not configured") - row = load_step_job(request, job_id, "real_cut_pipeline") - artifacts = parse_job_artifacts(row) - cutvideo_request = artifacts.get("cutvideo_request") or {} - if not isinstance(cutvideo_request, dict): - raise HTTPException(status_code=400, detail="Invalid cutvideo request payload") - append_job_event(row["id"], "cutvideo.submit.requested", cutvideo_request) - submit_result = await cutvideo_client.submit_job(cutvideo_request) - task_id = str(submit_result.get("task_id") or "") - updated = update_job_state( - row["id"], - status="processing", - provider_name="cutvideo", - provider_task_id=task_id, - result={"cutvideo_submit": submit_result}, - ) - return job_context_payload(updated) - - -@app.post("/internal/jobs/steps/real-cut/poll") -async def internal_real_cut_poll( - request: InternalStepRequest | None = Body(default=None), - job_id: str = Query(default=""), - _: bool = Depends(require_orchestrator), -) -> dict[str, Any]: - row = load_step_job(request, job_id, "real_cut_pipeline") - if not row.get("provider_task_id"): - raise HTTPException(status_code=409, detail="CutVideo task has not been submitted") - task_payload = await cutvideo_client.get_task(row["provider_task_id"]) - status = str(task_payload.get("status") or "").lower() - run_payload: dict[str, Any] = {} - artifacts: dict[str, Any] = {"cutvideo_task": task_payload} - next_status = row["status"] - error = row.get("error", "") - if status == "completed": - next_status = "completed" - run_id = str(task_payload.get("run_id") or "") - if run_id: - run_payload = await cutvideo_client.get_run(run_id) - artifacts["cutvideo_run"] = run_payload - elif status == "failed": - next_status = "failed" - error = str(task_payload.get("error") or "CutVideo task failed") - else: - next_status = "processing" - - updated = update_job_state( - row["id"], - status=next_status, - error=error, - 
provider_name="cutvideo", - provider_task_id=row["provider_task_id"], - artifacts=artifacts, - result={"cutvideo_run": run_payload} if run_payload else {"cutvideo_task": task_payload}, - ) - return job_context_payload(updated) - - -@app.post("/internal/jobs/steps/real-cut/run") -async def internal_real_cut_run( - request: InternalStepRequest | None = Body(default=None), - job_id: str = Query(default=""), - _: bool = Depends(require_orchestrator), -) -> dict[str, Any]: - if not cutvideo_client.enabled: - raise HTTPException(status_code=503, detail="CutVideo is not configured") - - row = load_step_job(request, job_id, "real_cut_pipeline") - if not row.get("provider_task_id"): - artifacts = parse_job_artifacts(row) - cutvideo_request = artifacts.get("cutvideo_request") or {} - if not isinstance(cutvideo_request, dict): - raise HTTPException(status_code=400, detail="Invalid cutvideo request payload") - submit_result = await cutvideo_client.submit_job(cutvideo_request) - row = update_job_state( - row["id"], - status="processing", - provider_name="cutvideo", - provider_task_id=str(submit_result.get("task_id") or ""), - result={"cutvideo_submit": submit_result}, - ) - - deadline = now_ts() + CUTVIDEO_MAX_WAIT_SEC - while True: - run_fallback: dict[str, Any] | None = None - try: - task_payload = await cutvideo_client.get_task(row["provider_task_id"]) - except httpx.HTTPStatusError as exc: - if exc.response is None or exc.response.status_code != 404: - raise - run_fallback = await find_cutvideo_run_for_job(row) - if run_fallback and cutvideo_run_has_materialized_outputs(run_fallback["detail"]): - updated = update_job_state( - row["id"], - status="completed", - provider_name="cutvideo", - provider_task_id=row["provider_task_id"], - artifacts={ - "cutvideo_task": { - "task_id": row["provider_task_id"], - "status": "missing", - "compatibility_mode": "run-fallback", - "error": "Task not found after submit; using run fallback", - }, - "cutvideo_run_lookup": 
run_fallback["summary"], - "cutvideo_run": run_fallback["detail"], - }, - result={ - **parse_job_result(row), - "cutvideo_task": { - "task_id": row["provider_task_id"], - "status": "missing", - "compatibility_mode": "run-fallback", - "error": "Task not found after submit; using run fallback", - }, - "cutvideo_run_lookup": run_fallback["summary"], - "cutvideo_run": run_fallback["detail"], - }, - ) - return job_context_payload(updated) - task_payload = { - "task_id": row["provider_task_id"], - "status": "missing", - "error": "Task not found", - } - status = str(task_payload.get("status") or "").lower() - if status == "completed": - run_payload: dict[str, Any] = {} - run_id = str(task_payload.get("run_id") or "") - if run_id: - run_payload = await cutvideo_client.get_run(run_id) - updated = update_job_state( - row["id"], - status="completed", - provider_name="cutvideo", - provider_task_id=row["provider_task_id"], - artifacts={"cutvideo_task": task_payload, "cutvideo_run": run_payload}, - result={"cutvideo_task": task_payload, "cutvideo_run": run_payload}, - ) - return job_context_payload(updated) - if status == "failed": - updated = update_job_state( - row["id"], - status="failed", - error=str(task_payload.get("error") or "CutVideo task failed"), - provider_name="cutvideo", - provider_task_id=row["provider_task_id"], - artifacts={"cutvideo_task": task_payload}, - result={"cutvideo_task": task_payload}, - ) - return job_context_payload(updated) - if now_ts() >= deadline: - updated = update_job_state( - row["id"], - status="failed", - error="CutVideo task timed out", - provider_name="cutvideo", - provider_task_id=row["provider_task_id"], - artifacts={"cutvideo_task": task_payload}, - result={"cutvideo_task": task_payload}, - ) - return job_context_payload(updated) - await asyncio.sleep(CUTVIDEO_POLL_INTERVAL_SEC) - row = load_internal_job(row["id"]) - - -@app.post("/internal/jobs/steps/ai-video/render") -async def internal_ai_video_render( - request: InternalStepRequest 
| None = Body(default=None), - job_id: str = Query(default=""), - _: bool = Depends(require_orchestrator), -) -> dict[str, Any]: - if not huobao_client.enabled: - raise HTTPException(status_code=503, detail="Huobao is not configured") - - row = load_step_job(request, job_id, "ai_video_pipeline") - artifacts = parse_job_artifacts(row) - assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],)) if row.get("assistant_id") else None - source_job = None - source_storyboards: list[dict[str, Any]] = [] - source_job_id = str(artifacts.get("source_job_id") or "").strip() - if source_job_id: - source_job = db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (source_job_id, row["user_id"])) - if source_job: - source_storyboards = extract_source_storyboards(source_job) - - if source_storyboards: - storyboard_items = source_storyboards[: max(int(artifacts.get("shots") or 4), 1)] - else: - profile = model_profile_for_account(row["user_id"], row.get("analysis_model_profile_id") or None) - blueprint = await generate_content_blueprint( - profile, - title=row["title"], - transcript_text=str(artifacts.get("brief") or row["title"]), - style_summary=str(artifacts.get("style") or ""), - agent_prompt=(assistant or {}).get("system_prompt", ""), - generation_goal=(assistant or {}).get("generation_goal", "") or "生成适合视频模型的分镜与提示词", - ) - storyboard_items = coerce_storyboards(blueprint.get("storyboards"))[: max(int(artifacts.get("shots") or 4), 1)] - - if not storyboard_items: - raise HTTPException(status_code=400, detail="No storyboards available for AI video rendering") - - drama_payload = await huobao_client.create_drama( - { - "title": row["title"], - "description": str(artifacts.get("brief") or row["title"]), - "style": str(artifacts.get("style") or "realistic"), - "genre": "short_video", - "tags": "storyforge", - } - ) - drama_id = str(drama_payload.get("id") or "") - if not drama_id: - raise RuntimeError("Huobao did not return drama id") - - 
update_job_state( - row["id"], - status="processing", - provider_name="huobao-drama", - provider_task_id=drama_id, - result={"huobao_drama": drama_payload}, - ) - - rendered_scenes: list[dict[str, Any]] = [] - image_provider = str(artifacts.get("image_provider") or "openai") - image_model = str(artifacts.get("image_model") or "") - video_provider = str(artifacts.get("video_provider") or "doubao") - video_model = str(artifacts.get("video_model") or "") - aspect_ratio = str(artifacts.get("aspect_ratio") or "9:16") - image_size = huobao_image_size_for_aspect_ratio(aspect_ratio) - duration = int(artifacts.get("duration") or 5) - style = str(artifacts.get("style") or "realistic") - - for idx, storyboard in enumerate(storyboard_items, start=1): - first_prompt = str(storyboard.get("first_frame_prompt") or storyboard.get("visual") or storyboard.get("title") or row["title"]) - last_prompt = str(storyboard.get("last_frame_prompt") or storyboard.get("visual") or storyboard.get("title") or row["title"]) - video_prompt = str(storyboard.get("video_prompt") or storyboard.get("narration") or storyboard.get("title") or row["title"]) - - first_image = await huobao_client.generate_image( - { - "drama_id": drama_id, - "image_type": "storyboard", - "frame_type": "first", - "prompt": first_prompt, - "provider": image_provider, - "model": image_model, - "size": image_size, - "style": style, - } - ) - last_image = await huobao_client.generate_image( - { - "drama_id": drama_id, - "image_type": "storyboard", - "frame_type": "last", - "prompt": last_prompt, - "provider": image_provider, - "model": image_model, - "size": image_size, - "style": style, - } - ) - - first_ready = await wait_for_huobao_image(str(first_image.get("id") or "")) - last_ready = await wait_for_huobao_image(str(last_image.get("id") or "")) - if str(first_ready.get("status") or "").lower() != "completed": - raise RuntimeError(f"First frame generation failed for scene {idx}") - if str(last_ready.get("status") or 
"").lower() != "completed": - raise RuntimeError(f"Last frame generation failed for scene {idx}") - - first_frame_url = first_ready.get("image_url") or first_ready.get("local_path") - last_frame_url = last_ready.get("image_url") or last_ready.get("local_path") - if not first_frame_url or not last_frame_url: - raise RuntimeError(f"Huobao image output missing for scene {idx}") - - video_payload = await huobao_client.generate_video( - { - "drama_id": drama_id, - "prompt": video_prompt, - "provider": video_provider, - "model": video_model, - "reference_mode": "first_last", - "first_frame_url": first_frame_url, - "last_frame_url": last_frame_url, - "aspect_ratio": aspect_ratio, - "duration": duration, - "style": style, - } - ) - video_ready = await wait_for_huobao_video(str(video_payload.get("id") or "")) - if str(video_ready.get("status") or "").lower() != "completed": - raise RuntimeError(f"Video generation failed for scene {idx}") - - rendered_scenes.append( - { - "shot_index": storyboard.get("shot_index", idx), - "title": storyboard.get("title", f"镜头{idx}"), - "narration": storyboard.get("narration", ""), - "first_frame": first_ready, - "last_frame": last_ready, - "video": video_ready, - } - ) - - updated = update_job_state( - row["id"], - status="completed", - provider_name="huobao-drama", - provider_task_id=drama_id, - artifacts={ - "huobao_drama_id": drama_id, - "source_job_id": source_job_id, - }, - result={ - "huobao_drama": drama_payload, - "rendered_scenes": rendered_scenes, - "storyboards": storyboard_items, - }, - ) - return job_context_payload(updated) - - -@app.post("/internal/jobs/{job_id}/status") -def internal_update_job_status(job_id: str, request: JobStatusUpdateRequest, _: bool = Depends(require_orchestrator)) -> dict[str, Any]: - updated = update_job_state( - job_id, - status=request.status, - error=request.error, - provider_name=request.provider_name or None, - provider_task_id=request.provider_task_id or None, - artifacts=request.artifacts, - 
result=request.result, - ) - return job_context_payload(updated) - - -@app.get("/v2/admin/accounts/pending") -def pending_accounts(admin: dict[str, Any] = Depends(require_super_admin)) -> list[dict[str, Any]]: - rows = db.fetch_all("SELECT * FROM accounts WHERE approval_status = 'pending' ORDER BY created_at ASC") - return [normalize_account(row) for row in rows] - - -@app.post("/v2/admin/accounts/{account_id}/approve") -def approve_account(account_id: str, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]: - account = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account_id,)) - if not account: - raise HTTPException(status_code=404, detail="Account not found") - db.execute( - "UPDATE accounts SET approval_status = 'approved', approved_by = ?, approved_at = ?, updated_at = ? WHERE id = ?", - (admin["id"], utc_now(), utc_now(), account_id), - ) - approved = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account_id,)) - project = ensure_default_project(account_id, username=approved["username"]) - ensure_user_kb(account_id, project["id"], username=approved["username"]) - return {"saved": True, "account": normalize_account(approved)} - - -@app.post("/v2/admin/accounts/{account_id}/reject") -def reject_account(account_id: str, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]: - account = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account_id,)) - if not account: - raise HTTPException(status_code=404, detail="Account not found") - db.execute( - "UPDATE accounts SET approval_status = 'rejected', approved_by = ?, approved_at = ?, updated_at = ? 
WHERE id = ?", - (admin["id"], utc_now(), utc_now(), account_id), - ) - rejected = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account_id,)) - return {"saved": True, "account": normalize_account(rejected)} - - -@app.get("/api/v1/app/update/latest") -def latest_update( - platform: str = Query(default="android"), - channel: str = Query(default="stable"), - currentVersionCode: int | None = Query(default=None), -) -> dict[str, Any]: - row = db.fetch_one( - "SELECT * FROM app_updates WHERE platform = ? AND channel = ? AND is_active = 1 ORDER BY version_code DESC, published_at DESC LIMIT 1", - (platform, channel), - ) - if not row: - return { - "platform": platform, - "channel": channel, - "hasUpdate": False, - "latestVersionCode": currentVersionCode or 0, - "latestVersionName": "", - "minSupportedCode": 0, - "downloadUrl": "", - "apkSha256": "", - "releaseNotes": "", - "forceUpdate": False, - "publishedAt": 0, - } - latest_version_code = int(row["version_code"]) - return { - "platform": row["platform"], - "channel": row["channel"], - "hasUpdate": currentVersionCode is None or latest_version_code > currentVersionCode, - "latestVersionCode": latest_version_code, - "latestVersionName": row["version_name"], - "minSupportedCode": int(row["min_supported_code"]), - "downloadUrl": row["apk_url"], - "apkSha256": row.get("apk_sha256", ""), - "releaseNotes": row.get("notes", ""), - "forceUpdate": bool(row.get("force_update", 0)), - "publishedAt": int(row.get("published_at", 0)), - } - - -@app.post("/v2/admin/app/update/publish") -def publish_app_update(request: PublishAppUpdateRequest, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]: - db.execute( - "UPDATE app_updates SET is_active = 0 WHERE platform = ? 
AND channel = ?", - (request.platform, request.channel), - ) - db.execute( - """ - INSERT INTO app_updates ( - platform, channel, version_code, version_name, min_supported_code, - apk_url, apk_sha256, notes, force_update, is_active, published_at, created_by - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - request.platform, - request.channel, - request.versionCode, - request.versionName, - request.minSupportedCode, - request.apkUrl, - request.apkSha256, - request.notes, - 1 if request.forceUpdate else 0, - 1 if request.isActive else 0, - now_ts(), - admin["id"], - ), - ) - row = db.fetch_one( - """ - SELECT id - FROM app_updates - WHERE platform = ? AND channel = ? AND version_code = ? - ORDER BY id DESC - LIMIT 1 - """, - (request.platform, request.channel, request.versionCode), - ) - return {"saved": True, "action": "published", "updateId": row["id"] if row else 0} - - -register_douyin_routes(app, sys.modules[__name__]) -register_oneliner_routes(app, sys.modules[__name__]) +register_douyin_routes(app, core) +register_domestic_platform_routes(app, core, platform="xiaohongshu", label="小红书") +register_domestic_platform_routes(app, core, platform="bilibili", label="哔哩哔哩") +register_domestic_platform_routes(app, core, platform="kuaishou", label="快手") +register_domestic_platform_routes(app, core, platform="wechat_video", label="微信视频号") +register_oneliner_routes(app, core) +app.openapi_schema = None diff --git a/collector-service/app/wechat_video_features.py b/collector-service/app/wechat_video_features.py new file mode 100644 index 0000000..c00c061 --- /dev/null +++ b/collector-service/app/wechat_video_features.py @@ -0,0 +1,531 @@ +from __future__ import annotations + +import json +from collections import Counter +from typing import Any + +from fastapi import Depends, HTTPException, Query +from pydantic import BaseModel, Field + +# This module is intentionally self-contained because the task only allows +# writes to a new file. 
To activate it, import `register_wechat_video_routes` +# from `app.main` and call it with `(app, core)`. + +WECHAT_VIDEO_PLATFORM = "wechat_video" +ACCOUNT_SOURCE_KIND = "creator_account" +YOUTUBE_HOST_MARKERS = ("youtube.com", "youtu.be") + + +class WechatVideoAccountSyncRequest(BaseModel): + project_id: str = "" + knowledge_base_id: str = "" + assistant_id: str = "" + content_source_id: str = "" + profile_url: str = "" + handle: str = "" + title: str = "" + analysis_model_profile_id: str = "" + language: str = "auto" + max_items: int = Field(default=5, ge=1, le=20) + skip_existing: bool = True + auto_trigger_analysis: bool = True + + +class WechatVideoReviewCreateRequest(BaseModel): + project_id: str = "" + source_job_id: str = "" + assistant_id: str = "" + title: str = "" + content_type: str = "video" + publish_url: str = "" + published_at: str = "" + metrics: dict[str, Any] = Field(default_factory=dict) + verdict: str = "" + highlights: str = "" + next_actions: str = "" + notes: str = "" + + +def register_wechat_video_routes(app: Any, legacy: Any) -> None: + if getattr(app.state, "wechat_video_routes_registered", False): + return + app.state.wechat_video_routes_registered = True + + def _account_not_found() -> HTTPException: + return HTTPException(status_code=404, detail="WeChat Video account not found") + + def _normalize_wechat_source_url(source_url: str) -> str: + normalized = source_url.strip() + if not normalized: + return "" + lowered = normalized.lower() + if any(marker in lowered for marker in YOUTUBE_HOST_MARKERS): + raise HTTPException(status_code=400, detail="YouTube is not supported by wechat_video routes") + inferred = legacy.infer_platform_from_url(normalized) + if inferred != WECHAT_VIDEO_PLATFORM: + raise HTTPException( + status_code=400, + detail="wechat_video routes only accept channels.weixin.qq.com or mp.weixin.qq.com/s URLs", + ) + return normalized + + def _require_owned_account(source_id: str, user_id: str) -> dict[str, Any]: + row = 
legacy.load_owned_content_source(source_id, user_id) + if row.get("platform") != WECHAT_VIDEO_PLATFORM or row.get("source_kind") != ACCOUNT_SOURCE_KIND: + raise _account_not_found() + return row + + def _list_sync_job_rows(source_row: dict[str, Any], *, limit: int = 50) -> list[dict[str, Any]]: + return legacy.db.fetch_all( + """ + SELECT * + FROM jobs + WHERE user_id = ? AND content_source_id = ? AND source_type = 'content_source_sync' + ORDER BY created_at DESC + LIMIT ? + """, + (source_row["user_id"], source_row["id"], max(1, limit)), + ) + + def _list_video_job_rows(source_row: dict[str, Any], *, limit: int = 200) -> list[dict[str, Any]]: + sync_rows = _list_sync_job_rows(source_row, limit=max(1, limit)) + if not sync_rows: + return [] + parent_job_ids = [row["id"] for row in sync_rows] + placeholders = ",".join("?" for _ in parent_job_ids) + query = f""" + SELECT * + FROM jobs + WHERE user_id = ? AND source_type = 'video_link' AND parent_job_id IN ({placeholders}) + ORDER BY created_at DESC + """ + params: tuple[Any, ...] = (source_row["user_id"], *parent_job_ids) + return legacy.db.fetch_all(query, params)[: max(1, limit)] + + def _dedupe_latest_video_jobs(rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + deduped: list[dict[str, Any]] = [] + seen_urls: set[str] = set() + for row in rows: + source_url = str(row.get("source_url") or "").strip() + if not source_url or source_url in seen_urls: + continue + seen_urls.add(source_url) + deduped.append(row) + return deduped + + def _fetch_content_source(source_id: str) -> dict[str, Any] | None: + if not source_id: + return None + return legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,)) + + def _load_related_reviews(source_row: dict[str, Any], video_rows: list[dict[str, Any]], *, limit: int = 50) -> list[dict[str, Any]]: + candidate_rows = legacy.db.fetch_all( + """ + SELECT * + FROM publish_reviews + WHERE user_id = ? AND platform = ? 
+ ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC + LIMIT 400 + """, + (source_row["user_id"], WECHAT_VIDEO_PLATFORM), + ) + job_ids = {row["id"] for row in video_rows} + video_urls = {str(row.get("source_url") or "").strip() for row in video_rows if row.get("source_url")} + results: list[dict[str, Any]] = [] + for row in candidate_rows: + source_job_id = str(row.get("source_job_id") or "").strip() + publish_url = str(row.get("publish_url") or "").strip() + if source_job_id and source_job_id in job_ids: + results.append(row) + continue + if publish_url and publish_url in video_urls: + results.append(row) + return results[: max(1, limit)] + + def _load_related_documents(video_rows: list[dict[str, Any]], *, limit: int = 30) -> list[dict[str, Any]]: + kb_ids = {str(row.get("knowledge_base_id") or "").strip() for row in video_rows if row.get("knowledge_base_id")} + video_urls = {str(row.get("source_url") or "").strip() for row in video_rows if row.get("source_url")} + documents: list[dict[str, Any]] = [] + seen_document_ids: set[str] = set() + for kb_id in kb_ids: + for row in legacy.db.fetch_all( + """ + SELECT * + FROM knowledge_documents + WHERE knowledge_base_id = ? 
+ ORDER BY created_at DESC + LIMIT 200 + """, + (kb_id,), + ): + if row["id"] in seen_document_ids: + continue + if str(row.get("source_url") or "").strip() not in video_urls: + continue + seen_document_ids.add(row["id"]) + documents.append(row) + if len(documents) >= limit: + return documents + return documents + + def _build_review_maps(review_rows: list[dict[str, Any]]) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, Any]]]: + by_job_id: dict[str, dict[str, Any]] = {} + by_url: dict[str, dict[str, Any]] = {} + for row in review_rows: + source_job_id = str(row.get("source_job_id") or "").strip() + publish_url = str(row.get("publish_url") or "").strip() + if source_job_id and source_job_id not in by_job_id: + by_job_id[source_job_id] = row + if publish_url and publish_url not in by_url: + by_url[publish_url] = row + return by_job_id, by_url + + def _build_document_map(document_rows: list[dict[str, Any]]) -> dict[str, dict[str, Any]]: + by_url: dict[str, dict[str, Any]] = {} + for row in document_rows: + source_url = str(row.get("source_url") or "").strip() + if source_url and source_url not in by_url: + by_url[source_url] = row + return by_url + + def _build_account_payload(source_row: dict[str, Any]) -> dict[str, Any]: + payload = legacy.content_source_payload(source_row) + metadata = payload.get("metadata") or {} + latest_sync_job = None + last_sync_job_id = str(metadata.get("last_sync_job_id") or "") + if last_sync_job_id: + latest_sync_job = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ?", (last_sync_job_id,)) + payload["platform_label"] = legacy.platform_label(WECHAT_VIDEO_PLATFORM) + payload["last_sync_job_id"] = last_sync_job_id + payload["last_sync_completed_at"] = str(metadata.get("last_sync_completed_at") or "") + payload["last_sync_error"] = str(metadata.get("last_sync_error") or "") + payload["last_sync_status"] = str((latest_sync_job or {}).get("status") or "") + payload["sync_mode"] = str(metadata.get("sync_mode") or "recent_uploads") + 
return payload + + def _build_video_item( + job_row: dict[str, Any], + review_by_job_id: dict[str, dict[str, Any]], + review_by_url: dict[str, dict[str, Any]], + document_by_url: dict[str, dict[str, Any]], + ) -> dict[str, Any]: + source_url = str(job_row.get("source_url") or "").strip() + content_source = _fetch_content_source(str(job_row.get("content_source_id") or "").strip()) + review_row = review_by_job_id.get(job_row["id"]) or review_by_url.get(source_url) + document_row = document_by_url.get(source_url) + artifacts = legacy.parse_job_artifacts(job_row) + return { + "id": job_row["id"], + "title": job_row.get("title", ""), + "status": job_row.get("status", ""), + "source_url": source_url, + "external_id": str(artifacts.get("external_id") or ""), + "origin_sync_job_id": str(artifacts.get("origin_sync_job_id") or ""), + "job": legacy.job_payload(job_row), + "content_source": legacy.content_source_payload(content_source) if content_source else None, + "latest_review": legacy.review_payload(review_row) if review_row else None, + "document": legacy.document_payload(document_row) if document_row else None, + } + + def _build_workspace_payload(source_row: dict[str, Any]) -> dict[str, Any]: + sync_rows = _list_sync_job_rows(source_row, limit=20) + video_rows = _dedupe_latest_video_jobs(_list_video_job_rows(source_row, limit=200)) + review_rows = _load_related_reviews(source_row, video_rows, limit=20) + document_rows = _load_related_documents(video_rows, limit=12) + review_by_job_id, review_by_url = _build_review_maps(review_rows) + document_by_url = _build_document_map(document_rows) + status_counts = Counter(str(row.get("status") or "").strip() or "unknown" for row in video_rows) + latest_sync = legacy.job_context_payload(sync_rows[0]) if sync_rows else None + return { + "account": _build_account_payload(source_row), + "latest_sync_job": latest_sync, + "sync_jobs": [legacy.job_payload(row) for row in sync_rows[:10]], + "videos": { + "total": len(video_rows), + 
"status_counts": dict(status_counts), + "items": [ + _build_video_item(row, review_by_job_id, review_by_url, document_by_url) + for row in video_rows[:20] + ], + }, + "reviews": [legacy.review_payload(row) for row in review_rows], + "recent_documents": [legacy.document_payload(row) for row in document_rows], + "stats": { + "sync_job_count": len(sync_rows), + "video_job_count": len(video_rows), + "completed_video_count": status_counts.get("completed", 0), + "failed_video_count": status_counts.get("failed", 0), + "review_count": len(review_rows), + "document_count": len(document_rows), + }, + } + + def _update_account_source( + source_row: dict[str, Any], + *, + source_url: str, + title: str, + handle: str, + metadata_updates: dict[str, Any], + ) -> dict[str, Any]: + merged_metadata = legacy.merge_json_field(source_row.get("metadata_json") or "{}", metadata_updates) + legacy.db.execute( + """ + UPDATE content_sources + SET handle = ?, source_url = ?, title = ?, platform = ?, metadata_json = ?, updated_at = ? + WHERE id = ? AND user_id = ? 
+ """, + ( + handle, + source_url, + title, + WECHAT_VIDEO_PLATFORM, + merged_metadata, + legacy.utc_now(), + source_row["id"], + source_row["user_id"], + ), + ) + return legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_row["id"],)) + + def _job_belongs_to_account(job_row: dict[str, Any], source_row: dict[str, Any]) -> bool: + if str(job_row.get("content_source_id") or "").strip(): + content_source = _fetch_content_source(str(job_row.get("content_source_id") or "").strip()) + metadata = (legacy.content_source_payload(content_source).get("metadata") or {}) if content_source else {} + if content_source and str(metadata.get("origin_content_source_id") or "") == source_row["id"]: + return True + if str(job_row.get("parent_job_id") or "").strip(): + parent_row = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_row["parent_job_id"],)) + if parent_row and str(parent_row.get("content_source_id") or "") == source_row["id"]: + return True + return False + + @app.get("/v2/wechat-video/accounts") + def list_wechat_video_accounts( + project_id: str | None = Query(default=None), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + clauses = ["user_id = ?", "platform = ?", "source_kind = ?"] + params: list[Any] = [account["id"], WECHAT_VIDEO_PLATFORM, ACCOUNT_SOURCE_KIND] + if project_id: + project = legacy.resolve_target_project(account["id"], project_id, username=account["username"]) + clauses.append("project_id = ?") + params.append(project["id"]) + rows = legacy.db.fetch_all( + f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY updated_at DESC", + tuple(params), + ) + return [_build_account_payload(row) for row in rows] + + @app.post("/v2/wechat-video/accounts/sync") + async def sync_wechat_video_account( + request: WechatVideoAccountSyncRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_row = None + if 
request.content_source_id.strip(): + source_row = _require_owned_account(request.content_source_id.strip(), account["id"]) + + source_url = _normalize_wechat_source_url(request.profile_url or (source_row or {}).get("source_url", "")) + if not source_url: + raise HTTPException(status_code=400, detail="profile_url or content_source_id is required") + + requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "") + project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]: + raise HTTPException(status_code=400, detail="Content source does not belong to target project") + + kb = legacy.resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"]) + assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + profile = legacy.model_profile_for_account(account["id"], request.analysis_model_profile_id or None) + handle = request.handle.strip() or (source_row or {}).get("handle", "").strip() + title = request.title.strip() or (source_row or {}).get("title", "").strip() or handle or source_url + metadata_updates = { + "account_type": WECHAT_VIDEO_PLATFORM, + "sync_mode": "recent_uploads", + "max_items": request.max_items, + "analysis_model_profile_id": profile["id"], + "last_sync_error": "", + } + + if not source_row: + source_row = legacy.create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind=ACCOUNT_SOURCE_KIND, + platform=WECHAT_VIDEO_PLATFORM, + handle=handle, + source_url=source_url, + title=title, + metadata=metadata_updates, + ) + else: + source_row = _update_account_source( + source_row, + source_url=source_url, + title=title, + handle=handle, + metadata_updates=metadata_updates, + ) + + job_row = legacy.create_job_record( + 
account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="content_source_sync", + line_type="content_source_sync", + workflow_key="content_source_sync_pipeline", + title=f"{title} 内容源同步", + language=request.language, + source_url=source_url, + assistant_id=(assistant or {}).get("id"), + content_source_id=source_row["id"], + artifacts={ + "platform": WECHAT_VIDEO_PLATFORM, + "handle": handle, + "source_account_url": source_url, + "source_title": title, + "max_items": request.max_items, + "skip_existing": request.skip_existing, + "auto_trigger_analysis": request.auto_trigger_analysis, + }, + analysis_model_profile_id=profile["id"], + ) + legacy.update_content_source_metadata( + source_row["id"], + { + "sync_mode": "recent_uploads", + "max_items": request.max_items, + "analysis_model_profile_id": profile["id"], + "last_sync_job_id": job_row["id"], + "last_sync_requested_at": legacy.utc_now(), + "last_sync_error": "", + }, + ) + queued_row = await legacy.trigger_orchestrated_job(job_row) + source_row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_row["id"],)) + workspace = _build_workspace_payload(source_row) + workspace["sync_job"] = legacy.job_payload(queued_row) + return workspace + + @app.get("/v2/wechat-video/accounts/{account_id}") + def get_wechat_video_account( + account_id: str, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_row = _require_owned_account(account_id, account["id"]) + return _build_workspace_payload(source_row) + + @app.get("/v2/wechat-video/accounts/{account_id}/workspace") + def get_wechat_video_account_workspace( + account_id: str, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_row = _require_owned_account(account_id, account["id"]) + return _build_workspace_payload(source_row) + + @app.get("/v2/wechat-video/accounts/{account_id}/videos") + def list_wechat_video_account_videos( + 
account_id: str, + limit: int = Query(default=50, ge=1, le=200), + status: str = Query(default=""), + q: str = Query(default=""), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_row = _require_owned_account(account_id, account["id"]) + video_rows = _dedupe_latest_video_jobs(_list_video_job_rows(source_row, limit=max(limit * 4, 200))) + normalized_status = status.strip().lower() + normalized_query = q.strip().lower() + if normalized_status: + video_rows = [row for row in video_rows if str(row.get("status") or "").lower() == normalized_status] + if normalized_query: + video_rows = [ + row + for row in video_rows + if normalized_query in str(row.get("title") or "").lower() + or normalized_query in str(row.get("source_url") or "").lower() + ] + selected_rows = video_rows[:limit] + review_rows = _load_related_reviews(source_row, selected_rows, limit=max(limit, 20)) + document_rows = _load_related_documents(selected_rows, limit=max(limit, 20)) + review_by_job_id, review_by_url = _build_review_maps(review_rows) + document_by_url = _build_document_map(document_rows) + return { + "account": _build_account_payload(source_row), + "total": len(video_rows), + "status_counts": dict(Counter(str(row.get("status") or "").strip() or "unknown" for row in video_rows)), + "items": [ + _build_video_item(row, review_by_job_id, review_by_url, document_by_url) + for row in selected_rows + ], + } + + @app.get("/v2/wechat-video/accounts/{account_id}/reviews") + def list_wechat_video_account_reviews( + account_id: str, + limit: int = Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + source_row = _require_owned_account(account_id, account["id"]) + video_rows = _dedupe_latest_video_jobs(_list_video_job_rows(source_row, limit=200)) + review_rows = _load_related_reviews(source_row, video_rows, limit=limit) + return [legacy.review_payload(row) for row in review_rows] + + 
@app.post("/v2/wechat-video/accounts/{account_id}/reviews") + def create_wechat_video_review( + account_id: str, + request: WechatVideoReviewCreateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_row = _require_owned_account(account_id, account["id"]) + source_job = None + if request.source_job_id.strip(): + source_job = legacy.load_owned_job(request.source_job_id.strip(), account["id"]) + if not _job_belongs_to_account(source_job, source_row): + raise HTTPException(status_code=400, detail="source_job_id does not belong to the target WeChat Video account") + + requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else source_row.get("project_id", "")) + project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + if source_row.get("project_id") and source_row.get("project_id") != project["id"]: + raise HTTPException(status_code=400, detail="WeChat Video account does not belong to target project") + + assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + publish_url = request.publish_url.strip() or (source_job.get("source_url", "") if source_job else "") + if publish_url: + _normalize_wechat_source_url(publish_url) + title = request.title.strip() or (source_job.get("title", "") if source_job else "") or f"{source_row.get('title', '')} 复盘".strip() + if not title: + title = "微信视频号复盘" + + review_id = legacy.make_id("review") + timestamp = legacy.utc_now() + legacy.db.execute( + """ + INSERT INTO publish_reviews ( + id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type, + publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + review_id, + account["id"], + project["id"], + source_job["id"] if source_job else None, + (assistant or {}).get("id") or None, + title, + WECHAT_VIDEO_PLATFORM, + request.content_type or "video", + publish_url, + request.published_at.strip(), + json.dumps(request.metrics, ensure_ascii=False), + request.verdict.strip(), + request.highlights.strip(), + request.next_actions.strip(), + request.notes.strip(), + timestamp, + timestamp, + ), + ) + row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return legacy.review_payload(row) diff --git a/collector-service/app/xiaohongshu_features.py b/collector-service/app/xiaohongshu_features.py new file mode 100644 index 0000000..5502653 --- /dev/null +++ b/collector-service/app/xiaohongshu_features.py @@ -0,0 +1,765 @@ +from __future__ import annotations + +import json +import re +from datetime import datetime, timezone +from html import unescape +from typing import Any, Iterable +from urllib.parse import unquote + +import httpx +from fastapi import Depends, HTTPException, Query +from pydantic import BaseModel, Field + +DEFAULT_TIMEOUT = 20.0 +MAX_HTML_SEARCH_BYTES = 2_000_000 +DEFAULT_USER_AGENT = ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" +) +XHS_PLATFORM = "xiaohongshu" + + +class XHSManualPageCapture(BaseModel): + url: str = "" + title: str = "" + payload: dict[str, Any] = Field(default_factory=dict) + + +class XiaohongshuContentSourceCreateRequest(BaseModel): + project_id: str = "" + source_kind: str + handle: str = "" + source_url: str = "" + title: str = "" + local_path: str = "" + metadata: dict[str, Any] = Field(default_factory=dict) + + +class XiaohongshuContentSourceSyncRequest(BaseModel): + project_id: str = "" + knowledge_base_id: str = "" + assistant_id: str = "" + content_source_id: str = "" + source_url: str = "" + handle: str = "" + title: str = "" + language: str = "auto" + 
max_items: int = Field(default=5, ge=1, le=20) + skip_existing: bool = True + auto_trigger_analysis: bool = True + manual_source_payload: dict[str, Any] | None = None + manual_pages: list[XHSManualPageCapture] = Field(default_factory=list) + discovery_note: str = "" + + +class XiaohongshuReviewCreateRequest(BaseModel): + project_id: str = "" + source_job_id: str = "" + assistant_id: str = "" + title: str = "" + platform: str = XHS_PLATFORM + content_type: str = "note" + publish_url: str = "" + published_at: str = "" + metrics: dict[str, Any] = Field(default_factory=dict) + verdict: str = "" + highlights: str = "" + next_actions: str = "" + notes: str = "" + + +class XiaohongshuReviewUpdateRequest(BaseModel): + title: str | None = None + platform: str | None = None + content_type: str | None = None + publish_url: str | None = None + published_at: str | None = None + metrics: dict[str, Any] | None = None + verdict: str | None = None + highlights: str | None = None + next_actions: str | None = None + notes: str | None = None + assistant_id: str | None = None + + +def _safe_json_dumps(value: Any) -> str: + return json.dumps(value, ensure_ascii=False, separators=(",", ":")) + + +def _first_non_empty(*values: Any) -> str: + for value in values: + if value is None: + continue + if isinstance(value, str): + stripped = value.strip() + if stripped: + return stripped + elif value not in ("", [], {}, ()): + return str(value) + return "" + + +def _dedupe_strings(values: Iterable[str]) -> list[str]: + result: list[str] = [] + seen: set[str] = set() + for value in values: + item = str(value).strip() + if not item: + continue + key = item.lower() + if key in seen: + continue + seen.add(key) + result.append(item) + return result + + +def _compact_text(value: Any, limit: int = 500) -> str: + text = str(value or "").strip() + if len(text) <= limit: + return text + return f"{text[: limit - 1]}…" + + +def _parse_count(value: Any) -> float: + if value is None: + return 0.0 + if 
isinstance(value, (int, float)): + return float(value) + text = str(value).strip().lower().replace(",", "") + if not text: + return 0.0 + multiplier = 1.0 + if text.endswith("w") or text.endswith("万"): + multiplier = 10_000.0 + text = text[:-1] + elif text.endswith("亿"): + multiplier = 100_000_000.0 + text = text[:-1] + text = text.replace("+", "") + match = re.search(r"-?\d+(?:\.\d+)?", text) + if not match: + return 0.0 + try: + return float(match.group()) * multiplier + except ValueError: + return 0.0 + + +def _normalize_timestamp(value: Any) -> str | None: + if value in (None, "", 0, "0"): + return None + if isinstance(value, str): + stripped = value.strip() + if not stripped: + return None + if re.match(r"^\d{4}-\d{2}-\d{2}T", stripped): + return stripped + if stripped.isdigit(): + value = int(stripped) + else: + return stripped + if isinstance(value, (int, float)): + ts = float(value) + if ts > 10_000_000_000: + ts /= 1000.0 + try: + return datetime.fromtimestamp(ts, tz=timezone.utc).replace(microsecond=0).isoformat() + except Exception: + return None + return None + + +def _extract_hashtags(*texts: str) -> list[str]: + tags: list[str] = [] + for text in texts: + if not text: + continue + tags.extend(match.group(1) for match in re.finditer(r"#([\w\u4e00-\u9fff]+)", text)) + return _dedupe_strings(tags) + + +def _extract_keywords(*texts: str) -> list[str]: + candidates: list[str] = [] + for text in texts: + if not text: + continue + candidates.extend(_extract_hashtags(text)) + candidates.extend(re.findall(r"[\u4e00-\u9fff]{2,8}", text)) + candidates.extend(re.findall(r"[A-Za-z][A-Za-z0-9_]{2,20}", text)) + stop_words = { + "小红书", + "笔记", + "内容", + "账号", + "发布", + "更多", + "关注", + "用户", + "xhs", + "xiaohongshu", + } + return _dedupe_strings(item for item in candidates if item.lower() not in stop_words) + + +def _walk_json(value: Any) -> Iterable[dict[str, Any]]: + if isinstance(value, dict): + yield value + for child in value.values(): + yield from 
_walk_json(child) + elif isinstance(value, list): + for child in value: + yield from _walk_json(child) + + +def _extract_json_objects_from_text(text: str) -> list[Any]: + decoder = json.JSONDecoder() + objects: list[Any] = [] + seen: set[str] = set() + if not text: + return objects + + candidates = [text, unquote(text), unescape(text), unescape(unquote(text))] + for candidate in candidates: + snippet = candidate[:MAX_HTML_SEARCH_BYTES] + for match in re.finditer(r"[\{\[]", snippet): + try: + obj, _ = decoder.raw_decode(snippet[match.start() :]) + except Exception: + continue + marker = _safe_json_dumps(obj) + if marker in seen: + continue + seen.add(marker) + objects.append(obj) + if len(objects) >= 50: + return objects + return objects + + +def _extract_json_blobs_from_html(html: str) -> list[dict[str, Any]]: + blobs: list[dict[str, Any]] = [] + seen: set[str] = set() + for attrs, content in re.findall(r"]*)>(.*?)", html, re.IGNORECASE | re.DOTALL): + script_id_match = re.search(r'id=["\']([^"\']+)["\']', attrs, re.IGNORECASE) + script_id = script_id_match.group(1) if script_id_match else "" + for obj in _extract_json_objects_from_text(content.strip()): + marker = _safe_json_dumps(obj) + if marker in seen: + continue + seen.add(marker) + blobs.append({"script_id": script_id, "payload": obj}) + return blobs + + +async def _fetch_html(url: str, cookie: str = "") -> tuple[str, str]: + headers = { + "User-Agent": DEFAULT_USER_AGENT, + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", + } + if cookie.strip(): + headers["Cookie"] = cookie.strip() + async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT, follow_redirects=True) as client: + response = await client.get(url, headers=headers) + response.raise_for_status() + return str(response.url), response.text + + +def _note_candidate_score(value: dict[str, Any]) -> int: + score = 0 + if any(key in value for key in ("note_id", "noteId", "id", "post_id")): + score += 2 + if any(key in value for key in ("title", "desc", 
"content", "text", "note")): + score += 2 + if any(key in value for key in ("author", "user", "owner")): + score += 2 + if "stats" in value and isinstance(value["stats"], dict): + score += 2 + return score + + +def _extract_note_candidates(payload: Any) -> list[dict[str, Any]]: + candidates: list[dict[str, Any]] = [] + for item in _walk_json(payload): + if _note_candidate_score(item) >= 4: + candidates.append(item) + for key in ("author", "user", "owner"): + child = item.get(key) + if isinstance(child, dict) and _note_candidate_score(child) >= 3: + candidates.append(child) + return candidates + + +def _normalize_note_candidate(candidate: dict[str, Any], fallback_url: str = "") -> dict[str, Any]: + stats_source = candidate.get("stats") if isinstance(candidate.get("stats"), dict) else {} + author = candidate.get("author") if isinstance(candidate.get("author"), dict) else {} + if not author and isinstance(candidate.get("user"), dict): + author = candidate["user"] + cover = candidate.get("cover") or candidate.get("image") or candidate.get("images") + if isinstance(cover, list) and cover: + cover = cover[0] + if isinstance(cover, dict): + cover = _first_non_empty( + cover.get("url_list", [""])[0] if isinstance(cover.get("url_list"), list) else "", + cover.get("url"), + ) + return { + "note_id": _first_non_empty(candidate.get("note_id"), candidate.get("noteId"), candidate.get("id"), candidate.get("post_id")), + "title": _first_non_empty(candidate.get("title"), candidate.get("desc"), candidate.get("content"), candidate.get("text")), + "content": _first_non_empty(candidate.get("content"), candidate.get("desc"), candidate.get("text"), candidate.get("note")), + "author_name": _first_non_empty(author.get("nickname"), author.get("name"), candidate.get("nickname")), + "author_url": _first_non_empty(author.get("profile_url"), candidate.get("profile_url")), + "share_url": _first_non_empty(candidate.get("share_url"), candidate.get("url"), fallback_url), + "cover_url": 
_first_non_empty(cover), + "published_at": _normalize_timestamp(candidate.get("publish_time") or candidate.get("created_at") or candidate.get("create_time")), + "tags": _extract_hashtags( + _first_non_empty(candidate.get("title")), + _first_non_empty(candidate.get("desc"), candidate.get("content")), + ), + "stats": { + "like": _parse_count(stats_source.get("like_count") or stats_source.get("liked_count") or candidate.get("like_count")), + "comment": _parse_count(stats_source.get("comment_count") or candidate.get("comment_count")), + "collect": _parse_count(stats_source.get("collect_count") or candidate.get("collect_count")), + "share": _parse_count(stats_source.get("share_count") or candidate.get("share_count")), + }, + "raw": candidate, + } + + +def _extract_notes(payloads: Iterable[Any]) -> list[dict[str, Any]]: + notes: list[dict[str, Any]] = [] + seen: set[str] = set() + for payload in payloads: + for candidate in _extract_note_candidates(payload): + normalized = _normalize_note_candidate(candidate) + dedupe_key = normalized["note_id"] or normalized["share_url"] or normalized["title"] + if not dedupe_key or dedupe_key in seen: + continue + seen.add(dedupe_key) + notes.append(normalized) + notes.sort( + key=lambda item: ( + item["stats"]["like"] + item["stats"]["comment"] * 3 + item["stats"]["collect"] * 2 + item["stats"]["share"] * 4 + ), + reverse=True, + ) + return notes + + +def _is_xhs_source_row(row: dict[str, Any]) -> bool: + platform = str(row.get("platform", "") or "").strip().lower() + if platform == XHS_PLATFORM: + return True + source_url = str(row.get("source_url", "") or "") + normalized = source_url.strip().lower() + return "xiaohongshu.com" in normalized or "xhslink.com" in normalized + + +def _job_matches_platform(row: dict[str, Any], legacy: Any) -> bool: + if row.get("content_source_id"): + source = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["content_source_id"],)) + if source: + return _is_xhs_source_row(source) + 
source_url = str(row.get("source_url") or "") + return "xiaohongshu.com" in source_url.lower() or "xhslink.com" in source_url.lower() + + +def _review_matches_platform(row: dict[str, Any], legacy: Any) -> bool: + return str(row.get("platform", "") or "").strip().lower() == XHS_PLATFORM + + +def _normalize_platform(value: str | None) -> str: + return str(value or "").strip().lower() + + +def _require_xhs_platform(value: str | None) -> str: + normalized = _normalize_platform(value or XHS_PLATFORM) + if normalized != XHS_PLATFORM: + raise HTTPException(status_code=400, detail="Xiaohongshu routes only support the xiaohongshu platform") + return normalized + + +def register_xiaohongshu_routes(app: Any, legacy: Any) -> None: + def now() -> str: + return legacy.utc_now() + + def make_id(prefix: str) -> str: + return legacy.make_id(prefix) + + def _content_source_row_or_404(source_id: str, account_id: str) -> dict[str, Any]: + row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ? 
AND user_id = ?", (source_id, account_id)) + if not row: + raise HTTPException(status_code=404, detail="Content source not found") + if not _is_xhs_source_row(row): + raise HTTPException(status_code=404, detail="Content source not found") + return row + + def _xhs_job_payload(row: dict[str, Any]) -> dict[str, Any]: + payload = legacy.job_payload(row) + if row.get("content_source_id"): + source_row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["content_source_id"],)) + if source_row and _is_xhs_source_row(source_row): + payload["content_source"] = legacy.content_source_payload(source_row) + return payload + + def _xhs_review_payload(row: dict[str, Any]) -> dict[str, Any]: + payload = legacy.review_payload(row) + if payload.get("platform", "") != XHS_PLATFORM: + payload["platform"] = XHS_PLATFORM + return payload + + async def _collect_public_source( + source_url: str, + manual_payload: dict[str, Any] | None, + manual_pages: list[XHSManualPageCapture], + ) -> dict[str, Any]: + source_url = source_url.strip() + blobs: list[dict[str, Any]] = [] + errors: list[str] = [] + + if manual_payload: + blobs.append({"script_id": "manual_source_payload", "payload": manual_payload}) + + for page in manual_pages: + blobs.append({ + "script_id": "manual_page_payload", + "url": page.url, + "title": page.title, + "payload": page.payload, + }) + + if source_url: + try: + final_url, html = await _fetch_html(source_url) + source_url = final_url + blobs.extend(_extract_json_blobs_from_html(html)) + except Exception as exc: + errors.append(f"source_fetch_failed: {exc}") + + payloads = [item["payload"] for item in blobs] + notes = _extract_notes(payloads) + source_title = _first_non_empty( + manual_payload.get("title", "") if manual_payload else "", + *(item.get("title", "") for item in notes[:3]), + source_url, + ) + return { + "source_url": source_url, + "title": source_title, + "notes": notes, + "raw_pages": blobs, + "errors": errors, + } + + 
@app.get("/v2/xiaohongshu/content-sources") + def list_content_sources( + project_id: str | None = Query(default=None), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + clauses = ["user_id = ?", "platform = ?"] + params: list[Any] = [account["id"], XHS_PLATFORM] + if project_id is not None: + normalized_project = project_id.strip() + if normalized_project: + clauses.append("project_id = ?") + params.append(normalized_project) + else: + clauses.append("(project_id IS NULL OR project_id = '')") + rows = legacy.db.fetch_all( + f"SELECT * FROM content_sources WHERE {' AND '.join(clauses)} ORDER BY created_at DESC", + tuple(params), + ) + return [legacy.content_source_payload(row) for row in rows] + + @app.post("/v2/xiaohongshu/content-sources") + def create_content_source_api( + request: XiaohongshuContentSourceCreateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + project = legacy.resolve_target_project(account["id"], request.project_id or None, username=account["username"]) + row = legacy.create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind=request.source_kind.strip(), + platform=XHS_PLATFORM, + handle=request.handle.strip(), + source_url=request.source_url.strip(), + title=request.title.strip(), + local_path=request.local_path.strip(), + metadata={ + **request.metadata, + "platform_label": "小红书", + "platform": XHS_PLATFORM, + }, + ) + return legacy.content_source_payload(row) + + @app.get("/v2/xiaohongshu/content-sources/{source_id}") + def get_content_source(source_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + row = _content_source_row_or_404(source_id, account["id"]) + return legacy.content_source_payload(row) + + @app.post("/v2/xiaohongshu/content-sources/sync") + async def sync_content_source( + request: XiaohongshuContentSourceSyncRequest, + account: dict[str, Any] = 
Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_row = None + if request.content_source_id.strip(): + source_row = _content_source_row_or_404(request.content_source_id.strip(), account["id"]) + + requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "") + project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + kb = legacy.resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"]) + assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + source_url = (request.source_url or (source_row or {}).get("source_url") or "").strip() + if not source_url and not source_row: + raise HTTPException(status_code=400, detail="source_url or content_source_id is required") + + if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]: + raise HTTPException(status_code=400, detail="Content source does not belong to target project") + + if source_row and not _is_xhs_source_row(source_row): + raise HTTPException(status_code=400, detail="Content source is not scoped to Xiaohongshu") + + source_kind = (source_row or {}).get("source_kind", "creator_account") + handle = (request.handle or (source_row or {}).get("handle", "")).strip() + source_title = ( + request.title.strip() + or (source_row or {}).get("title", "").strip() + or handle + or source_url + ) + + if not source_row: + source_row = legacy.create_content_source( + account_id=account["id"], + project_id=project["id"], + source_kind=source_kind or "creator_account", + platform=XHS_PLATFORM, + handle=handle, + source_url=source_url, + title=source_title, + metadata={ + "platform": XHS_PLATFORM, + "platform_label": "小红书", + "sync_mode": "recent_notes", + "max_items": request.max_items, + }, + ) + + public_data = await _collect_public_source(source_url, 
request.manual_source_payload, request.manual_pages) + note_count = len(public_data["notes"]) + top_notes = [ + { + "note_id": item["note_id"], + "title": _compact_text(item["title"], 120), + "content": _compact_text(item["content"], 180), + "author_name": item["author_name"], + "published_at": item["published_at"], + "stats": item["stats"], + "tags": item["tags"][:6], + } + for item in public_data["notes"][: request.max_items] + ] + + job_row = legacy.create_job_record( + account_id=account["id"], + project_id=project["id"], + knowledge_base_id=kb["id"], + source_type="content_source_sync", + line_type="content_source_sync", + workflow_key="content_source_sync_pipeline", + title=f"{source_title} 内容源同步", + language=request.language, + source_url=source_url, + assistant_id=(assistant or {}).get("id"), + content_source_id=source_row["id"], + artifacts={ + "platform": XHS_PLATFORM, + "handle": handle, + "source_account_url": source_url, + "source_title": source_title, + "skip_existing": request.skip_existing, + "auto_trigger_analysis": request.auto_trigger_analysis, + "max_items": request.max_items, + "note_count": note_count, + "top_notes": top_notes, + "raw_pages": public_data["raw_pages"], + "errors": public_data["errors"], + "discovery_note": request.discovery_note.strip(), + }, + analysis_model_profile_id="", + ) + + legacy.update_content_source_metadata( + source_row["id"], + { + "platform": XHS_PLATFORM, + "platform_label": "小红书", + "sync_mode": "recent_notes", + "max_items": request.max_items, + "note_count": note_count, + "last_sync_job_id": job_row["id"], + "last_sync_requested_at": now(), + }, + ) + return legacy.job_payload(await legacy.trigger_orchestrated_job(job_row)) + + @app.get("/v2/xiaohongshu/jobs") + def list_jobs( + parent_job_id: str | None = Query(default=None), + line_type: str | None = Query(default=None), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + clauses = ["user_id = ?"] + params: list[Any] 
= [account["id"]] + if parent_job_id is not None: + normalized_parent = parent_job_id.strip() + if normalized_parent: + clauses.append("parent_job_id = ?") + params.append(normalized_parent) + else: + clauses.append("(parent_job_id IS NULL OR parent_job_id = '')") + if line_type: + clauses.append("line_type = ?") + params.append(line_type.strip()) + rows = legacy.db.fetch_all( + f"SELECT * FROM jobs WHERE {' AND '.join(clauses)} ORDER BY created_at DESC", + tuple(params), + ) + return [_xhs_job_payload(row) for row in rows if _job_matches_platform(row, legacy)] + + @app.get("/v2/xiaohongshu/jobs/{job_id}") + def get_job(job_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]: + row = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"])) + if not row or not _job_matches_platform(row, legacy): + raise HTTPException(status_code=404, detail="Job not found") + return _xhs_job_payload(row) + + @app.get("/v2/xiaohongshu/jobs/{job_id}/events") + def get_job_events(job_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]: + row = legacy.db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"])) + if not row or not _job_matches_platform(row, legacy): + raise HTTPException(status_code=404, detail="Job not found") + return [ + legacy.job_event_payload(item) + for item in legacy.db.fetch_all("SELECT * FROM job_events WHERE job_id = ? 
ORDER BY created_at ASC", (job_id,)) + ] + + @app.get("/v2/xiaohongshu/reviews") + def list_reviews( + project_id: str | None = Query(default=None), + limit: int = Query(default=50, ge=1, le=200), + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> list[dict[str, Any]]: + clauses = ["user_id = ?", "platform = ?"] + params: list[Any] = [account["id"], XHS_PLATFORM] + if project_id is not None: + normalized_project = project_id.strip() + if normalized_project: + clauses.append("project_id = ?") + params.append(normalized_project) + else: + clauses.append("(project_id IS NULL OR project_id = '')") + sql = ( + f"SELECT * FROM publish_reviews WHERE {' AND '.join(clauses)} " + "ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC LIMIT ?" + ) + params.append(limit) + return [_xhs_review_payload(row) for row in legacy.db.fetch_all(sql, tuple(params))] + + @app.post("/v2/xiaohongshu/reviews") + def create_review( + request: XiaohongshuReviewCreateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + source_job = None + if request.source_job_id.strip(): + source_job = legacy.load_owned_job(request.source_job_id.strip(), account["id"]) + if not _job_matches_platform(source_job, legacy): + raise HTTPException(status_code=404, detail="Job not found") + requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "") + project = legacy.resolve_target_project(account["id"], requested_project_id or None, username=account["username"]) + assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, project["id"]) + review_id = make_id("review") + title = request.title.strip() or (source_job.get("title", "") if source_job else "") + if not title: + title = f"{project['name']} 复盘" + timestamp = now() + normalized_platform = _require_xhs_platform(request.platform) + legacy.db.execute( + """ + INSERT INTO publish_reviews ( + id, 
user_id, project_id, source_job_id, assistant_id, title, platform, content_type, + publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + review_id, + account["id"], + project["id"], + source_job["id"] if source_job else None, + (assistant or {}).get("id") or None, + title, + normalized_platform, + request.content_type.strip() or "note", + request.publish_url.strip(), + request.published_at.strip(), + _safe_json_dumps(request.metrics), + request.verdict.strip(), + request.highlights.strip(), + request.next_actions.strip(), + request.notes.strip(), + timestamp, + timestamp, + ), + ) + row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return _xhs_review_payload(row) + + @app.patch("/v2/xiaohongshu/reviews/{review_id}") + def update_review( + review_id: str, + request: XiaohongshuReviewUpdateRequest, + account: dict[str, Any] = Depends(legacy.require_approved), + ) -> dict[str, Any]: + current = legacy.load_owned_review(review_id, account["id"]) + if not _review_matches_platform(current, legacy): + raise HTTPException(status_code=404, detail="Review not found") + assistant_id = current.get("assistant_id") or None + if request.assistant_id is not None: + assistant = legacy.resolve_target_assistant(account["id"], request.assistant_id or None, current.get("project_id", "")) + assistant_id = (assistant or {}).get("id") or None + normalized_platform = current.get("platform", XHS_PLATFORM) + if request.platform is not None: + normalized_platform = _require_xhs_platform(request.platform) + legacy.db.execute( + """ + UPDATE publish_reviews + SET title = ?, platform = ?, content_type = ?, publish_url = ?, published_at = ?, + metrics_json = ?, verdict = ?, highlights = ?, next_actions = ?, notes = ?, + assistant_id = ?, updated_at = ? + WHERE id = ? AND user_id = ? 
+ """, + ( + request.title if request.title is not None else current.get("title", ""), + normalized_platform, + request.content_type if request.content_type is not None else current.get("content_type", "note"), + request.publish_url if request.publish_url is not None else current.get("publish_url", ""), + request.published_at if request.published_at is not None else current.get("published_at", ""), + _safe_json_dumps(request.metrics if request.metrics is not None else legacy.parse_json_object(current.get("metrics_json") or "{}")), + request.verdict if request.verdict is not None else current.get("verdict", ""), + request.highlights if request.highlights is not None else current.get("highlights", ""), + request.next_actions if request.next_actions is not None else current.get("next_actions", ""), + request.notes if request.notes is not None else current.get("notes", ""), + assistant_id, + now(), + review_id, + account["id"], + ), + ) + row = legacy.db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)) + return _xhs_review_payload(row) diff --git a/collector-service/run_source_overlay.sh b/collector-service/run_source_overlay.sh new file mode 100755 index 0000000..c132678 --- /dev/null +++ b/collector-service/run_source_overlay.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PORT="${PORT:-18083}" +HOST="${HOST:-127.0.0.1}" + +# Mirror the current live collector runtime so we can verify the source overlay +# against the same database and external integrations without touching 8081. 
#!/usr/bin/env bash
# Run the collector source overlay with uvicorn on a side port.
#
# Every setting below can be overridden from the environment; the defaults
# mirror the current live collector runtime so the overlay can be verified
# against the same database and external integrations without touching 8081.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PORT="${PORT:-18083}"
HOST="${HOST:-127.0.0.1}"
# Interpreter used to launch uvicorn; defaults to the project virtualenv.
PYTHON_BIN="${PYTHON_BIN:-$ROOT_DIR/.venv311/bin/python}"

# NOTE(review): the DATA_DIR default is a developer-machine path and the
# n8n / host.docker.internal defaults are container-style host names — confirm
# they resolve where this script is run.
export DATA_DIR="${DATA_DIR:-/Users/kris/code/StoryForge-gitea/data/collector}"
export DATABASE_PATH="${DATABASE_PATH:-$DATA_DIR/storyforge.db}"
export DEFAULT_EXTERNAL_BASE_URL="${DEFAULT_EXTERNAL_BASE_URL:-https://test.hyzq.net/storyforge}"
export LOCAL_OPENAI_BASE_URL="${LOCAL_OPENAI_BASE_URL:-http://host.docker.internal:8317/v1}"
export LOCAL_OPENAI_MODEL="${LOCAL_OPENAI_MODEL:-GLM-5}"
export LOCAL_OPENAI_API_KEY="${LOCAL_OPENAI_API_KEY:-}"
export YTDLP_BIN="${YTDLP_BIN:-yt-dlp}"
export FFMPEG_BIN="${FFMPEG_BIN:-ffmpeg}"
export WHISPER_BIN="${WHISPER_BIN:-}"
export WHISPER_MODEL="${WHISPER_MODEL:-$DATA_DIR/models/ggml-base.en.bin}"
export ASR_HTTP_BASE_URL="${ASR_HTTP_BASE_URL:-http://host.docker.internal:8088}"
export ASR_HTTP_TRANSCRIBE_PATH="${ASR_HTTP_TRANSCRIBE_PATH:-/transcribe}"
export ASR_HTTP_FIELD_NAME="${ASR_HTTP_FIELD_NAME:-wav}"
export ASR_HTTP_TIMEOUT_SEC="${ASR_HTTP_TIMEOUT_SEC:-120}"
export N8N_BASE_URL="${N8N_BASE_URL:-http://n8n:5678}"
export N8N_ANALYSIS_WEBHOOK_PATH="${N8N_ANALYSIS_WEBHOOK_PATH:-/webhook/storyforge-analysis}"
export N8N_REAL_CUT_WEBHOOK_PATH="${N8N_REAL_CUT_WEBHOOK_PATH:-/webhook/storyforge-real-cut}"
export N8N_AI_VIDEO_WEBHOOK_PATH="${N8N_AI_VIDEO_WEBHOOK_PATH:-/webhook/storyforge-ai-video}"
export N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH="${N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH:-/webhook/storyforge-content-source-sync}"
export ORCHESTRATOR_SHARED_SECRET="${ORCHESTRATOR_SHARED_SECRET:-storyforge-local-secret}"
export CUTVIDEO_BASE_URL="${CUTVIDEO_BASE_URL:-http://192.168.31.18:7860}"
export CUTVIDEO_API_KEY="${CUTVIDEO_API_KEY:-}"
export CUTVIDEO_BASE_CONFIG="${CUTVIDEO_BASE_CONFIG:-example.job.yaml}"
export CUTVIDEO_POLL_INTERVAL_SEC="${CUTVIDEO_POLL_INTERVAL_SEC:-10}"
export CUTVIDEO_MAX_WAIT_SEC="${CUTVIDEO_MAX_WAIT_SEC:-1800}"
export CUTVIDEO_UPLOAD_TIMEOUT_SEC="${CUTVIDEO_UPLOAD_TIMEOUT_SEC:-1800}"
export HUOBAO_BASE_URL="${HUOBAO_BASE_URL:-http://host.docker.internal:5678}"
export HUOBAO_POLL_INTERVAL_SEC="${HUOBAO_POLL_INTERVAL_SEC:-10}"
export HUOBAO_MAX_WAIT_SEC="${HUOBAO_MAX_WAIT_SEC:-900}"

# Fail with a clear message instead of a bare "No such file or directory".
if [[ ! -x "$PYTHON_BIN" ]]; then
  echo "[overlay] python interpreter not found or not executable: $PYTHON_BIN" >&2
  echo "[overlay] create the virtualenv or set PYTHON_BIN" >&2
  exit 1
fi

cd "$ROOT_DIR"
exec "$PYTHON_BIN" -m uvicorn app.main:app --host "$HOST" --port "$PORT"
#!/usr/bin/env bash
# Recreate the collector container with the source overlay, verify that it
# serves the expected routes, and roll back to the base compose stack if the
# verification does not succeed within MAX_ATTEMPTS * SLEEP_SEC seconds.
set -euo pipefail

ROOT_DIR="${ROOT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
BASE_COMPOSE_FILE="${BASE_COMPOSE_FILE:-$ROOT_DIR/docker-compose.yml}"
RUNTIME_FIXES_COMPOSE_FILE="${RUNTIME_FIXES_COMPOSE_FILE:-$ROOT_DIR/deploy/storyforge-collector-runtime-fixes.yml}"
OVERLAY_COMPOSE_FILE="${OVERLAY_COMPOSE_FILE:-$ROOT_DIR/deploy/storyforge-collector-source-overlay.yml}"
PROJECT_NAME="${PROJECT_NAME:-storyforge-gitea}"
COLLECTOR_URL="${COLLECTOR_URL:-http://127.0.0.1:8081}"
MAX_ATTEMPTS="${MAX_ATTEMPTS:-25}"
SLEEP_SEC="${SLEEP_SEC:-2}"

# Without these tools verification can never pass, which would recreate the
# container only to roll it back again; refuse to start instead.
for tool in docker curl jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "[cutover] required tool not found: $tool" >&2
    exit 1
  fi
done

compose_with_overlay() {
  docker compose -p "$PROJECT_NAME" -f "$BASE_COMPOSE_FILE" -f "$RUNTIME_FIXES_COMPOSE_FILE" -f "$OVERLAY_COMPOSE_FILE" "$@"
}

compose_base() {
  docker compose -p "$PROJECT_NAME" -f "$BASE_COMPOSE_FILE" -f "$RUNTIME_FIXES_COMPOSE_FILE" "$@"
}

# Succeeds only when the health endpoint answers and every required route is
# present in the OpenAPI document.
#
# This function is called as an `if` condition, where `set -e` is suspended:
# a failing command does not stop the function, and the function's status is
# that of its last command. Each step therefore returns explicitly on failure.
verify_overlay() {
  curl -fsS "$COLLECTOR_URL/healthz" >/dev/null || return 1
  local paths required
  paths="$(curl -fsS "$COLLECTOR_URL/openapi.json" | jq -r '.paths | keys[]')" || return 1
  for required in \
    '/v2/douyin/accounts' \
    '/v2/pipelines/real-cut' \
    '/v2/pipelines/ai-video' \
    '/v2/pipelines/content-source-sync'; do
    grep -qxF -- "$required" <<<"$paths" || return 1
  done
}

echo "[cutover] recreating collector with source overlay"
compose_with_overlay up -d --force-recreate collector

for attempt in $(seq 1 "$MAX_ATTEMPTS"); do
  if verify_overlay; then
    echo "[cutover] collector overlay is live on $COLLECTOR_URL"
    exit 0
  fi
  sleep "$SLEEP_SEC"
done

echo "[cutover] verification failed, rolling back to base compose"
compose_base up -d --force-recreate collector
exit 1
verification failed, rolling back to base compose" +compose_base up -d --force-recreate collector +exit 1 diff --git a/deploy/rollback_storyforge_collector_overlay.sh b/deploy/rollback_storyforge_collector_overlay.sh new file mode 100755 index 0000000..7028fe6 --- /dev/null +++ b/deploy/rollback_storyforge_collector_overlay.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="${ROOT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}" +BASE_COMPOSE_FILE="${BASE_COMPOSE_FILE:-$ROOT_DIR/docker-compose.yml}" +RUNTIME_FIXES_COMPOSE_FILE="${RUNTIME_FIXES_COMPOSE_FILE:-$ROOT_DIR/deploy/storyforge-collector-runtime-fixes.yml}" +PROJECT_NAME="${PROJECT_NAME:-storyforge-gitea}" + +docker compose -p "$PROJECT_NAME" -f "$BASE_COMPOSE_FILE" -f "$RUNTIME_FIXES_COMPOSE_FILE" up -d --force-recreate collector diff --git a/deploy/storyforge-collector-runtime-fixes.yml b/deploy/storyforge-collector-runtime-fixes.yml new file mode 100644 index 0000000..af228a8 --- /dev/null +++ b/deploy/storyforge-collector-runtime-fixes.yml @@ -0,0 +1,4 @@ +services: + collector: + environment: + N8N_BASE_URL: http://n8n:5678 diff --git a/deploy/storyforge-collector-source-overlay.yml b/deploy/storyforge-collector-source-overlay.yml new file mode 100644 index 0000000..d4a71d3 --- /dev/null +++ b/deploy/storyforge-collector-source-overlay.yml @@ -0,0 +1,6 @@ +services: + collector: + environment: + N8N_BASE_URL: http://n8n:5678 + volumes: + - ${COLLECTOR_APP_OVERLAY_DIR:-/Users/kris/code/StoryForge/collector-service/app}:/app/app:ro