# storyforge/collector-service/app/domestic_platform_features.py

from __future__ import annotations

import json
import re
from datetime import datetime, timezone
from typing import Any

from fastapi import Body, Depends, HTTPException, Query
from pydantic import BaseModel, Field

from .douyin_features import _extract_json_blobs_from_html, _fetch_html


class PlatformAnalysisRequest(BaseModel):
    """Options for an account analysis request.

    NOTE(review): the route consuming this model is outside this chunk; the
    notes below only restate defaults and validation bounds declared here.
    """

    # Model profile IDs; defaults to an empty list.
    model_profile_ids: list[str] = Field(default_factory=list)
    # Linked account IDs; defaults to an empty list.
    linked_account_ids: list[str] = Field(default_factory=list)
    # Enabled by default.
    include_linked_accounts: bool = True
    # Enabled by default.
    include_recent_similar_candidates: bool = True
    # Validated to the range 1..20; defaults to 6.
    max_videos: int = Field(default=6, ge=1, le=20)
    # Free-form text; empty by default.
    extra_focus: str = ""
    # Presumably the sampling temperature passed to the model — TODO confirm.
    temperature: float = 0.35
    # Disabled by default.
    auto_analyze_top_videos: bool = False
    # Validated to the range 1..10; defaults to 4.
    top_video_analysis_count: int = Field(default=4, ge=1, le=10)


class PlatformTopVideoAnalysisRequest(BaseModel):
    """Options for a top-video analysis request.

    NOTE(review): the consuming route is not visible in this chunk.
    """

    # Single model profile ID; empty string by default.
    model_profile_id: str = ""
    # Validated to the range 1..12; defaults to 5.
    top_video_count: int = Field(default=5, ge=1, le=12)
    # Defaults to 0; presumably a lower bound on a video score — TODO confirm.
    min_score: float = 0
    # Presumably the sampling temperature passed to the model — TODO confirm.
    temperature: float = 0.25


class PlatformSimilaritySearchRequest(BaseModel):
    """Options for a similar-account search request.

    NOTE(review): the consuming route is not visible in this chunk.
    """

    # Empty string by default.
    source_account_id: str = ""
    # Candidate URLs supplied by the caller; empty list by default.
    candidate_urls: list[str] = Field(default_factory=list)
    # Enabled by default.
    seed_linked_accounts: bool = True
    # Enabled by default.
    search_public_pages: bool = True
    # Empty string by default.
    model_profile_id: str = ""
    # Validated to the range 1..20; defaults to 8.
    max_candidates: int = Field(default=8, ge=1, le=20)
    # Free-form text; empty by default.
    extra_requirements: str = ""


class PlatformBenchmarkLinksRequest(BaseModel):
    """Options for linking target accounts to a source account.

    NOTE(review): the consuming route is not visible in this chunk.
    """

    # Target account IDs; empty list by default.
    target_account_ids: list[str] = Field(default_factory=list)
    # Target profile URLs; empty list by default.
    target_profile_urls: list[str] = Field(default_factory=list)
    # Defaults to "benchmark", matching the relation_type column default
    # declared for the *_account_relations table in this file.
    relation_type: str = "benchmark"
    # Free-form text; empty by default.
    note: str = ""
    # Empty string by default.
    search_id: str = ""


class PlatformTrackingAccountRequest(BaseModel):
    """Options for a tracked-account request.

    NOTE(review): the consuming route is not visible in this chunk.
    """

    # Required (no default).
    tracked_account_id: str
    # Empty string by default.
    assistant_id: str = ""
    # Free-form text; empty by default.
    note: str = ""


class PlatformTrackingCursorRequest(BaseModel):
    """Options for a tracking-cursor request.

    NOTE(review): the consuming route is not visible in this chunk.
    """

    # Required string; presumably a timestamp — format not enforced here.
    last_seen_at: str


class PlatformManualPageCapture(BaseModel):
    """One manually supplied page: its URL, title and a JSON-object payload."""

    # Empty string by default.
    url: str = ""
    # Empty string by default.
    title: str = ""
    # Arbitrary JSON object; empty dict by default.
    payload: dict[str, Any] = Field(default_factory=dict)


class PlatformAccountSyncRequest(BaseModel):
    """Options for an account sync request.

    NOTE(review): the consuming route is not visible in this chunk; how each
    field is used should be confirmed against it.
    """

    # Empty string by default.
    project_id: str = ""
    # Empty string by default.
    profile_url: str = ""
    # Empty string by default.
    handle: str = ""
    # Empty string by default.
    title: str = ""
    # Empty string by default; presumably a raw Cookie header value — TODO confirm.
    session_cookie: str = ""
    # Empty list by default.
    creator_center_urls: list[str] = Field(default_factory=list)
    # Optional JSON object; None by default.
    manual_profile_payload: dict[str, Any] | None = None
    # Manually captured pages; empty list by default.
    manual_creator_pages: list[PlatformManualPageCapture] = Field(default_factory=list)
    # Free-form text; empty by default.
    discovery_note: str = ""


def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, label: str) -> None:
    table_prefix = platform

    def now() -> str:
        """Return the current timestamp as produced by ``legacy.utc_now()``."""
        timestamp = legacy.utc_now()
        return timestamp

    def make_id(prefix: str) -> str:
        """Return a new identifier for ``prefix`` by delegating to ``legacy.make_id``."""
        identifier = legacy.make_id(prefix)
        return identifier

    def _safe_json_dumps(value: Any) -> str:
        return json.dumps(value or {}, ensure_ascii=False)

    def _parse_json(raw: str, fallback: Any) -> Any:
        cleaned = str(raw or "").strip()
        if not cleaned:
            return fallback
        try:
            value = json.loads(cleaned)
            return value
        except json.JSONDecodeError:
            return fallback

    def ensure_schema() -> None:
        """Create this platform's tables and indexes if they do not exist yet.

        All object names are prefixed with ``table_prefix``. After running the
        DDL script, two JSON list columns are added to the
        ``*_analysis_reports`` table when ``PRAGMA table_info`` does not report
        them.

        NOTE(review): the foreign keys reference ``accounts``,
        ``content_sources`` and ``assistants``, which are not created here and
        are presumably provided by the legacy schema — confirm.
        """
        # Doubled braces ('{{}}') are f-string escapes for a literal '{}' default.
        schema = f"""
        CREATE TABLE IF NOT EXISTS {table_prefix}_analysis_reports (
            id TEXT PRIMARY KEY,
            user_id TEXT NOT NULL,
            account_source_id TEXT NOT NULL,
            focus_text TEXT NOT NULL DEFAULT '',
            prompt_text TEXT NOT NULL DEFAULT '',
            context_json TEXT NOT NULL DEFAULT '{{}}',
            created_at TEXT NOT NULL,
            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
            FOREIGN KEY(account_source_id) REFERENCES content_sources(id) ON DELETE CASCADE
        );

        CREATE INDEX IF NOT EXISTS idx_{table_prefix}_analysis_reports_account_created
        ON {table_prefix}_analysis_reports(account_source_id, created_at DESC);

        CREATE TABLE IF NOT EXISTS {table_prefix}_analysis_suggestions (
            id TEXT PRIMARY KEY,
            report_id TEXT NOT NULL,
            model_profile_id TEXT NOT NULL DEFAULT '',
            model_label TEXT NOT NULL DEFAULT '',
            status TEXT NOT NULL DEFAULT 'ok',
            suggestion_text TEXT NOT NULL DEFAULT '',
            parsed_json TEXT NOT NULL DEFAULT '{{}}',
            created_at TEXT NOT NULL,
            FOREIGN KEY(report_id) REFERENCES {table_prefix}_analysis_reports(id) ON DELETE CASCADE
        );

        CREATE TABLE IF NOT EXISTS {table_prefix}_similarity_searches (
            id TEXT PRIMARY KEY,
            user_id TEXT NOT NULL,
            source_account_id TEXT NOT NULL,
            prompt_text TEXT NOT NULL DEFAULT '',
            context_json TEXT NOT NULL DEFAULT '{{}}',
            created_at TEXT NOT NULL,
            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
            FOREIGN KEY(source_account_id) REFERENCES content_sources(id) ON DELETE CASCADE
        );

        CREATE TABLE IF NOT EXISTS {table_prefix}_similarity_candidates (
            id TEXT PRIMARY KEY,
            search_id TEXT NOT NULL,
            candidate_account_id TEXT,
            candidate_profile_url TEXT NOT NULL DEFAULT '',
            heuristic_score REAL NOT NULL DEFAULT 0,
            agent_score REAL NOT NULL DEFAULT 0,
            rationale_text TEXT NOT NULL DEFAULT '',
            dimensions_json TEXT NOT NULL DEFAULT '{{}}',
            raw_output_json TEXT NOT NULL DEFAULT '{{}}',
            rank_index INTEGER NOT NULL DEFAULT 0,
            created_at TEXT NOT NULL,
            FOREIGN KEY(search_id) REFERENCES {table_prefix}_similarity_searches(id) ON DELETE CASCADE,
            FOREIGN KEY(candidate_account_id) REFERENCES content_sources(id) ON DELETE SET NULL
        );

        CREATE INDEX IF NOT EXISTS idx_{table_prefix}_similarity_candidates_search_rank
        ON {table_prefix}_similarity_candidates(search_id, rank_index ASC);

        CREATE TABLE IF NOT EXISTS {table_prefix}_account_relations (
            id TEXT PRIMARY KEY,
            user_id TEXT NOT NULL,
            source_account_id TEXT NOT NULL,
            target_account_id TEXT,
            target_profile_url TEXT NOT NULL DEFAULT '',
            relation_type TEXT NOT NULL DEFAULT 'benchmark',
            note TEXT NOT NULL DEFAULT '',
            search_id TEXT NOT NULL DEFAULT '',
            created_at TEXT NOT NULL,
            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
            FOREIGN KEY(source_account_id) REFERENCES content_sources(id) ON DELETE CASCADE,
            FOREIGN KEY(target_account_id) REFERENCES content_sources(id) ON DELETE SET NULL
        );

        CREATE INDEX IF NOT EXISTS idx_{table_prefix}_account_relations_source
        ON {table_prefix}_account_relations(source_account_id, created_at DESC);

        CREATE TABLE IF NOT EXISTS {table_prefix}_tracked_accounts (
            id TEXT PRIMARY KEY,
            user_id TEXT NOT NULL,
            tracked_account_id TEXT NOT NULL,
            assistant_id TEXT,
            note TEXT NOT NULL DEFAULT '',
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            UNIQUE(user_id, tracked_account_id),
            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
            FOREIGN KEY(tracked_account_id) REFERENCES content_sources(id) ON DELETE CASCADE,
            FOREIGN KEY(assistant_id) REFERENCES assistants(id) ON DELETE SET NULL
        );

        CREATE INDEX IF NOT EXISTS idx_{table_prefix}_tracked_accounts_user_updated
        ON {table_prefix}_tracked_accounts(user_id, updated_at DESC);

        CREATE TABLE IF NOT EXISTS {table_prefix}_tracking_cursors (
            user_id TEXT PRIMARY KEY,
            last_seen_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE
        );
        """
        with legacy.db.session() as conn:
            conn.executescript(schema)
            # Lightweight migration: CREATE TABLE IF NOT EXISTS leaves an existing
            # table untouched, so columns missing from it are added individually.
            existing_columns = {row["name"] for row in conn.execute(f"PRAGMA table_info({table_prefix}_analysis_reports)").fetchall()}
            if "model_profile_ids_json" not in existing_columns:
                conn.execute(
                    f"ALTER TABLE {table_prefix}_analysis_reports ADD COLUMN model_profile_ids_json TEXT NOT NULL DEFAULT '[]'"
                )
            if "linked_account_ids_json" not in existing_columns:
                conn.execute(
                    f"ALTER TABLE {table_prefix}_analysis_reports ADD COLUMN linked_account_ids_json TEXT NOT NULL DEFAULT '[]'"
                )

    # Run immediately, each time routes are registered for a platform.
    ensure_schema()

    def _content_source_rows(user_id: str, platform_value: str, kind: str = "") -> list[dict[str, Any]]:
        """Fetch a user's ``content_sources`` rows for one platform, newest first.

        When ``kind`` is non-empty, only rows whose ``source_kind`` equals it are
        kept; the filter is applied in Python after the query.
        """
        fetched = legacy.db.fetch_all(
            "SELECT * FROM content_sources WHERE user_id = ? AND platform = ? ORDER BY updated_at DESC, created_at DESC",
            (user_id, platform_value),
        )
        if not kind:
            return fetched
        return [entry for entry in fetched if entry.get("source_kind") == kind]

    def _content_source_payload(row: dict[str, Any]) -> dict[str, Any]:
        """Convert a content-source row via ``legacy.content_source_payload``."""
        payload = legacy.content_source_payload(row)
        return payload

    def _source_metadata(row: dict[str, Any]) -> dict[str, Any]:
        """Return the ``metadata`` entry of the row's payload, or ``{}`` if absent."""
        payload = _content_source_payload(row)
        return payload.get("metadata", {})

    def _first_non_empty(*values: Any) -> str:
        for value in values:
            if value is None:
                continue
            if isinstance(value, str):
                stripped = value.strip()
                if stripped:
                    return stripped
            elif value not in ("", [], {}, ()):
                return str(value)
        return ""

    def _dedupe_strings(values: list[str]) -> list[str]:
        result: list[str] = []
        seen: set[str] = set()
        for value in values:
            item = str(value or "").strip()
            if not item:
                continue
            key = item.lower()
            if key in seen:
                continue
            seen.add(key)
            result.append(item)
        return result

    def _compact_text(value: Any, limit: int = 500) -> str:
        text = str(value or "").strip()
        if len(text) <= limit:
            return text
        return f"{text[: limit - 1]}…"

    def _normalize_id_list(values: list[str] | None) -> list[str]:
        """Return the IDs stripped, without blanks, de-duplicated case-insensitively.

        ``None`` is treated as an empty list. Stripping and blank removal are
        done by ``_dedupe_strings`` itself.
        """
        return _dedupe_strings(list(values or []))

    def _parse_count(value: Any) -> float:
        if value is None:
            return 0.0
        if isinstance(value, (int, float)):
            return float(value)
        text = str(value).strip().lower().replace(",", "")
        if not text:
            return 0.0
        multiplier = 1.0
        if text.endswith("w") or text.endswith("万"):
            multiplier = 10_000.0
            text = text[:-1]
        elif text.endswith("亿"):
            multiplier = 100_000_000.0
            text = text[:-1]
        match = re.search(r"-?\d+(?:\.\d+)?", text)
        if not match:
            return 0.0
        try:
            return float(match.group()) * multiplier
        except ValueError:
            return 0.0

    def _extract_hashtags(*texts: str) -> list[str]:
        """Collect ``#tag`` names (without the ``#``) from the given texts.

        Empty texts are skipped; the result is de-duplicated
        case-insensitively in order of first appearance.
        """
        hashtag_pattern = re.compile(r"#([\w\u4e00-\u9fff]+)")
        found = [
            hit.group(1)
            for chunk in texts
            if chunk
            for hit in hashtag_pattern.finditer(chunk)
        ]
        return _dedupe_strings(found)

    def _extract_keywords(*texts: str) -> list[str]:
        """Extract candidate keywords from the given texts.

        For each non-empty text this collects, in order: hashtags, runs of 2-8
        CJK characters, and ASCII identifiers of 3-21 characters starting with
        a letter. Generic stop words, plus the current ``platform`` and
        ``label``, are removed and the result is de-duplicated
        case-insensitively.
        """
        candidates: list[str] = []
        for text in texts:
            if not text:
                continue
            candidates.extend(_extract_hashtags(text))
            candidates.extend(re.findall(r"[\u4e00-\u9fff]{2,8}", text))
            candidates.extend(re.findall(r"[A-Za-z][A-Za-z0-9_]{2,20}", text))
        stop_words = {
            "视频",
            "作品",
            "账号",
            "内容",
            "发布",
            "更多",
            "关注",
            "用户",
            "creator",
            "profile",
            # Candidates are compared lower-cased below, so the platform and
            # label must be lower-cased too or a mixed-case value never matches.
            platform.lower(),
            label.lower(),
        }
        return _dedupe_strings([item for item in candidates if item.lower() not in stop_words])

    def _normalize_timestamp(value: Any) -> str:
        if value in (None, ""):
            return ""
        if isinstance(value, (int, float)):
            timestamp = float(value)
            if timestamp > 1_000_000_000_000:
                timestamp /= 1000.0
            if timestamp <= 0:
                return ""
            return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()
        text = str(value or "").strip()
        if not text:
            return ""
        if re.fullmatch(r"\d{10,13}", text):
            return _normalize_timestamp(float(text))
        return text

    def _flatten_json(value: Any, prefix: str = "") -> list[tuple[str, str, str]]:
        """Flatten nested JSON into ``(path, type_name, text)`` rows, one per leaf.

        Dict keys are joined with ``.`` and list positions rendered as
        ``[index]``; a bare scalar at the root gets the path ``"$"``. Leaf text
        is limited to 2000 characters. Empty dicts and lists produce no rows.
        """
        rows: list[tuple[str, str, str]] = []
        if isinstance(value, dict):
            for key, child in value.items():
                next_prefix = f"{prefix}.{key}" if prefix else str(key)
                rows.extend(_flatten_json(child, next_prefix))
        elif isinstance(value, list):
            for index, child in enumerate(value):
                next_prefix = f"{prefix}[{index}]"
                rows.extend(_flatten_json(child, next_prefix))
        else:
            # Stringify before compacting: ``_compact_text`` treats every falsy
            # value as empty, which would blank out real values such as 0,
            # 0.0 and False. ``None`` still renders as "".
            leaf_text = "" if value is None else str(value)
            rows.append((prefix or "$", type(value).__name__, _compact_text(leaf_text, 2000)))
        return rows

    def _walk_json(value: Any) -> list[dict[str, Any]]:
        items: list[dict[str, Any]] = []
        if isinstance(value, dict):
            items.append(value)
            for child in value.values():
                items.extend(_walk_json(child))
        elif isinstance(value, list):
            for child in value:
                items.extend(_walk_json(child))
        return items

    def _default_creator_center_urls() -> list[str]:
        """Return the built-in creator-center URLs for the current ``platform``.

        Platforms without an entry get an empty list.
        """
        urls_by_platform: dict[str, list[str]] = {
            "kuaishou": [
                "https://creator.kuaishou.com/creator/home",
                "https://creator.kuaishou.com/creator/content/works",
                "https://creator.kuaishou.com/creator/data/overview",
            ],
            "xiaohongshu": ["https://creator.xiaohongshu.com/publish/publish"],
            "bilibili": ["https://member.bilibili.com/platform/home"],
            "wechat_video": ["https://channels.weixin.qq.com/platform"],
        }
        if platform in urls_by_platform:
            return urls_by_platform[platform]
        return []

    def _profile_candidate_score(candidate: dict[str, Any]) -> int:
        score = 0
        interesting_keys = {
            "nickname",
            "name",
            "user_name",
            "author_name",
            "handle",
            "uid",
            "user_id",
            "kwaiId",
            "kwai_id",
            "bio",
            "description",
            "signature",
            "follower_count",
            "fans_count",
            "fans",
            "avatar_url",
            "head_url",
        }
        score += sum(1 for key in interesting_keys if key in candidate)
        if "profile" in candidate and isinstance(candidate["profile"], dict):
            score += 2
        return score

    def _extract_profile_candidates(payload: Any) -> list[dict[str, Any]]:
        """Collect dicts in ``payload`` that score at least 2 as a profile.

        Every dict reached by ``_walk_json`` is tested, and so is its nested
        ``profile`` dict when present (which may therefore appear twice).
        """
        threshold = 2
        found: list[dict[str, Any]] = []
        for node in _walk_json(payload):
            if _profile_candidate_score(node) >= threshold:
                found.append(node)
            nested = node.get("profile")
            if not isinstance(nested, dict):
                continue
            if _profile_candidate_score(nested) >= threshold:
                found.append(nested)
        return found

    def _normalize_profile_candidate(
        candidate: dict[str, Any],
        *,
        fallback_url: str = "",
        fallback_title: str = "",
        fallback_handle: str = "",
    ) -> dict[str, Any]:
        """Map a raw profile-like dict onto the normalized profile shape.

        Each output field takes the first non-empty value among several
        alternative source keys, then the matching fallback where one applies.

        Args:
            candidate: Raw dict taken from a scraped or manual payload.
            fallback_url: Used for ``profile_url`` when the candidate has none.
            fallback_title: Used for ``nickname`` when the candidate has none.
            fallback_handle: Used for ``handle`` (and as the last resort for
                ``nickname``) when the candidate has none.

        Returns:
            A dict with ``nickname``, ``signature``, ``avatar_url``,
            ``profile_url``, ``handle``, ``stats`` (followers, likes, plays,
            videos as floats), ``tags`` and ``keywords``.
        """
        avatar = (
            candidate.get("avatar_url")
            or candidate.get("avatar")
            or candidate.get("head_url")
            or candidate.get("headurl")
            or candidate.get("user_avatar")
            or candidate.get("profile_image_url")
        )
        if isinstance(avatar, dict):
            url_list = avatar.get("url_list")
            avatar = _first_non_empty(
                # Guard against an empty list: indexing [0] would raise
                # IndexError and abort normalisation of the whole profile.
                url_list[0] if isinstance(url_list, list) and url_list else "",
                avatar.get("url"),
                avatar.get("src"),
            )
        nickname = _first_non_empty(
            candidate.get("nickname"),
            candidate.get("name"),
            candidate.get("user_name"),
            candidate.get("author_name"),
            candidate.get("display_name"),
            fallback_title,
            fallback_handle,
        )
        signature = _first_non_empty(
            candidate.get("signature"),
            candidate.get("bio"),
            candidate.get("description"),
            candidate.get("desc"),
            candidate.get("intro"),
            candidate.get("introduction"),
        )
        explicit_tags = candidate.get("tags") or candidate.get("content_tags") or candidate.get("keywords") or []
        if not isinstance(explicit_tags, list):
            # A single scalar tag is treated as a one-element list.
            explicit_tags = [explicit_tags]
        handle = _first_non_empty(
            candidate.get("handle"),
            candidate.get("user_id"),
            candidate.get("uid"),
            candidate.get("kwai_id"),
            candidate.get("kwaiId"),
            candidate.get("short_id"),
            fallback_handle,
        )
        return {
            "nickname": nickname,
            "signature": signature,
            "avatar_url": _first_non_empty(avatar),
            "profile_url": _first_non_empty(candidate.get("profile_url"), candidate.get("share_url"), fallback_url),
            "handle": handle,
            "stats": {
                "followers": _parse_count(
                    candidate.get("follower_count")
                    or candidate.get("fans_count")
                    or candidate.get("fans")
                    or candidate.get("followers")
                ),
                "likes": _parse_count(
                    candidate.get("like_count")
                    or candidate.get("liked_count")
                    or candidate.get("total_like")
                ),
                "plays": _parse_count(
                    candidate.get("play_count")
                    or candidate.get("view_count")
                    or candidate.get("views")
                ),
                "videos": _parse_count(
                    candidate.get("video_count")
                    or candidate.get("works_count")
                    or candidate.get("published_count")
                    or candidate.get("aweme_count")
                ),
            },
            # Explicit tags first, then hashtags found in the signature/nickname;
            # non-scalar tag entries are dropped.
            "tags": _dedupe_strings(
                [str(item) for item in explicit_tags if isinstance(item, (str, int, float))]
                + _extract_hashtags(signature, nickname)
            ),
            "keywords": _extract_keywords(nickname, signature, handle),
        }

    def _video_candidate_score(candidate: dict[str, Any]) -> int:
        """Score how much a dict looks like a video/work entry.

        Each group of alternative keys adds its weight when at least one key
        holds a non-empty value; a dict under ``stats`` or ``statistics`` adds
        one more point.
        """

        def has_any(*keys: str) -> bool:
            return bool(_first_non_empty(*(candidate.get(key) for key in keys)))

        weighted_groups = (
            (2, ("title", "name", "caption")),
            (2, ("share_url", "video_url", "play_url", "url")),
            (2, ("video_id", "aweme_id", "item_id", "note_id", "works_id")),
            (1, ("description", "desc", "summary", "text")),
            (1, ("cover_url", "published_at", "publish_time", "create_time")),
            (1, ("duration_sec", "duration", "play_count", "view_count")),
        )
        total = 0
        for weight, keys in weighted_groups:
            if has_any(*keys):
                total += weight
        stats_value = candidate.get("stats") or candidate.get("statistics")
        if isinstance(stats_value, dict):
            total += 1
        return total

    def _extract_video_candidates(payload: Any) -> list[dict[str, Any]]:
        """Return the dicts in ``payload`` whose video score is at least 4."""
        return [node for node in _walk_json(payload) if _video_candidate_score(node) >= 4]

    def _video_metric_bundle(candidate: dict[str, Any]) -> dict[str, float]:
        """Read play/like/comment/share counts from a raw video dict.

        Top-level keys win over the nested ``stats`` dict; ``statistics`` is
        used as the nested source only when ``stats`` is missing or empty.
        """
        nested = candidate.get("stats")
        if not isinstance(nested, dict):
            nested = {}
        if not nested:
            alternative = candidate.get("statistics")
            if isinstance(alternative, dict):
                nested = alternative

        def first_truthy(*lookups: tuple[dict[str, Any], str]) -> Any:
            # Mirrors an ``or`` chain: first truthy value, else the last one read.
            picked: Any = None
            for source, key in lookups:
                picked = source.get(key)
                if picked:
                    break
            return picked

        play_raw = first_truthy(
            (candidate, "play_count"),
            (candidate, "view_count"),
            (nested, "play_count"),
            (nested, "view_count"),
        )
        like_raw = first_truthy(
            (candidate, "like_count"),
            (candidate, "digg_count"),
            (nested, "like_count"),
            (nested, "digg_count"),
        )
        comment_raw = first_truthy((candidate, "comment_count"), (nested, "comment_count"))
        share_raw = first_truthy((candidate, "share_count"), (nested, "share_count"))
        return {
            "play": _parse_count(play_raw),
            "like": _parse_count(like_raw),
            "comment": _parse_count(comment_raw),
            "share": _parse_count(share_raw),
        }

    def _heuristic_video_performance_score(stats: dict[str, Any]) -> float:
        """Compute a 0-100 engagement score from play/like/comment/share counts.

        Each metric is read from its short key (``play``) or, failing that,
        its ``*_count`` key. The score is ``play/5000 + like/100 + comment/20
        + share/10``, capped at 100 and rounded to one decimal; it is 0.0 when
        no metric is positive.
        """

        def metric(short_key: str, long_key: str) -> float:
            return _parse_count(stats.get(short_key) or stats.get(long_key))

        play = metric("play", "play_count")
        like = metric("like", "like_count")
        comment = metric("comment", "comment_count")
        share = metric("share", "share_count")
        if all(amount <= 0 for amount in (play, like, comment, share)):
            return 0.0
        raw_score = (play / 5000.0) + (like / 100.0) + (comment / 20.0) + (share / 10.0)
        capped = min(100.0, raw_score)
        return round(capped, 1)

    def _normalize_video_candidate(candidate: dict[str, Any]) -> dict[str, Any]:
        """Map a raw video-like dict onto the normalized video shape.

        Each field takes the first non-empty value among several alternative
        source keys. The title falls back to ``"未命名作品"`` and the
        description falls back to the title. ``raw`` keeps the original dict.

        Returns:
            A dict with ``external_id``, ``title``, ``description``,
            ``share_url``, ``cover_url``, ``duration_sec``, ``published_at``,
            ``tags``, ``stats``, ``performance_score`` and ``raw``.
        """
        cover = candidate.get("cover_url") or candidate.get("cover") or candidate.get("poster") or candidate.get("image") or candidate.get("thumbnail")
        if isinstance(cover, dict):
            cover = _first_non_empty(
                cover.get("url"),
                cover.get("src"),
                cover.get("url_list", [""])[0] if isinstance(cover.get("url_list"), list) and cover.get("url_list") else "",
            )
        tags = candidate.get("tags") or candidate.get("content_tags") or candidate.get("keywords") or []
        if not isinstance(tags, list):
            # A single scalar tag is treated as a one-element list.
            tags = [tags]
        stats = _video_metric_bundle(candidate)
        title = _first_non_empty(candidate.get("title"), candidate.get("name"), candidate.get("caption"), candidate.get("desc"))
        description = _first_non_empty(candidate.get("description"), candidate.get("desc"), candidate.get("summary"), candidate.get("text"), title)
        raw_duration = candidate.get("duration_sec") or candidate.get("duration") or 0
        try:
            duration_sec = float(raw_duration)
        except (TypeError, ValueError):
            # Scraped payloads may carry non-numeric durations ("01:23", dicts);
            # record an unknown duration instead of failing the whole video.
            duration_sec = 0.0
        return {
            "external_id": _first_non_empty(candidate.get("video_id"), candidate.get("aweme_id"), candidate.get("item_id"), candidate.get("note_id"), candidate.get("works_id")),
            "title": title or "未命名作品",
            "description": description,
            "share_url": _first_non_empty(candidate.get("share_url"), candidate.get("video_url"), candidate.get("play_url"), candidate.get("url")),
            "cover_url": _first_non_empty(cover),
            "duration_sec": duration_sec,
            "published_at": _normalize_timestamp(candidate.get("published_at") or candidate.get("publish_time") or candidate.get("create_time")),
            "tags": _dedupe_strings([str(item) for item in tags if isinstance(item, (str, int, float))]),
            "stats": stats,
            "performance_score": _heuristic_video_performance_score(stats),
            "raw": candidate,
        }

    def _extract_videos(payloads: list[Any]) -> list[dict[str, Any]]:
        """Extract, normalize and de-duplicate videos from several payloads.

        Videos are keyed by share URL, then external ID, then title; the first
        occurrence of a key wins. The result is sorted by performance score and
        then ``published_at`` text, both descending.
        """
        collected: list[dict[str, Any]] = []
        seen_keys: set[str] = set()
        raw_candidates = (
            raw
            for payload in payloads
            for raw in _extract_video_candidates(payload)
        )
        for raw in raw_candidates:
            video = _normalize_video_candidate(raw)
            key = video["share_url"] or video["external_id"] or video["title"]
            if key and key not in seen_keys:
                seen_keys.add(key)
                collected.append(video)

        def sort_key(video: dict[str, Any]) -> tuple[float, str]:
            return (float(video.get("performance_score") or 0), str(video.get("published_at") or ""))

        return sorted(collected, key=sort_key, reverse=True)

    def _pick_best_profile(
        payloads: list[Any],
        *,
        fallback_url: str = "",
        fallback_title: str = "",
        fallback_handle: str = "",
    ) -> dict[str, Any]:
        """Pick the most complete normalized profile found in ``payloads``.

        Candidates come from ``_extract_profile_candidates``; when there are
        none and the first payload is a dict, that dict is the only candidate.
        Completeness weights: nickname 3, signature 2, handle 2, followers 1,
        avatar 1. Ties keep the earliest candidate. With no candidates at all,
        a profile built purely from the fallbacks is returned.
        """

        def normalize(raw: dict[str, Any]) -> dict[str, Any]:
            return _normalize_profile_candidate(
                raw,
                fallback_url=fallback_url,
                fallback_title=fallback_title,
                fallback_handle=fallback_handle,
            )

        def completeness(profile: dict[str, Any]) -> int:
            weighted = (
                (3, profile["nickname"]),
                (2, profile["signature"]),
                (2, profile["handle"]),
                (1, profile["stats"]["followers"]),
                (1, profile["avatar_url"]),
            )
            return sum(weight for weight, present in weighted if present)

        raw_candidates: list[dict[str, Any]] = [
            raw for payload in payloads for raw in _extract_profile_candidates(payload)
        ]
        if not raw_candidates and payloads and isinstance(payloads[0], dict):
            raw_candidates = [payloads[0]]

        winner = normalize({})
        winner_score = -1
        for raw in raw_candidates:
            profile = normalize(raw)
            profile_score = completeness(profile)
            if profile_score > winner_score:
                winner, winner_score = profile, profile_score
        return winner

    def _fields_payload(payload: Any) -> tuple[list[dict[str, Any]], int]:
        """Flatten ``payload`` into field records.

        Returns:
            A pair of (at most the first 300 field dicts, total number of
            flattened fields).
        """
        rows = _flatten_json(payload)
        preview: list[dict[str, Any]] = []
        for path, type_name, text in rows[:300]:
            preview.append(
                {
                    "field_path": path,
                    "field_type": type_name,
                    "field_value_text": text,
                }
            )
        return preview, len(rows)

    def _snapshot_summary_from_payload(payload: Any) -> dict[str, Any]:
        """Summarize a payload as its first eight flattened fields.

        A ``field_count`` entry with the total number of fields is added when
        the payload has any fields and none of them is itself at the path
        ``field_count``.
        """
        fields, total = _fields_payload(payload)
        summary: dict[str, Any] = {
            entry["field_path"]: entry["field_value_text"] for entry in fields[:8]
        }
        if total and "field_count" not in summary:
            summary["field_count"] = total
        return summary

    def _build_snapshot(snapshot_type: str, source_url: str, payload: Any) -> dict[str, Any]:
        """Assemble a snapshot record for ``payload``.

        The record carries a new ``{platform}_snapshot`` ID, the collection
        time from ``now()``, the flattened fields with their total count, a
        short summary and the raw payload itself.
        """
        fields, field_count = _fields_payload(payload)
        collected_at = now()
        snapshot_id = make_id(f"{platform}_snapshot")
        summary = _snapshot_summary_from_payload(payload)
        snapshot: dict[str, Any] = {
            "id": snapshot_id,
            "snapshot_type": snapshot_type,
            "source_url": source_url,
            "field_count": field_count,
            "collected_at": collected_at,
            "summary": summary,
            "raw_payload": payload,
            "fields": fields,
        }
        return snapshot

    def _creator_center_state(metadata: dict[str, Any]) -> dict[str, Any]:
        state = metadata.get("creator_center")
        return state if isinstance(state, dict) else {}

    def _list_snapshots(account_row: dict[str, Any]) -> list[dict[str, Any]]:
        """Return the account's stored creator-center snapshots, newest first.

        Snapshots are read from the row metadata under
        ``creator_center.snapshots``; non-dict entries are dropped and a
        non-list value yields an empty list. Ordering is by the
        ``collected_at`` text, descending.
        """
        state = _creator_center_state(_source_metadata(account_row))
        raw_snapshots = state.get("snapshots") or []
        if not isinstance(raw_snapshots, list):
            return []
        return sorted(
            (entry for entry in raw_snapshots if isinstance(entry, dict)),
            key=lambda entry: str(entry.get("collected_at") or ""),
            reverse=True,
        )

    def _snapshot_brief(snapshot: dict[str, Any]) -> dict[str, Any]:
        return {
            "id": snapshot.get("id", ""),
            "snapshot_type": snapshot.get("snapshot_type", ""),
            "source_url": snapshot.get("source_url", ""),
            "field_count": snapshot.get("field_count", 0),
            "collected_at": snapshot.get("collected_at", ""),
            "summary": snapshot.get("summary") or {},
        }

    def _latest_snapshot(account_row: dict[str, Any], snapshot_type: str) -> dict[str, Any] | None:
        """Return the newest snapshot of ``snapshot_type``, or ``None`` if there is none."""
        for snapshot in _list_snapshots(account_row):
            if snapshot.get("snapshot_type") == snapshot_type:
                return snapshot
        return None

    def _snapshot_detail(account_row: dict[str, Any], snapshot_id: str) -> dict[str, Any]:
        """Return the full record of the snapshot with ``snapshot_id``.

        Missing keys get defaults; falsy ``summary``/``raw_payload`` become
        ``{}`` and falsy ``fields`` becomes ``[]``.

        Raises:
            HTTPException: 404 when the account has no such snapshot.
        """
        found: dict[str, Any] | None = None
        for entry in _list_snapshots(account_row):
            if entry.get("id") == snapshot_id:
                found = entry
                break
        if not found:
            raise HTTPException(status_code=404, detail="Snapshot not found")

        header_defaults: tuple[tuple[str, Any], ...] = (
            ("id", ""),
            ("snapshot_type", ""),
            ("source_url", ""),
            ("field_count", 0),
            ("collected_at", ""),
        )
        detail: dict[str, Any] = {key: found.get(key, default) for key, default in header_defaults}
        detail["summary"] = found.get("summary") or {}
        detail["raw_payload"] = found.get("raw_payload") or {}
        detail["fields"] = found.get("fields") or []
        return detail

    async def _collect_public_profile(
        source_url: str,
        manual_payload: dict[str, Any] | None,
        *,
        fallback_title: str = "",
        fallback_handle: str = "",
    ) -> dict[str, Any]:
        """Collect profile data from a manual payload or the public profile page.

        A non-empty ``manual_payload`` is used as-is and nothing is fetched.
        Otherwise, when a URL is given, the page is fetched and its embedded
        JSON blobs become the payloads. Fetch or parse failures are recorded in
        ``errors`` rather than raised.

        Returns:
            A dict with ``profile`` (best normalized profile), ``payload``
            (the single payload, up to eight payloads under ``items``, or a
            stub built from the fallbacks), ``errors`` and ``source_url`` (the
            final URL after fetching, if a fetch succeeded).
        """
        collected: list[Any] = []
        problems: list[str] = []
        final_source = source_url.strip()

        if manual_payload:
            collected.append(manual_payload)
        elif final_source:
            try:
                fetched_url, html = await _fetch_html(final_source)
                final_source = fetched_url
                blobs = _extract_json_blobs_from_html(html)
                usable = [blob["payload"] for blob in blobs if isinstance(blob.get("payload"), (dict, list))]
                collected.extend(usable)
            except Exception as exc:
                # Best effort: report the failure and continue with the fallbacks.
                problems.append(f"public_profile_fetch_failed: {exc}")

        best_profile = _pick_best_profile(
            collected,
            fallback_url=final_source,
            fallback_title=fallback_title,
            fallback_handle=fallback_handle,
        )

        raw_payload: Any
        if len(collected) == 1:
            raw_payload = collected[0]
        elif collected:
            raw_payload = {"items": collected[:8]}
        elif final_source:
            raw_payload = {
                "profile_url": final_source,
                "title": fallback_title,
                "handle": fallback_handle,
            }
        else:
            raw_payload = {}

        return {
            "profile": best_profile,
            "payload": raw_payload,
            "errors": problems,
            "source_url": final_source,
        }

    async def _collect_creator_center_pages(
        urls: list[str],
        cookie: str,
        manual_pages: list[PlatformManualPageCapture],
        *,
        fallback_title: str = "",
        fallback_handle: str = "",
    ) -> dict[str, Any]:
        """Collect creator-center pages from manual captures and cookie-backed fetches.

        Manual pages are always included. Pages are fetched only when
        ``cookie`` is non-blank, from ``urls`` or else the platform defaults.
        A fetched page contributes its embedded JSON payloads; with none, an
        HTML excerpt is stored instead. Per-URL failures are recorded in
        ``errors`` rather than raised.

        Returns:
            A dict with ``profile`` (best normalized profile across all
            payloads), ``payload`` (``pages`` plus ``page_count``, or ``{}``
            without pages), ``pages`` and ``errors``.
        """
        captured_pages: list[dict[str, Any]] = []
        problems: list[str] = []
        all_payloads: list[Any] = []

        for manual in manual_pages:
            manual_payload = manual.payload if isinstance(manual.payload, dict) else {}
            captured_pages.append({"url": manual.url, "title": manual.title, "payload": manual_payload})
            all_payloads.append(manual_payload)

        candidate_urls = _dedupe_strings(urls or _default_creator_center_urls())
        # Without a session cookie no page is fetched at all.
        urls_to_fetch = candidate_urls if cookie.strip() else []
        for url in urls_to_fetch:
            try:
                final_url, html = await _fetch_html(url, cookie=cookie)
                blobs = _extract_json_blobs_from_html(html)
                extracted = [blob["payload"] for blob in blobs if isinstance(blob.get("payload"), (dict, list))]
                if len(extracted) == 1:
                    page_payload: Any = extracted[0]
                elif extracted:
                    page_payload = {"items": extracted[:8]}
                elif html.strip():
                    page_payload = {"html_excerpt": _compact_text(html, 1600)}
                else:
                    page_payload = {"items": extracted[:8]}
                captured_pages.append({"url": final_url, "title": "", "payload": page_payload})
                if extracted:
                    all_payloads.extend(extracted)
                else:
                    # Both no-JSON payload shapes above are non-empty dicts.
                    all_payloads.append(page_payload)
            except Exception as exc:
                # Best effort: one failing page must not abort the others.
                problems.append(f"creator_center_fetch_failed[{url}]: {exc}")

        best_profile = _pick_best_profile(
            all_payloads,
            fallback_title=fallback_title,
            fallback_handle=fallback_handle,
        )
        if captured_pages:
            aggregated_payload: dict[str, Any] = {
                "pages": captured_pages,
                "page_count": len(captured_pages),
            }
        else:
            aggregated_payload = {}
        return {
            "profile": best_profile,
            "payload": aggregated_payload,
            "pages": captured_pages,
            "errors": problems,
        }

    def _resolve_project_id(requested_project_id: str, owner_id: str, existing: dict[str, Any] | None) -> str:
        """Pick the project an account source should belong to.

        Resolution order: the explicitly requested project (which must belong
        to ``owner_id``), then the project already stored on ``existing``,
        then the owner's most recently updated project.

        Raises:
            HTTPException: 400 when the requested project is not found for
                this owner, or when no project can be resolved at all.
        """
        wanted_id = requested_project_id.strip()
        if wanted_id:
            owned_project = legacy.db.fetch_one(
                "SELECT * FROM projects WHERE id = ? AND user_id = ? LIMIT 1",
                (wanted_id, owner_id),
            )
            if owned_project:
                return owned_project["id"]
            raise HTTPException(status_code=400, detail="归属项目不存在或不属于当前账号")

        # Keep the project the existing source is already attached to.
        inherited_id = str(existing.get("project_id") or "") if existing else ""
        if inherited_id.strip():
            return inherited_id

        latest_project = legacy.db.fetch_one(
            "SELECT * FROM projects WHERE user_id = ? ORDER BY updated_at DESC LIMIT 1",
            (owner_id,),
        )
        if not latest_project:
            raise HTTPException(status_code=400, detail="请先创建或选择归属项目")
        return latest_project["id"]

    def _find_existing_account(user_id: str, profile_url: str, handle: str) -> dict[str, Any] | None:
        """Find a stored ``creator_account`` source for this user and platform.

        A row whose trimmed ``source_url`` equals the trimmed ``profile_url``
        wins; otherwise the first row whose handle matches case-insensitively
        is returned. Returns ``None`` when neither lookup matches.
        """
        candidates = _content_source_rows(user_id, platform, "creator_account")
        wanted_url = profile_url.strip()
        wanted_handle = handle.strip().lower()

        if wanted_url:
            for candidate in candidates:
                stored_url = str(candidate.get("source_url") or "").strip()
                if stored_url == wanted_url:
                    return candidate

        if not wanted_handle:
            return None
        for candidate in candidates:
            stored_handle = str(candidate.get("handle") or "").strip().lower()
            if stored_handle == wanted_handle:
                return candidate
        return None

    def _upsert_account_source(
        owner: dict[str, Any],
        sync_request: PlatformAccountSyncRequest,
        public_data: dict[str, Any],
        creator_data: dict[str, Any],
    ) -> dict[str, Any]:
        """Insert or update the ``creator_account`` content source for a synced profile.

        The profile is merged from the public-page data, the creator-center
        data, the sync request and any previously stored metadata. New
        snapshots are prepended to the stored ones and the history is capped
        at 12 entries.

        Args:
            owner: Account row of the user performing the sync; ``owner["id"]`` is read.
            sync_request: The sync request (URL, handle, title, project, notes).
            public_data: Result of the public-profile collection
                (``profile``, ``payload``, ``errors``, ``source_url``).
            creator_data: Result of the creator-center collection
                (``profile``, ``payload``, ``pages``, ``errors``).

        Returns:
            The freshly re-read ``content_sources`` row.

        Raises:
            HTTPException: 400 from project resolution; 500 if the row cannot
                be read back after the write.
        """

        def _dict_or_empty(value: Any) -> dict[str, Any]:
            # Tolerate a stats value that is present but not a mapping (e.g. None).
            return value if isinstance(value, dict) else {}

        def _list_or_empty(value: Any) -> list[Any]:
            # Tolerate tag/keyword fields that are not lists so concatenation cannot fail.
            return value if isinstance(value, list) else []

        existing = _find_existing_account(owner["id"], sync_request.profile_url, sync_request.handle)
        if not existing:
            # The stored source_url prefers the resolved public URL (see
            # "profile_url" below), so a request URL that resolved to a
            # different address must also be matched against that resolved
            # URL, otherwise each resync would insert a duplicate account.
            resolved_public_url = str(public_data.get("source_url") or "").strip()
            if resolved_public_url and resolved_public_url != sync_request.profile_url.strip():
                existing = _find_existing_account(owner["id"], resolved_public_url, "")
        project_id = _resolve_project_id(sync_request.project_id, owner["id"], existing)
        existing_metadata = _source_metadata(existing) if existing else {}
        existing_state = _creator_center_state(existing_metadata)
        existing_snapshots = [
            item for item in (existing_state.get("snapshots") or []) if isinstance(item, dict)
        ]
        public_profile = public_data.get("profile") or {}
        creator_profile = creator_data.get("profile") or {}
        public_stats = _dict_or_empty(public_profile.get("stats"))
        creator_stats = _dict_or_empty(creator_profile.get("stats"))
        # Keywords derived from names and signatures feed both tags and keywords.
        derived_keywords = _extract_keywords(
            public_profile.get("nickname") or "",
            creator_profile.get("nickname") or "",
            public_profile.get("signature") or "",
            creator_profile.get("signature") or "",
        )
        resolved_profile = {
            "nickname": _first_non_empty(public_profile.get("nickname"), creator_profile.get("nickname"), sync_request.title, sync_request.handle),
            "signature": _first_non_empty(public_profile.get("signature"), creator_profile.get("signature"), existing_metadata.get("bio"), existing_metadata.get("description")),
            "avatar_url": _first_non_empty(public_profile.get("avatar_url"), creator_profile.get("avatar_url"), existing_metadata.get("avatar_url")),
            "profile_url": _first_non_empty(public_data.get("source_url"), sync_request.profile_url, public_profile.get("profile_url"), creator_profile.get("profile_url"), existing.get("source_url") if existing else ""),
            "handle": _first_non_empty(sync_request.handle, public_profile.get("handle"), creator_profile.get("handle"), existing.get("handle") if existing else ""),
            # Public figures win over creator-center figures; missing values become 0.
            "stats": {
                stat_name: public_stats.get(stat_name) or creator_stats.get(stat_name) or 0
                for stat_name in ("followers", "likes", "plays", "videos")
            },
            "tags": _dedupe_strings(
                _list_or_empty(existing_metadata.get("tags"))
                + _list_or_empty(public_profile.get("tags"))
                + _list_or_empty(creator_profile.get("tags"))
                + derived_keywords
            ),
            "keywords": _dedupe_strings(
                _list_or_empty(existing_metadata.get("keywords"))
                + _list_or_empty(public_profile.get("keywords"))
                + _list_or_empty(creator_profile.get("keywords"))
                + derived_keywords
            ),
        }
        new_snapshots: list[dict[str, Any]] = []
        if public_data.get("payload") not in ({}, None, "") or resolved_profile["profile_url"]:
            # With no scraped payload, snapshot the resolved profile fields instead.
            public_payload = public_data.get("payload") or {
                "profile_url": resolved_profile["profile_url"],
                "nickname": resolved_profile["nickname"],
                "handle": resolved_profile["handle"],
                "signature": resolved_profile["signature"],
                "avatar_url": resolved_profile["avatar_url"],
                "stats": resolved_profile["stats"],
            }
            new_snapshots.append(_build_snapshot("public_profile", resolved_profile["profile_url"], public_payload))
        if creator_data.get("payload"):
            creator_url = creator_data.get("pages", [{}])[0].get("url", "") if creator_data.get("pages") else ""
            new_snapshots.append(_build_snapshot("creator_center", creator_url, creator_data["payload"]))
        # Newest first, history capped at 12 snapshots.
        merged_snapshots = [*new_snapshots, *existing_snapshots][:12]
        latest_public_snapshot = next((item for item in merged_snapshots if item.get("snapshot_type") == "public_profile"), None)
        latest_creator_snapshot = next((item for item in merged_snapshots if item.get("snapshot_type") == "creator_center"), None)
        errors = [*public_data.get("errors", []), *creator_data.get("errors", [])]
        metadata = {
            **existing_metadata,
            "bio": resolved_profile["signature"],
            "description": resolved_profile["signature"],
            "avatar_url": resolved_profile["avatar_url"],
            "tags": resolved_profile["tags"],
            "keywords": resolved_profile["keywords"],
            "profile_stats": resolved_profile["stats"],
            "last_sync_error": "；".join([str(item) for item in errors if str(item).strip()]),
            "source_mode": "creator_center" if latest_creator_snapshot else "public",
            "last_public_sync_at": latest_public_snapshot.get("collected_at", "") if latest_public_snapshot else existing_metadata.get("last_public_sync_at", ""),
            "last_creator_sync_at": latest_creator_snapshot.get("collected_at", "") if latest_creator_snapshot else existing_metadata.get("last_creator_sync_at", ""),
            "discovery_note": sync_request.discovery_note or existing_metadata.get("discovery_note", ""),
            "creator_center": {
                "snapshots": merged_snapshots,
                "sync_errors": errors,
                "creator_center_urls": _dedupe_strings(sync_request.creator_center_urls or _default_creator_center_urls()),
                "last_synced_at": now(),
            },
        }
        account_id = existing["id"] if existing else make_id(f"{platform}_acct")
        created_at = existing["created_at"] if existing else now()
        title = _first_non_empty(sync_request.title, resolved_profile["nickname"], existing.get("title") if existing else "", resolved_profile["handle"], label)
        handle = _first_non_empty(sync_request.handle, resolved_profile["handle"], existing.get("handle") if existing else "")
        source_url = resolved_profile["profile_url"]
        if existing:
            legacy.db.execute(
                """
                UPDATE content_sources
                SET project_id = ?, handle = ?, source_url = ?, title = ?, metadata_json = ?, updated_at = ?
                WHERE id = ?
                """,
                (
                    project_id,
                    handle,
                    source_url,
                    title,
                    _safe_json_dumps(metadata),
                    now(),
                    account_id,
                ),
            )
        else:
            legacy.db.execute(
                """
                INSERT INTO content_sources (
                    id, user_id, project_id, source_kind, platform, handle, source_url, title, local_path,
                    metadata_json, created_at, updated_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    account_id,
                    owner["id"],
                    project_id,
                    "creator_account",
                    platform,
                    handle,
                    source_url,
                    title,
                    "",
                    _safe_json_dumps(metadata),
                    created_at,
                    now(),
                ),
            )
        row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (account_id,))
        if row is None:
            # Explicit check instead of ``assert``, which is stripped under ``python -O``.
            raise HTTPException(status_code=500, detail=f"{label} account could not be saved")
        return row

    def _upsert_account_videos(account_row: dict[str, Any], videos: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Persist scraped videos as ``video_link`` sources linked to ``account_row``.

        Each video is matched to an already linked source by share URL first,
        then by external id. A match is updated (its stored metadata overlaid
        with the fresh values); otherwise a new row is inserted. Videos with
        neither a share URL nor an external id are skipped.

        Returns:
            The re-read rows that were inserted or updated, in input order.
        """
        if not videos:
            return []
        owning_project_id = account_row.get("project_id", "")
        account_url = str(account_row.get("source_url") or "").strip()

        # Lookup tables over the videos already linked to this account.
        url_index: dict[str, dict[str, Any]] = {}
        external_id_index: dict[str, dict[str, Any]] = {}
        for known in _linked_video_sources(account_row):
            known_metadata = _source_metadata(known)
            known_url = str(known.get("source_url") or "").strip()
            known_external_id = str(known_metadata.get("external_id") or "").strip()
            if known_url:
                url_index[known_url] = known
            if known_external_id:
                external_id_index[known_external_id] = known

        persisted: list[dict[str, Any]] = []
        for position, item in enumerate(videos, start=1):
            video_url = str(item.get("share_url") or "").strip()
            video_external_id = str(item.get("external_id") or "").strip()
            if not (video_url or video_external_id):
                continue
            fresh_metadata = {
                "summary": item.get("description") or "",
                "description": item.get("description") or "",
                "cover_url": item.get("cover_url") or "",
                "published_at": item.get("published_at") or "",
                "tags": item.get("tags") or [],
                "content_type": "video",
                "duration_sec": float(item.get("duration_sec") or 0),
                "external_id": video_external_id,
                "origin_content_source_id": account_row["id"],
                "source_account_url": account_url,
                "stats": item.get("stats") or {},
                "performance_score": float(item.get("performance_score") or 0),
                "raw_payload": item.get("raw") or {},
            }
            placeholder_title = f"{label} 作品 {position}"

            match = url_index.get(video_url) if video_url else None
            if not match and video_external_id:
                match = external_id_index.get(video_external_id)

            if match:
                overlaid_metadata = dict(_source_metadata(match))
                overlaid_metadata.update(fresh_metadata)
                legacy.db.execute(
                    """
                    UPDATE content_sources
                    SET source_url = ?, title = ?, metadata_json = ?, updated_at = ?
                    WHERE id = ?
                    """,
                    (
                        video_url or str(match.get("source_url") or ""),
                        item.get("title") or match.get("title") or placeholder_title,
                        _safe_json_dumps(overlaid_metadata),
                        now(),
                        match["id"],
                    ),
                )
                saved = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (match["id"],))
            else:
                new_id = make_id(f"{platform}_video")
                stamp = now()
                legacy.db.execute(
                    """
                    INSERT INTO content_sources (
                        id, user_id, project_id, source_kind, platform, handle, source_url, title, local_path,
                        metadata_json, created_at, updated_at
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        new_id,
                        account_row["user_id"],
                        owning_project_id,
                        "video_link",
                        platform,
                        account_row.get("handle", ""),
                        video_url,
                        item.get("title") or placeholder_title,
                        "",
                        _safe_json_dumps(fresh_metadata),
                        stamp,
                        stamp,
                    ),
                )
                saved = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (new_id,))

            if not saved:
                continue
            persisted.append(saved)
            # Register the row so a repeat of the same video later in this batch updates it.
            if video_url:
                url_index[video_url] = saved
            if video_external_id:
                external_id_index[video_external_id] = saved
        return persisted

    def _require_account(account_id: str, user_id: str) -> dict[str, Any]:
        """Load a ``creator_account`` source owned by ``user_id`` on this platform.

        Raises:
            HTTPException: 404 when no matching row exists.
        """
        account = legacy.db.fetch_one(
            "SELECT * FROM content_sources WHERE id = ? AND user_id = ? AND source_kind = 'creator_account' AND platform = ?",
            (account_id, user_id, platform),
        )
        if account:
            return account
        raise HTTPException(status_code=404, detail=f"{label} account not found")

    def _linked_video_sources(account_row: dict[str, Any]) -> list[dict[str, Any]]:
        """Return the ``video_link`` sources that belong to ``account_row``.

        Candidates are the user's video sources in the account's project on
        this platform, most recently updated first. A video is linked when its
        metadata names this account in ``origin_content_source_id``, or when
        its ``source_account_url`` equals the account's non-empty source URL.
        """
        project_id = account_row.get("project_id", "")
        rows = legacy.db.fetch_all(
            "SELECT * FROM content_sources WHERE user_id = ? AND project_id = ? AND source_kind = 'video_link' AND platform = ? ORDER BY updated_at DESC, created_at DESC",
            (account_row["user_id"], project_id, platform),
        )
        account_id = account_row["id"]
        source_url = str(account_row.get("source_url") or "").strip()
        linked: list[dict[str, Any]] = []
        for row in rows:
            metadata = _source_metadata(row)
            if metadata.get("origin_content_source_id") == account_id:
                linked.append(row)
            elif source_url and metadata.get("source_account_url") == source_url:
                # Only match by URL when the account has one; otherwise an
                # empty URL would equal every video stored with an empty
                # source_account_url and pull in other accounts' videos.
                linked.append(row)
        return linked

    def _jobs_for_source(source_id: str) -> list[dict[str, Any]]:
        """Return every job row attached to ``source_id``, newest first."""
        query = "SELECT * FROM jobs WHERE content_source_id = ? ORDER BY created_at DESC"
        return legacy.db.fetch_all(query, (source_id,))

    def _latest_job_for_source(source_id: str) -> dict[str, Any] | None:
        """Return the most recently created job for ``source_id``, or whatever ``fetch_one`` yields when none exists."""
        query = "SELECT * FROM jobs WHERE content_source_id = ? ORDER BY created_at DESC LIMIT 1"
        return legacy.db.fetch_one(query, (source_id,))

    def _extract_performance_score(job_row: dict[str, Any] | None) -> float:
        """Read a performance score from a job's stored result/artifacts JSON.

        Candidate locations are probed in order: the result's top level, its
        ``analysis`` and ``scores`` sub-objects, then the artifacts' top level
        and ``scores`` sub-object. The first value that converts to a finite
        float is returned; ``0.0`` when there is no job or no usable value.
        """
        import math

        if not job_row:
            return 0.0

        def _as_dict(value: Any) -> dict[str, Any]:
            # Stored JSON is free-form: a string or list where an object is
            # expected must not raise AttributeError on ``.get``.
            return value if isinstance(value, dict) else {}

        result_map = _as_dict(_parse_json(job_row.get("result_json") or "{}", {}))
        artifacts_map = _as_dict(_parse_json(job_row.get("artifacts_json") or "{}", {}))
        candidates = [
            result_map.get("performance_score"),
            _as_dict(result_map.get("analysis")).get("performance_score"),
            _as_dict(result_map.get("scores")).get("performance_score"),
            artifacts_map.get("performance_score"),
            _as_dict(artifacts_map.get("scores")).get("performance_score"),
        ]
        for value in candidates:
            try:
                score = float(value)
            except (TypeError, ValueError, OverflowError):
                continue
            # NaN/inf would defeat the caller's ``<= 0`` fallback check and poison sorting.
            if math.isfinite(score):
                return score
        return 0.0

    def _extract_metrics(job_row: dict[str, Any] | None) -> dict[str, Any]:
        """Return the first non-empty metrics mapping stored on a job.

        Lookup order: result ``metrics``, artifacts ``metrics``, result
        ``stats``, artifacts ``stats``. Values that are not dicts are skipped
        so the caller can always use ``.get`` on the result. Returns ``{}``
        when there is no job or no usable mapping.
        """
        if not job_row:
            return {}
        result_map = _parse_json(job_row.get("result_json") or "{}", {})
        artifacts_map = _parse_json(job_row.get("artifacts_json") or "{}", {})
        for key in ("metrics", "stats"):
            for container in (result_map, artifacts_map):
                if not isinstance(container, dict):
                    continue
                value = container.get(key)
                if isinstance(value, dict) and value:
                    return value
        return {}

    def _video_payload(source_row: dict[str, Any]) -> dict[str, Any]:
        """Build the API representation of one video source.

        Stats come from the latest job's metrics first and the stored metadata
        stats second, accepting both ``<name>_count`` and ``<name>`` keys. The
        performance score comes from the latest job; when that is not
        positive, it falls back to the stored metadata score and then to a
        heuristic computed from the resolved stats.
        """
        source = _content_source_payload(source_row)
        meta = source.get("metadata", {})
        job = _latest_job_for_source(source_row["id"])
        job_metrics = _extract_metrics(job)
        stored_stats = meta.get("stats") if isinstance(meta.get("stats"), dict) else {}

        tag_list = meta.get("tags") or []
        if not isinstance(tag_list, list):
            tag_list = []

        def _stat(name: str) -> Any:
            counted = f"{name}_count"
            return job_metrics.get(counted) or job_metrics.get(name) or stored_stats.get(counted) or stored_stats.get(name) or 0

        stats = {name: _stat(name) for name in ("play", "like", "comment", "share")}

        score = _extract_performance_score(job)
        if score <= 0:
            score = _parse_count(meta.get("performance_score")) or _heuristic_video_performance_score(stats)

        job_fields = job or {}
        return {
            "id": source_row["id"],
            "aweme_id": str(meta.get("external_id") or source_row["id"]),
            "title": source.get("title") or "未命名作品",
            "description": meta.get("summary") or meta.get("description") or job_fields.get("style_summary", ""),
            "share_url": source.get("source_url", ""),
            "cover_url": meta.get("cover_url") or "",
            "duration_sec": float(meta.get("duration_sec") or 0),
            "published_at": meta.get("published_at") or source_row.get("created_at"),
            "tags": tag_list,
            "content_type": meta.get("content_type") or "video",
            "stats": stats,
            "score": {"performance_score": score},
            "source": source,
            "latest_job_id": job_fields.get("id", ""),
        }

    def _account_payload(account_row: dict[str, Any]) -> dict[str, Any]:
        """Build the API representation of an account, including a video summary.

        ``video_summary`` reports the number of linked videos, the average
        play and like counts over the videos with a positive value, and the
        first eight video payloads. ``sync_status`` is ``"ready"`` only when
        the stored ``last_sync_error`` is the empty string.
        """
        source = _content_source_payload(account_row)
        meta = source.get("metadata", {})
        video_items = [_video_payload(row) for row in _linked_video_sources(account_row)]

        def _positive_values(stat_name: str) -> list[float]:
            converted = (float(item["stats"].get(stat_name) or 0) for item in video_items)
            return [value for value in converted if value > 0]

        def _mean(values: list[float]) -> float:
            if not values:
                return 0
            return sum(values) / len(values)

        plays = _positive_values("play")
        likes = _positive_values("like")

        tag_list = meta.get("tags") or []
        if not isinstance(tag_list, list):
            tag_list = []

        has_sync_error = source.get("metadata", {}).get("last_sync_error", "") != ""
        return {
            "id": account_row["id"],
            "platform": platform,
            "profile_url": source.get("source_url", ""),
            "canonical_profile_url": source.get("source_url", ""),
            "handle": source.get("handle", ""),
            "nickname": source.get("title") or source.get("handle") or "未命名账号",
            "signature": meta.get("bio") or meta.get("description") or "",
            "avatar_url": meta.get("avatar_url") or "",
            "tags": tag_list,
            "keywords": meta.get("keywords") or [],
            "profile_stats": meta.get("profile_stats") or {},
            "sync_status": "partial" if has_sync_error else "ready",
            "video_summary": {
                "count": len(video_items),
                "avg_play": _mean(plays),
                "avg_like": _mean(likes),
                "videos": video_items[:8],
            },
            "project_id": source.get("project_id", ""),
            "created_at": source.get("created_at", ""),
            "updated_at": source.get("updated_at", ""),
        }

    def _relation_payload(row: dict[str, Any]) -> dict[str, Any]:
        """Build the API representation of one account-relation row.

        When the relation points at a stored account, its display name is
        resolved with the same rule ``_account_payload`` uses for
        ``nickname`` (title, then handle, then a placeholder).
        """
        target_nickname = ""
        if row.get("target_account_id"):
            target = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["target_account_id"],))
            if target:
                # Only the nickname is needed here. Building a full account
                # payload would load every linked video and its latest job
                # for each relation just to read one field.
                target_source = _content_source_payload(target)
                target_nickname = target_source.get("title") or target_source.get("handle") or "未命名账号"
        return {
            "id": row["id"],
            "source_account_id": row["source_account_id"],
            "target_account_id": row.get("target_account_id", "") or "",
            "target_profile_url": row.get("target_profile_url", ""),
            "target_nickname": target_nickname,
            "relation_type": row.get("relation_type", "benchmark"),
            "note": row.get("note", ""),
            "search_id": row.get("search_id", ""),
            "created_at": row["created_at"],
        }

    def _model_profile_payload(row: dict[str, Any]) -> dict[str, Any]:
        return {
            "id": row["id"],
            "name": row["name"],
            "model_name": row["model_name"],
            "base_url": row["base_url"],
            "is_default": bool(row.get("is_default", 0)),
        }

    def _report_payload(row: dict[str, Any]) -> dict[str, Any]:
        """Build the API representation of an analysis report with its suggestions.

        Suggestions are loaded from the platform's ``*_analysis_suggestions``
        table, oldest first. JSON columns on both the report and its
        suggestions are decoded with ``_parse_json``.
        """
        suggestion_rows = legacy.db.fetch_all(
            f"SELECT * FROM {table_prefix}_analysis_suggestions WHERE report_id = ? ORDER BY created_at ASC",
            (row["id"],),
        )
        suggestions: list[dict[str, Any]] = []
        for entry in suggestion_rows:
            suggestions.append(
                {
                    "id": entry["id"],
                    "status": entry.get("status", "ok"),
                    "model_profile_id": entry.get("model_profile_id", ""),
                    "model_label": entry.get("model_label", ""),
                    "suggestion_text": entry.get("suggestion_text", ""),
                    "parsed_json": _parse_json(entry.get("parsed_json") or "{}", {}),
                    "created_at": entry.get("created_at", ""),
                }
            )
        model_profile_ids = _parse_json(row.get("model_profile_ids_json") or "[]", [])
        linked_account_ids = _parse_json(row.get("linked_account_ids_json") or "[]", [])
        return {
            "id": row["id"],
            "focus_text": row.get("focus_text", ""),
            "model_profile_ids": model_profile_ids,
            "linked_account_ids": linked_account_ids,
            "suggestions": suggestions,
            "created_at": row["created_at"],
        }

    def _workspace_payload(account_row: dict[str, Any]) -> dict[str, Any]:
        """Assemble the workspace view for one account.

        Combines the account payload, the latest public and creator-center
        snapshots, the six most recent analysis reports, all relations, the
        five most recent similarity searches, and the model profiles that are
        either unowned or owned by the account's user.
        """
        account_id = account_row["id"]

        def _rows_for_account(sql: str) -> list[dict[str, Any]]:
            return legacy.db.fetch_all(sql, (account_id,))

        report_rows = _rows_for_account(
            f"SELECT * FROM {table_prefix}_analysis_reports WHERE account_source_id = ? ORDER BY created_at DESC LIMIT 6"
        )
        relation_rows = _rows_for_account(
            f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? ORDER BY created_at DESC"
        )
        search_rows = _rows_for_account(
            f"SELECT * FROM {table_prefix}_similarity_searches WHERE source_account_id = ? ORDER BY created_at DESC LIMIT 5"
        )
        public_snapshot = _latest_snapshot(account_row, "public_profile")
        creator_snapshot = _latest_snapshot(account_row, "creator_center")

        account_view = _account_payload(account_row)
        public_brief = _snapshot_brief(public_snapshot) if public_snapshot else None
        creator_brief = _snapshot_brief(creator_snapshot) if creator_snapshot else None
        report_views = [_report_payload(item) for item in report_rows]
        relation_views = [_relation_payload(item) for item in relation_rows]

        search_views: list[dict[str, Any]] = []
        for item in search_rows:
            search_views.append(
                {
                    "id": item["id"],
                    "prompt_text": item.get("prompt_text", ""),
                    "context": _parse_json(item.get("context_json") or "{}", {}),
                    "created_at": item["created_at"],
                }
            )

        # Shared profiles (no owner) plus the ones owned by this user, defaults first.
        profile_rows = legacy.db.fetch_all(
                """
                SELECT *
                FROM model_profiles
                WHERE owner_account_id IS NULL OR owner_account_id = ?
                ORDER BY is_default DESC, created_at ASC
                """,
                (account_row["user_id"],),
        )
        return {
            "account": account_view,
            "latest_public_snapshot": public_brief,
            "latest_creator_snapshot": creator_brief,
            "recent_reports": report_views,
            "linked_accounts": relation_views,
            "recent_similarity_searches": search_views,
            "available_model_profiles": [_model_profile_payload(item) for item in profile_rows],
        }

    async def _call_reasoning_model(user_id: str, prompt: str, *, system_prompt: str, model_profile_id: str = "", temperature: float = 0.3) -> tuple[str, dict[str, Any]]:
        """Run one prompt against the user's model profile.

        An empty ``model_profile_id`` is passed to the profile lookup as
        ``None``. Returns the raw model output together with the JSON object
        parsed from it, or ``{}`` when the parse result is not a dict.
        """
        selected_profile = legacy.model_profile_for_account(user_id, model_profile_id or None)
        raw_output = await legacy.call_model(
            selected_profile,
            system_prompt=system_prompt,
            user_prompt=prompt,
            temperature=temperature,
        )
        parsed_object = legacy.parse_json_object(raw_output)
        if not isinstance(parsed_object, dict):
            parsed_object = {}
        return raw_output, parsed_object

    def _resolve_model_profiles(user_id: str, requested_ids: list[str] | None) -> list[dict[str, Any]]:
        """Resolve requested model-profile ids to distinct profile rows.

        Ids whose lookup raises are skipped, and profiles resolving to an
        already seen (or blank) id are dropped. When nothing resolves, the
        profile returned for ``None`` is used, provided it has a non-blank id.
        """
        resolved: list[dict[str, Any]] = []
        seen_ids: set[str] = set()
        for requested_id in _normalize_id_list(requested_ids):
            try:
                candidate = legacy.model_profile_for_account(user_id, requested_id)
            except Exception:
                # Best effort: an unknown or inaccessible id must not fail the request.
                continue
            candidate_id = str(candidate.get("id") or "").strip()
            if candidate_id and candidate_id not in seen_ids:
                seen_ids.add(candidate_id)
                resolved.append(candidate)

        if not resolved:
            default_profile = legacy.model_profile_for_account(user_id, None)
            if str(default_profile.get("id") or "").strip():
                return [default_profile]
            return []
        return resolved

    def _linked_account_context(
        account_row: dict[str, Any],
        requested_linked_account_ids: list[str] | None,
        *,
        include_linked_accounts: bool,
        limit: int = 5,
    ) -> list[dict[str, Any]]:
        """Collect linked-account context entries for analysis prompts.

        Relations are read newest first. When specific ids are requested,
        relations targeting other stored accounts are skipped. When
        ``include_linked_accounts`` is false, only explicitly requested
        targets are kept. At most ``limit`` entries are returned.

        Raises:
            HTTPException: 404 (via ``_require_account``) when a kept relation
                targets an account that cannot be loaded for this user.
        """
        requested_ids = set(_normalize_id_list(requested_linked_account_ids))
        relations = legacy.db.fetch_all(
            f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? ORDER BY created_at DESC",
            (account_row["id"],),
        )
        items: list[dict[str, Any]] = []
        for relation in relations:
            # Stop once enough entries are collected: each account payload
            # loads the target's videos and their jobs, which is wasted work
            # for entries that the final slice would discard.
            if limit > 0 and len(items) >= limit:
                break
            target_account_id = str(relation.get("target_account_id") or "").strip()
            explicitly_requested = bool(requested_ids) and target_account_id in requested_ids
            if requested_ids and target_account_id and not explicitly_requested:
                continue
            if not include_linked_accounts and not explicitly_requested:
                continue
            target_account = _require_account(target_account_id, account_row["user_id"]) if target_account_id else None
            # Build the target payload once and reuse it for both fields.
            target_payload = _account_payload(target_account) if target_account else None
            items.append(
                {
                    "relation_id": relation["id"],
                    "relation_type": relation.get("relation_type", "benchmark"),
                    "note": relation.get("note", ""),
                    "target_account_id": target_account_id,
                    "target_profile_url": relation.get("target_profile_url", ""),
                    "target_nickname": target_payload.get("nickname", "") if target_payload else "",
                    "account": target_payload,
                }
            )
        return items[:limit]

    def _recent_similarity_candidates_context(account_row: dict[str, Any], limit: int = 6) -> list[dict[str, Any]]:
        """Return up to ``limit`` stored similarity candidates for this account.

        Candidates are ordered by their search's creation time (newest first)
        and then by rank within the search. Each item starts from the
        candidate's stored raw output object; fields that object lacks are
        filled from the row's columns.
        """
        candidate_rows = legacy.db.fetch_all(
            f"""
            SELECT candidates.*
            FROM {table_prefix}_similarity_candidates AS candidates
            JOIN {table_prefix}_similarity_searches AS searches
              ON searches.id = candidates.search_id
            WHERE searches.source_account_id = ?
            ORDER BY searches.created_at DESC, candidates.rank_index ASC
            LIMIT ?
            """,
            (account_row["id"], limit),
        )
        items: list[dict[str, Any]] = []
        for row in candidate_rows:
            parsed = _parse_json(row.get("raw_output_json") or "{}", {})
            # A stored blob that is not a JSON object cannot take defaults;
            # start from an empty object instead of failing on ``.setdefault``.
            payload = parsed if isinstance(parsed, dict) else {}
            payload.setdefault("candidate_account_id", row.get("candidate_account_id", "") or "")
            payload.setdefault("candidate_profile_url", row.get("candidate_profile_url", ""))
            # Fall back to the row like the other fields do (the previous code
            # read the default from the payload itself, so it was always "").
            # NOTE(review): assumes the row may carry a candidate_nickname
            # column; when it does not, this still yields "".
            payload.setdefault("candidate_nickname", row.get("candidate_nickname", "") or "")
            payload.setdefault("rationale_text", row.get("rationale_text", ""))
            payload.setdefault("heuristic_score", row.get("heuristic_score", 0))
            payload.setdefault("agent_score", row.get("agent_score", 0))
            items.append(payload)
        return items

    async def _analyze_top_videos_for_account(
        account_row: dict[str, Any],
        user_id: str,
        *,
        model_profile_id: str = "",
        top_video_count: int = 5,
        min_score: float = 0,
        temperature: float = 0.25,
    ) -> list[dict[str, Any]]:
        """Ask the reasoning model to break down the account's best-performing videos.

        Linked videos are ordered by performance score and then publish time
        (both descending), filtered to scores of at least ``min_score``, and
        truncated to ``top_video_count`` clamped to the range 1..12. The model
        is called once per remaining video, sequentially.

        Returns:
            One result dict per analysed video, holding the truncated summary
            text, the parsed model JSON and the video's performance score.
        """
        ordered = sorted(
            [_video_payload(row) for row in _linked_video_sources(account_row)],
            key=lambda item: (item["score"]["performance_score"], item.get("published_at") or ""),
            reverse=True,
        )
        qualifying: list[dict[str, Any]] = []
        for candidate in ordered:
            if float(candidate["score"]["performance_score"] or 0) >= float(min_score or 0):
                qualifying.append(candidate)
        keep = max(1, min(top_video_count, 12))

        analyses: list[dict[str, Any]] = []
        for video in qualifying[:keep]:
            video_json = json.dumps(video, ensure_ascii=False, indent=2)
            prompt = f"请拆解这条{label}作品为什么值得关注，输出 summary、borrow_points、risks。" + f"\n\n输入：\n{video_json}"
            output, parsed = await _call_reasoning_model(
                user_id,
                prompt,
                system_prompt="你是短视频内容拆解助手。尽量输出 JSON，字段包括 summary、borrow_points、risks。",
                model_profile_id=model_profile_id,
                temperature=temperature,
            )
            # Prefer a structured summary; otherwise fall back to the raw output.
            summary_source = parsed.get("summary") or parsed.get("headline_summary") or output
            analyses.append(
                {
                    "id": make_id(f"{platform}_va"),
                    "video_id": video["id"],
                    "video_title": video["title"],
                    "status": "ok",
                    "summary_text": str(summary_source)[:240],
                    "parsed_json": parsed,
                    "performance_score": video["score"]["performance_score"],
                    "created_at": now(),
                }
            )
        return analyses

    def _similarity_tokens(payload: dict[str, Any], extra_text: str = "") -> set[str]:
        """Return the lower-cased keyword set used to compare two accounts.

        Keywords are extracted by ``_extract_keywords`` from the payload's
        nickname, signature, space-joined tags, space-joined keywords and
        ``extra_text``; falsy tokens are dropped.
        """
        text_parts = (
            payload.get("nickname") or "",
            payload.get("signature") or "",
            " ".join(payload.get("tags") or []),
            " ".join(payload.get("keywords") or []),
            extra_text,
        )
        lowered: set[str] = set()
        for keyword in _extract_keywords(*text_parts):
            if keyword:
                lowered.add(keyword.lower())
        return lowered

    def _candidate_title_from_url(url: str) -> str:
        cleaned = str(url or "").strip().rstrip("/")
        tail = cleaned.rsplit("/", 1)[-1] if cleaned else ""
        tail = tail.split("?", 1)[0].replace("-", " ").replace("_", " ").strip()
        return tail or cleaned or "候选账号"

    async def _create_sync_job_for_account(account_row: dict[str, Any], assistant_id: str = "") -> dict[str, Any]:
        """Create and enqueue a content-sync job for a creator-account source.

        Raises ``HTTPException`` (400) when the account row has no ``project_id``.
        Returns ``legacy.job_payload`` of the job handed back by
        ``legacy.trigger_orchestrated_job``.
        """
        project_id = account_row.get("project_id") or ""
        if not project_id:
            raise HTTPException(status_code=400, detail="Account source is not attached to a project")

        owner_id = account_row["user_id"]
        knowledge_base = legacy.resolve_target_kb(owner_id, None, project_id)
        source_info = _content_source_payload(account_row)
        model_profile = legacy.model_profile_for_account(owner_id, None)

        source_url = source_info.get("source_url", "")
        # Title preference: explicit title, then handle, then the platform label.
        display_name = source_info.get("title") or source_info.get("handle") or label
        job_artifacts = {
            "source_account_url": source_url,
            "platform": platform,
            "handle": source_info.get("handle", ""),
            "max_items": int(source_info.get("metadata", {}).get("max_items") or 5),
            "skip_existing": True,
            "auto_trigger_analysis": True,
        }
        job_row = legacy.create_job_record(
            account_id=owner_id,
            project_id=project_id,
            knowledge_base_id=knowledge_base["id"],
            content_source_id=account_row["id"],
            assistant_id=assistant_id or None,
            source_type="creator_account",
            line_type="analysis",
            workflow_key="content_source_sync_pipeline",
            title=f"{display_name} 内容同步",
            language="auto",
            source_url=source_url,
            artifacts=job_artifacts,
            analysis_model_profile_id=model_profile["id"],
        )
        queued_job = await legacy.trigger_orchestrated_job(job_row)
        return legacy.job_payload(queued_job)

    def _tracking_cursor(user_id: str) -> dict[str, Any] | None:
        """Fetch the user's tracking-cursor row, as returned by ``legacy.db.fetch_one``."""
        query = f"SELECT * FROM {table_prefix}_tracking_cursors WHERE user_id = ?"
        params = (user_id,)
        return legacy.db.fetch_one(query, params)

    def _set_tracking_cursor(user_id: str, last_seen_at: str) -> dict[str, Any]:
        """Create or update the user's tracking cursor and return the stored row.

        An existing row is updated in place; otherwise a new row is inserted.
        ``updated_at`` is set to ``now()`` in both cases.
        """
        cursor_exists = bool(_tracking_cursor(user_id))
        updated_at = now()
        if cursor_exists:
            statement = f"UPDATE {table_prefix}_tracking_cursors SET last_seen_at = ?, updated_at = ? WHERE user_id = ?"
            params = (last_seen_at, updated_at, user_id)
        else:
            statement = f"INSERT INTO {table_prefix}_tracking_cursors (user_id, last_seen_at, updated_at) VALUES (?, ?, ?)"
            params = (user_id, last_seen_at, updated_at)
        legacy.db.execute(statement, params)
        # Re-read so the caller gets exactly what is stored.
        return _tracking_cursor(user_id)

    def _tracked_account_payload(tracked_row: dict[str, Any]) -> dict[str, Any]:
        """Serialise a tracked-account row, embedding the account and assistant name.

        The assistant name is looked up only when the row carries an
        ``assistant_id`` and stays empty if no assistant row is found.  The
        tracked account itself is resolved through ``_require_account``.
        """
        assistant_id = tracked_row.get("assistant_id")
        assistant_name = ""
        if assistant_id:
            assistant_row = legacy.db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,))
            if assistant_row:
                assistant_name = legacy.assistant_payload(assistant_row).get("name", "")

        tracked_account_id = tracked_row["tracked_account_id"]
        account_row = _require_account(tracked_account_id, tracked_row["user_id"])
        return {
            "id": tracked_row["id"],
            "platform": platform,
            "tracked_account_id": tracked_account_id,
            "assistant_id": assistant_id or "",
            "assistant_name": assistant_name,
            "note": tracked_row.get("note", ""),
            "created_at": tracked_row.get("created_at", ""),
            "updated_at": tracked_row["updated_at"],
            "account": _account_payload(account_row),
        }

    def _list_tracked_accounts(user_id: str) -> list[dict[str, Any]]:
        """Return the user's tracked accounts as payloads, most recently updated first."""
        query = f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? ORDER BY updated_at DESC"
        tracked_rows = legacy.db.fetch_all(query, (user_id,))
        payloads: list[dict[str, Any]] = []
        for tracked_row in tracked_rows:
            payloads.append(_tracked_account_payload(tracked_row))
        return payloads

    def _tracking_digest_item(tracked_item: dict[str, Any], video: dict[str, Any]) -> dict[str, Any]:
        """Build one digest entry for a video belonging to a tracked account.

        The summary prefers the latest job's ``style_summary``, then the video
        description, then a fixed placeholder.  A video counts as high value when
        its performance score is at least 60.
        """
        job = _latest_job_for_source(video["id"]) or {}
        summary = job.get("style_summary") or video.get("description") or "已发现更新内容"

        # Short teaser snippets: first 36 characters of the summary and of the title.
        borrowing_points: list[str] = []
        for snippet in (summary[:36], video.get("title", "")[:36]):
            if snippet:
                borrowing_points.append(snippet)

        score_info = video.get("score", {})
        performance_score = float(score_info.get("performance_score") or 0)
        account_payload = tracked_item["account"]
        return {
            "tracking_id": tracked_item["id"],
            "platform": platform,
            "tracked_account_id": tracked_item["tracked_account_id"],
            "tracked_account_name": account_payload["nickname"],
            "assistant_id": tracked_item.get("assistant_id", "") or "",
            "assistant_name": tracked_item.get("assistant_name", ""),
            "note": tracked_item.get("note", ""),
            "account": account_payload,
            "video": video,
            "summary": summary,
            "summary_text": summary,
            "borrowing_points": borrowing_points[:3],
            "is_high_value": performance_score >= 60,
            "created_at": video.get("published_at") or now(),
        }

    def _tracking_digest(user_id: str, since_value: str = "", limit: int = 24) -> dict[str, Any]:
        """Collect videos from tracked accounts that are newer than a threshold.

        The threshold is ``since_value`` when given, otherwise the stored
        cursor's ``last_seen_at``.  Videos whose ``published_at`` compares (as a
        string) less than or equal to the threshold are skipped; videos without
        a ``published_at`` are always included.  Items are returned newest first
        by ``created_at`` and truncated to ``limit``.
        """
        tracked_items = _list_tracked_accounts(user_id)
        cursor_row = _tracking_cursor(user_id) or {}
        threshold = (since_value or cursor_row.get("last_seen_at") or "").strip()

        digest_items: list[dict[str, Any]] = []
        for tracked in tracked_items:
            for video in tracked["account"]["video_summary"]["videos"]:
                published_at = str(video.get("published_at") or "")
                already_seen = bool(threshold and published_at and published_at <= threshold)
                if not already_seen:
                    digest_items.append(_tracking_digest_item(tracked, video))

        newest_first = sorted(digest_items, key=lambda entry: entry.get("created_at", ""), reverse=True)
        return {
            "generated_at": now(),
            "since": threshold,
            "items": newest_first[:limit],
            "tracked_accounts": tracked_items,
            "cursor_last_seen_at": cursor_row.get("last_seen_at", ""),
        }

    @app.post(f"/v2/{platform}/accounts/sync")
    async def sync_platform_account(
        request: PlatformAccountSyncRequest,
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Sync one platform account from its public profile and/or creator-center
        # pages, upsert the account source and its videos, and return the
        # refreshed workspace payload with any collection errors attached as
        # "sync_errors".
        #
        # Responds 400 when the request carries none of the three input sources,
        # or when nothing usable could be extracted from them.
        has_input = bool(
            request.profile_url.strip()
            or request.manual_profile_payload
            or request.manual_creator_pages
        )
        if not has_input:
            raise HTTPException(
                status_code=400,
                detail="profile_url、manual_profile_payload 或 manual_creator_pages 至少需要传一个",
            )

        public_data = await _collect_public_profile(
            request.profile_url,
            request.manual_profile_payload,
            fallback_title=request.title,
            fallback_handle=request.handle,
        )
        creator_data = await _collect_creator_center_pages(
            request.creator_center_urls,
            request.session_cookie,
            request.manual_creator_pages,
            fallback_title=request.title,
            fallback_handle=request.handle,
        )

        profile = public_data["profile"]
        if not (profile.get("nickname") or profile.get("profile_url") or creator_data["pages"]):
            raise HTTPException(status_code=400, detail=f"No {label} profile or creator-center data could be extracted")

        account_row = _upsert_account_source(account, request, public_data, creator_data)
        raw_payloads = [
            public_data.get("payload") or {},
            creator_data.get("payload") or {},
        ]
        _upsert_account_videos(account_row, _extract_videos(raw_payloads))

        # Reload the account so the workspace reflects the rows just written.
        refreshed_row = _require_account(account_row["id"], account["id"])
        workspace = _workspace_payload(refreshed_row)
        workspace["sync_errors"] = list(public_data["errors"]) + list(creator_data["errors"])
        return workspace

    @app.get(f"/v2/{platform}/accounts")
    def list_platform_accounts(account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
        # List the caller's creator-account sources on this platform as account payloads.
        source_rows = _content_source_rows(account["id"], platform, "creator_account")
        return [_account_payload(source_row) for source_row in source_rows]

    @app.get(f"/v2/{platform}/accounts/{{account_id}}/workspace")
    def get_platform_account_workspace(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
        # Return the workspace payload for one account resolved via _require_account.
        return _workspace_payload(_require_account(account_id, account["id"]))

    @app.get(f"/v2/{platform}/accounts/{{account_id}}/analysis-reports")
    def list_platform_analysis_reports(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
        # Return the "recent_reports" section of the account's workspace payload.
        workspace = _workspace_payload(_require_account(account_id, account["id"]))
        return workspace["recent_reports"]

    @app.get(f"/v2/{platform}/accounts/{{account_id}}/snapshots")
    def list_platform_snapshots(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
        # List brief descriptors for every snapshot of the account.
        account_row = _require_account(account_id, account["id"])
        briefs: list[dict[str, Any]] = []
        for snapshot in _list_snapshots(account_row):
            briefs.append(_snapshot_brief(snapshot))
        return briefs

    @app.get(f"/v2/{platform}/accounts/{{account_id}}/snapshots/{{snapshot_id}}")
    def get_platform_snapshot_detail(
        account_id: str,
        snapshot_id: str,
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Return the detail payload of one snapshot of the account.
        owning_account = _require_account(account_id, account["id"])
        return _snapshot_detail(owning_account, snapshot_id)

    @app.get(f"/v2/{platform}/accounts/{{account_id}}/creator-fields")
    def get_platform_creator_fields(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
        # Return the detail of the account's latest "creator_center" snapshot,
        # or respond 404 when the account has none.
        account_row = _require_account(account_id, account["id"])
        creator_snapshot = _latest_snapshot(account_row, "creator_center")
        if creator_snapshot:
            snapshot_id = str(creator_snapshot.get("id") or "")
            return _snapshot_detail(account_row, snapshot_id)
        raise HTTPException(status_code=404, detail="No creator-center snapshot found")

    @app.get(f"/v2/{platform}/accounts/{{account_id}}/videos")
    def list_platform_account_videos(
        account_id: str,
        limit: int = Query(default=80, ge=1, le=200),
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # List the account's videos ordered by performance score, then publish
        # time (both descending).  "items" is truncated to `limit` while "count"
        # reports the full number.  Also returns up to 12 ids of videos scoring
        # at least 60 and the ids of the 12 most recently published videos.
        high_score_threshold = 60
        preview_size = 12

        def _score_of(entry: dict[str, Any]) -> Any:
            return entry["score"]["performance_score"]

        def _published_of(entry: dict[str, Any]) -> str:
            return entry.get("published_at") or ""

        account_row = _require_account(account_id, account["id"])
        videos = sorted(
            (_video_payload(row) for row in _linked_video_sources(account_row)),
            key=lambda entry: (_score_of(entry), _published_of(entry)),
            reverse=True,
        )
        top_ids = [
            entry["id"] for entry in videos if float(_score_of(entry) or 0) >= high_score_threshold
        ][:preview_size]
        by_recency = sorted(videos, key=_published_of, reverse=True)
        latest_ids = [entry["id"] for entry in by_recency[:preview_size]]
        return {
            "items": videos[:limit],
            "count": len(videos),
            "meta": {"platform": platform, "account_id": account_id},
            "top_scored_video_ids": top_ids,
            "latest_video_ids": latest_ids,
            "high_score_threshold": high_score_threshold,
        }

    @app.post(f"/v2/{platform}/accounts/{{account_id}}/analysis")
    async def analyze_platform_account(
        account_id: str,
        request: PlatformAnalysisRequest,
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Run an account-level analysis with every resolved model profile.
        #
        # Steps, in order:
        #   1. assemble a context dict (account, top videos, linked accounts,
        #      recent similarity candidates, snapshot info, request options);
        #   2. insert the report row (prompt + context) BEFORE any model call;
        #   3. await one reasoning-model call per profile, inserting one
        #      suggestion row per call;
        #   4. optionally analyse the top videos with the first selected profile.
        #
        # Returns the report id, the suggestions, the context and the optional
        # top-video analyses.
        account_row = _require_account(account_id, account["id"])
        workspace = _workspace_payload(account_row)
        latest_public_snapshot = _latest_snapshot(account_row, "public_profile")
        latest_creator_snapshot = _latest_snapshot(account_row, "creator_center")
        profiles = _resolve_model_profiles(account["id"], request.model_profile_ids)
        # Non-empty, stripped ids of the profiles that were actually resolved.
        selected_model_profile_ids = [str(profile.get("id") or "").strip() for profile in profiles if str(profile.get("id") or "").strip()]
        requested_linked_account_ids = _normalize_id_list(request.linked_account_ids)
        linked_accounts = _linked_account_context(
            account_row,
            requested_linked_account_ids,
            include_linked_accounts=bool(request.include_linked_accounts),
            limit=6,
        )
        recent_similarity_candidates = (
            _recent_similarity_candidates_context(account_row, limit=6)
            if request.include_recent_similar_candidates
            else []
        )
        # NOTE: "top_videos" holds at most 8 entries even though the request
        # model accepts max_videos up to 20.
        context = {
            "account": workspace["account"],
            "top_videos": workspace["account"]["video_summary"]["videos"][: max(1, min(request.max_videos, 8))],
            "linked_accounts": linked_accounts,
            "recent_similar_candidates": recent_similarity_candidates,
            "creator_center": {
                "snapshot_count": len(_list_snapshots(account_row)),
                "latest_public_snapshot": _snapshot_brief(latest_public_snapshot) if latest_public_snapshot else None,
                "latest_creator_snapshot": _snapshot_detail(account_row, str(latest_creator_snapshot.get("id") or "")) if latest_creator_snapshot else None,
            },
            "request_options": {
                "include_linked_accounts": bool(request.include_linked_accounts),
                "include_recent_similar_candidates": bool(request.include_recent_similar_candidates),
                "auto_analyze_top_videos": bool(request.auto_analyze_top_videos),
                "top_video_analysis_count": int(request.top_video_analysis_count),
                "max_videos": int(request.max_videos),
            },
            "requested_model_profile_ids": _normalize_id_list(request.model_profile_ids),
            "selected_model_profile_ids": selected_model_profile_ids,
            "extra_focus": request.extra_focus,
        }
        # The whole context is embedded in the prompt as pretty-printed JSON.
        prompt = (
            f"请从新媒体商业化运营视角，分析这个{label}账号，输出执行摘要、可借鉴点、风险提醒和下一步动作。"
            f"\n\n输入：\n{json.dumps(context, ensure_ascii=False, indent=2)}"
        )
        report_id = make_id(f"{platform}_report")
        # Persist the report first; suggestion rows below reference report_id.
        legacy.db.execute(
            f"""INSERT INTO {table_prefix}_analysis_reports
            (id, user_id, account_source_id, focus_text, model_profile_ids_json, linked_account_ids_json, prompt_text, context_json, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                report_id,
                account["id"],
                account_row["id"],
                request.extra_focus or "",
                _safe_json_dumps(selected_model_profile_ids),
                _safe_json_dumps([item.get("target_account_id", "") for item in linked_accounts if item.get("target_account_id")]),
                prompt,
                _safe_json_dumps(context),
                now(),
            ),
        )

        # One sequential model call per profile; each result is stored with
        # status "ok" and its text truncated to 4000 characters.
        suggestions: list[dict[str, Any]] = []
        for profile in profiles:
            output, parsed = await _call_reasoning_model(
                account["id"],
                prompt,
                system_prompt="你是新媒体账号分析顾问。尽量输出 JSON，字段包括 executive_summary、borrow_points、risks、next_actions。",
                model_profile_id=str(profile.get("id") or ""),
                temperature=request.temperature,
            )
            suggestion_id = make_id(f"{platform}_suggestion")
            # "<name> · <model_name>", with dangling separators trimmed when a part is empty.
            model_label = f"{profile.get('name', '')} · {profile.get('model_name', '')}".strip(" ·")
            legacy.db.execute(
                f"INSERT INTO {table_prefix}_analysis_suggestions (id, report_id, model_profile_id, model_label, status, suggestion_text, parsed_json, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    suggestion_id,
                    report_id,
                    profile["id"],
                    model_label,
                    "ok",
                    output[:4000],
                    _safe_json_dumps(parsed),
                    now(),
                ),
            )
            # In-memory copy; its created_at comes from a separate now() call
            # and may differ slightly from the stored row.
            suggestions.append(
                {
                    "id": suggestion_id,
                    "status": "ok",
                    "model_profile_id": profile["id"],
                    "model_label": model_label,
                    "suggestion_text": output[:4000],
                    "parsed_json": parsed,
                    "created_at": now(),
                }
            )

        # Prefer videos scoring at least 45; if none of the context videos
        # reaches that, drop the floor to 0 so the analysis is not empty.
        top_video_min_score = 45.0
        if not any(float(item.get("score", {}).get("performance_score") or 0) >= top_video_min_score for item in context["top_videos"]):
            top_video_min_score = 0.0
        # Only runs when requested AND at least one profile was resolved; uses
        # the first selected profile and clamps temperature to [0.15, 0.4].
        top_video_analyses = (
            await _analyze_top_videos_for_account(
                account_row,
                account["id"],
                model_profile_id=selected_model_profile_ids[0] if selected_model_profile_ids else "",
                top_video_count=request.top_video_analysis_count,
                min_score=top_video_min_score,
                temperature=min(max(request.temperature, 0.15), 0.4),
            )
            if request.auto_analyze_top_videos and profiles
            else []
        )
        report_row = legacy.db.fetch_one(
            f"SELECT * FROM {table_prefix}_analysis_reports WHERE id = ?",
            (report_id,),
        )
        report_payload = _report_payload(report_row)
        # Suggestions from the report payload win; the in-memory list is only a
        # fallback when that payload's list is empty.
        return {
            "report_id": report_id,
            "account_id": account_row["id"],
            "created_at": now(),
            "suggestions": report_payload["suggestions"] or suggestions,
            "context": context,
            "top_video_analyses": top_video_analyses,
        }

    @app.post(f"/v2/{platform}/accounts/{{account_id}}/videos/analyze-top")
    async def analyze_platform_top_videos(
        account_id: str,
        request: PlatformTopVideoAnalysisRequest,
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Analyse the account's top videos with the options from the request
        # and return the analyses together with how many were produced.
        account_row = _require_account(account_id, account["id"])
        analysis_options = {
            "model_profile_id": request.model_profile_id,
            "top_video_count": request.top_video_count,
            "min_score": request.min_score,
            "temperature": request.temperature,
        }
        analyses = await _analyze_top_videos_for_account(account_row, account["id"], **analysis_options)
        return {
            "account_id": account_row["id"],
            "analyzed_count": len(analyses),
            "items": analyses,
        }

    @app.post(f"/v2/{platform}/similar-searches")
    async def create_platform_similarity_search(
        request: PlatformSimilaritySearchRequest,
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Build, rank and persist a pool of accounts similar to the source account.
        #
        # Candidates are gathered from three sources, in this order:
        #   1. accounts already linked to the source (when seed_linked_accounts);
        #   2. manually supplied profile URLs (request.candidate_urls);
        #   3. the caller's other creator accounts on this platform.
        # Each gets a heuristic score from keyword overlap with the source
        # account and with request.extra_requirements.  The pool is sorted by
        # score, truncated to request.max_candidates and stored together with
        # the search row.  Returns the new search id.
        account_row = _require_account(request.source_account_id, account["id"])
        source_payload = _account_payload(account_row)
        ranked_candidates: list[dict[str, Any]] = []
        seen_keys: set[str] = set()
        source_tokens = _similarity_tokens(source_payload)
        extra_requirement_tokens = {
            token.lower()
            for token in _extract_keywords(request.extra_requirements or "")
            if token
        }

        def _append_candidate(item: dict[str, Any]) -> None:
            # Add `item` to the pool unless an equivalent candidate is already there.
            candidate_account_id = str(item.get("candidate_account_id") or "").strip()
            candidate_profile_url = str(item.get("candidate_profile_url") or "").strip()
            # Register BOTH identifiers.  Keying only on the account id (when
            # present) let the same profile slip in a second time through a
            # source that knows it only by URL, e.g. a manual candidate URL
            # pointing at an already-linked account.
            dedupe_keys = {key for key in (candidate_account_id, candidate_profile_url.lower()) if key}
            if not dedupe_keys or not seen_keys.isdisjoint(dedupe_keys):
                return
            seen_keys.update(dedupe_keys)
            ranked_candidates.append(item)

        # Source 1: existing benchmark links get the highest base score (65).
        if request.seed_linked_accounts:
            for linked in _linked_account_context(account_row, [], include_linked_accounts=True, limit=max(4, request.max_candidates)):
                linked_account = linked.get("account") or {}
                candidate_tokens = _similarity_tokens(linked_account)
                source_overlap = len(source_tokens.intersection(candidate_tokens))
                requirement_overlap = len(extra_requirement_tokens.intersection(candidate_tokens))
                heuristic = 65 + source_overlap * 6 + requirement_overlap * 8
                _append_candidate(
                    {
                        "candidate_account_id": linked.get("target_account_id", ""),
                        "candidate_profile_url": linked.get("target_profile_url") or linked_account.get("profile_url", ""),
                        "candidate_nickname": linked_account.get("nickname", "") or linked.get("target_nickname", ""),
                        "heuristic_score": float(heuristic),
                        "agent_score": float(heuristic),
                        "rationale_text": "来自已建立的对标关系，优先纳入相似账号候选池。",
                        "dimensions_json": {"source": "linked_account", "source_overlap": source_overlap, "requirement_overlap": requirement_overlap},
                    }
                )

        # Source 2: manual URLs.  Only the URL-derived title is available for
        # token matching; base score 52.  Normalised once and reused for the
        # stored search context below.
        candidate_urls = _normalize_id_list(request.candidate_urls)
        for url in candidate_urls:
            title = _candidate_title_from_url(url)
            candidate_tokens = _similarity_tokens({"nickname": title, "signature": "", "tags": [], "keywords": []})
            source_overlap = len(source_tokens.intersection(candidate_tokens))
            requirement_overlap = len(extra_requirement_tokens.intersection(candidate_tokens))
            heuristic = 52 + source_overlap * 4 + requirement_overlap * 6
            _append_candidate(
                {
                    "candidate_account_id": "",
                    "candidate_profile_url": url,
                    "candidate_nickname": title,
                    "heuristic_score": float(heuristic),
                    "agent_score": float(heuristic),
                    "rationale_text": "来自手动提供的主页链接，已纳入相似账号候选池。",
                    "dimensions_json": {"source": "manual_url", "source_overlap": source_overlap, "requirement_overlap": requirement_overlap, "search_public_pages": bool(request.search_public_pages)},
                }
            )

        # Source 3: the caller's other accounts on this platform.  Besides the
        # overlap terms, the score has a positional bonus that shrinks with the
        # row's position in the listing.
        candidates = [
            row for row in _content_source_rows(account["id"], platform, "creator_account")
            if row["id"] != account_row["id"]
        ][: max(5, request.max_candidates * 2)]
        for index, row in enumerate(candidates, start=1):
            payload = _account_payload(row)
            candidate_tokens = _similarity_tokens(payload)
            source_overlap = len(source_tokens.intersection(candidate_tokens))
            requirement_overlap = len(extra_requirement_tokens.intersection(candidate_tokens))
            heuristic = source_overlap * 10 + requirement_overlap * 8 + max(0, 50 - index)
            rationale = f"与源账号同平台，标签/关键词重合 {source_overlap}，适合作为{label}对标候选。"
            _append_candidate(
                {
                    "candidate_account_id": row["id"],
                    "candidate_profile_url": payload.get("profile_url", ""),
                    "candidate_nickname": payload.get("nickname", ""),
                    "heuristic_score": float(heuristic),
                    "agent_score": float(heuristic),
                    "rationale_text": rationale,
                    "dimensions_json": {"source": "local_account", "source_overlap": source_overlap, "requirement_overlap": requirement_overlap},
                }
            )

        # Stable sort: candidates with equal scores keep their insertion order.
        ranked_candidates.sort(key=lambda item: item["agent_score"], reverse=True)
        ranked_candidates = ranked_candidates[: request.max_candidates]

        search_id = make_id(f"{platform}_search")
        legacy.db.execute(
            f"INSERT INTO {table_prefix}_similarity_searches (id, user_id, source_account_id, prompt_text, context_json, created_at) VALUES (?, ?, ?, ?, ?, ?)",
            (
                search_id,
                account["id"],
                account_row["id"],
                request.extra_requirements or "",
                _safe_json_dumps(
                    {
                        "source_account": source_payload,
                        "candidate_urls": candidate_urls,
                        "seed_linked_accounts": bool(request.seed_linked_accounts),
                        "search_public_pages": bool(request.search_public_pages),
                        "model_profile_id": request.model_profile_id or "",
                    }
                ),
                now(),
            ),
        )
        # rank_index is the zero-based position after sorting and truncation.
        for idx, item in enumerate(ranked_candidates):
            legacy.db.execute(
                f"""INSERT INTO {table_prefix}_similarity_candidates
                (id, search_id, candidate_account_id, candidate_profile_url, heuristic_score, agent_score, rationale_text, dimensions_json, raw_output_json, rank_index, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    make_id(f"{platform}_candidate"),
                    search_id,
                    item.get("candidate_account_id") or None,
                    item.get("candidate_profile_url", ""),
                    item.get("heuristic_score", 0),
                    item.get("agent_score", 0),
                    item.get("rationale_text", ""),
                    _safe_json_dumps(item.get("dimensions_json") or {}),
                    _safe_json_dumps(item),
                    idx,
                    now(),
                ),
            )
        return {"id": search_id, "search_id": search_id}

    @app.get(f"/v2/{platform}/similar-searches/{{search_id}}")
    def get_platform_similarity_search(search_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
        # Return one of the caller's similarity searches with its candidates in
        # rank order.  Responds 404 when no search with this id belongs to the
        # caller.
        search_row = legacy.db.fetch_one(
            f"SELECT * FROM {table_prefix}_similarity_searches WHERE id = ? AND user_id = ?",
            (search_id, account["id"]),
        )
        if not search_row:
            raise HTTPException(status_code=404, detail="Similarity search not found")

        candidate_rows = legacy.db.fetch_all(
            f"SELECT * FROM {table_prefix}_similarity_candidates WHERE search_id = ? ORDER BY rank_index ASC",
            (search_id,),
        )
        # Columns used to fill keys missing from the stored raw payload, paired
        # with the fallback used when the row lacks the column too.
        column_fallbacks = (
            ("candidate_account_id", ""),
            ("candidate_profile_url", ""),
            ("rationale_text", ""),
            ("agent_score", 0),
            ("heuristic_score", 0),
        )
        candidates = []
        for row in candidate_rows:
            candidate = _parse_json(row.get("raw_output_json") or "{}", {})
            for column, fallback in column_fallbacks:
                candidate.setdefault(column, row.get(column, fallback))
            candidates.append(candidate)

        stored_search_id = search_row["id"]
        return {
            "id": stored_search_id,
            "search_id": stored_search_id,
            "source_account_id": search_row["source_account_id"],
            "context": _parse_json(search_row.get("context_json") or "{}", {}),
            "candidates": candidates,
            "created_at": search_row["created_at"],
        }

    @app.get(f"/v2/{platform}/accounts/{{account_id}}/benchmark-links")
    def list_platform_benchmark_links(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
        # List relations whose source is this account, newest first.  The
        # account is resolved via _require_account before the query runs.
        _require_account(account_id, account["id"])
        query = f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? ORDER BY created_at DESC"
        relation_rows = legacy.db.fetch_all(query, (account_id,))
        links: list[dict[str, Any]] = []
        for relation_row in relation_rows:
            links.append(_relation_payload(relation_row))
        return links

    @app.post(f"/v2/{platform}/accounts/{{account_id}}/benchmark-links")
    def create_platform_benchmark_links(
        account_id: str,
        request: PlatformBenchmarkLinksRequest,
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Create relation rows from the source account to each target account id
        # and each non-blank target profile URL in the request.
        #
        # Returns {"links": [...]} with one relation payload per created row:
        # account-id targets first, then URL targets, each in request order.
        source_account = _require_account(account_id, account["id"])

        # Resolve every target account BEFORE writing anything, so that an
        # invalid id later in the list cannot abort the request after earlier
        # links were already inserted.
        targets = [
            _require_account(target_account_id, account["id"])
            for target_account_id in request.target_account_ids
        ]

        # (target_account_id or None, target_profile_url) for each link to create.
        link_specs: list[tuple[str | None, str]] = [
            (target["id"], target.get("source_url", "")) for target in targets
        ]
        for target_profile_url in request.target_profile_urls:
            cleaned = str(target_profile_url or "").strip()
            if cleaned:
                # URL-only links have no local account to point at.
                link_specs.append((None, cleaned))

        created: list[dict[str, Any]] = []
        for target_id, profile_url in link_specs:
            relation_id = make_id(f"{platform}_link")
            legacy.db.execute(
                f"INSERT INTO {table_prefix}_account_relations (id, user_id, source_account_id, target_account_id, target_profile_url, relation_type, note, search_id, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    relation_id,
                    account["id"],
                    source_account["id"],
                    target_id,
                    profile_url,
                    request.relation_type or "benchmark",
                    request.note or "",
                    request.search_id or "",
                    now(),
                ),
            )
            created.append(_relation_payload(legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_account_relations WHERE id = ?", (relation_id,))))
        return {"links": created}

    @app.get(f"/v2/{platform}/tracking/accounts")
    def list_platform_tracking_accounts(account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
        # Return the caller's tracked accounts plus the stored cursor timestamp
        # (empty string when no cursor row exists).
        user_id = account["id"]
        cursor_row = _tracking_cursor(user_id) or {}
        tracked_accounts = _list_tracked_accounts(user_id)
        return {
            "items": tracked_accounts,
            "cursor_last_seen_at": cursor_row.get("last_seen_at", ""),
        }

    @app.post(f"/v2/{platform}/tracking/accounts")
    def create_platform_tracking_account(
        request: PlatformTrackingAccountRequest,
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Start tracking an account for the caller, or refresh the assistant
        # and note on the tracking row that already exists for that pair.
        #
        # request: carries tracked_account_id plus optional assistant_id/note.
        # account: the authenticated user row injected by the dependency.
        # Returns the stored tracking row passed through
        # _tracked_account_payload.
        user_id = account["id"]
        tracked = _require_account(request.tracked_account_id, user_id)
        assistant = legacy.resolve_target_assistant(user_id, request.assistant_id or None, tracked.get("project_id", ""))
        # Store NULL rather than "" when no assistant (or no id) was resolved.
        assistant_id = (assistant or {}).get("id") or None
        note = request.note or ""

        existing = legacy.db.fetch_one(
            f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? AND tracked_account_id = ?",
            (user_id, tracked["id"]),
        )
        # Take the timestamp once so a freshly inserted row gets identical
        # created_at and updated_at values instead of two separate now() reads.
        timestamp = now()
        if existing:
            tracking_id = existing["id"]
            legacy.db.execute(
                f"UPDATE {table_prefix}_tracked_accounts SET assistant_id = ?, note = ?, updated_at = ? WHERE id = ?",
                (assistant_id, note, timestamp, tracking_id),
            )
        else:
            # NOTE(review): lookup-then-insert is not atomic; whether the table
            # enforces uniqueness on (user_id, tracked_account_id) is not
            # visible here — confirm before relying on it under concurrency.
            tracking_id = make_id(f"{platform}_track")
            legacy.db.execute(
                f"INSERT INTO {table_prefix}_tracked_accounts (id, user_id, tracked_account_id, assistant_id, note, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    tracking_id,
                    user_id,
                    tracked["id"],
                    assistant_id,
                    note,
                    timestamp,
                    timestamp,
                ),
            )
        # Re-read the row so the response reflects what was actually stored.
        row = legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_tracked_accounts WHERE id = ?", (tracking_id,))
        return _tracked_account_payload(row)

    @app.post(f"/v2/{platform}/tracking/accounts/{{tracked_account_id}}/refresh")
    async def refresh_platform_tracked_account(tracked_account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
        # Queue a sync job for one tracked account and bump the tracking row's
        # updated_at. Responds 404 when the caller has no tracking row for the
        # given account id.
        user_id = account["id"]
        lookup_sql = f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? AND tracked_account_id = ?"
        tracking = legacy.db.fetch_one(lookup_sql, (user_id, tracked_account_id))
        if not tracking:
            raise HTTPException(status_code=404, detail="Tracked account not found")

        target_account = _require_account(tracked_account_id, user_id)
        # A missing or NULL assistant_id is passed on as an empty string.
        assistant_id = tracking.get("assistant_id", "") or ""
        job = await _create_sync_job_for_account(target_account, assistant_id=assistant_id)

        touch_sql = f"UPDATE {table_prefix}_tracked_accounts SET updated_at = ? WHERE id = ?"
        legacy.db.execute(touch_sql, (now(), tracking["id"]))
        return {
            "tracking_id": tracking["id"],
            "tracked_account_id": tracked_account_id,
            "sync_job_id": job["id"],
            "status": job["status"],
        }

    @app.post(f"/v2/{platform}/tracking/refresh")
    async def refresh_platform_tracking(account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
        # Queue a sync job for every account the caller tracks, most recently
        # updated first. A failure on one row is recorded in the response and
        # does not stop the remaining rows from being processed.
        user_id = account["id"]
        tracking_rows = legacy.db.fetch_all(
            f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? ORDER BY updated_at DESC",
            (user_id,),
        )
        ok_count = 0
        error_count = 0
        results: list[dict[str, Any]] = []
        for tracking in tracking_rows:
            try:
                target_account = _require_account(tracking["tracked_account_id"], user_id)
                assistant_id = tracking.get("assistant_id", "") or ""
                job = await _create_sync_job_for_account(target_account, assistant_id=assistant_id)
                ok_count += 1
                queued_entry = {
                    "tracking_id": tracking["id"],
                    "tracked_account_id": tracking["tracked_account_id"],
                    "sync_job_id": job["id"],
                    "status": job["status"],
                }
                results.append(queued_entry)
            except Exception as exc:
                # Best-effort batch: report the error text for this row and move on.
                error_count += 1
                failed_entry = {
                    "tracking_id": tracking["id"],
                    "tracked_account_id": tracking["tracked_account_id"],
                    "error": str(exc),
                }
                results.append(failed_entry)
        return {"refreshed": ok_count, "failed": error_count, "items": results}

    @app.post(f"/v2/{platform}/tracking/cursor")
    def update_platform_tracking_cursor(request: PlatformTrackingCursorRequest, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
        # Store the caller's new "last seen" marker and echo back the two
        # cursor fields the client needs.
        saved_cursor = _set_tracking_cursor(account["id"], request.last_seen_at)
        return {field: saved_cursor[field] for field in ("last_seen_at", "updated_at")}

    @app.get(f"/v2/{platform}/tracking/digest")
    def get_platform_tracking_digest(
        since: str = Query(default=""),
        limit: int = Query(default=24, ge=1, le=100),
        account: dict[str, Any] = Depends(legacy.require_approved),
    ) -> dict[str, Any]:
        # Return the tracking digest for the caller. `since` is trimmed of
        # surrounding whitespace before use; `limit` is bounded to 1..100 by
        # the query validation above.
        user_id = account["id"]
        normalized_since = since.strip() if since else ""
        return _tracking_digest(user_id, since_value=normalized_since, limit=limit)