Files
storyforge/collector-service/app/domestic_platform_features.py
kris 65db3cd336
Some checks failed
StoryForge CI / Baseline checks (push) Has been cancelled
StoryForge CI / Backend tests (push) Has been cancelled
StoryForge CI / Web tests (push) Has been cancelled
chore: sync storyforge handoff state
2026-05-02 17:50:21 +08:00

2120 lines
99 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import json
import re
from datetime import datetime, timezone
from typing import Any
from fastapi import Body, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from .douyin_features import _extract_json_blobs_from_html, _fetch_html
class PlatformAnalysisRequest(BaseModel):
    # Pydantic model holding the options of an analysis request.
    # (The code that consumes it is outside this section of the file.)
    # Both list fields default to a fresh empty list per instance.
    model_profile_ids: list[str] = Field(default_factory=list)
    linked_account_ids: list[str] = Field(default_factory=list)
    include_linked_accounts: bool = True
    include_recent_similar_candidates: bool = True
    # Validated to the inclusive range 1..20; defaults to 6.
    max_videos: int = Field(default=6, ge=1, le=20)
    extra_focus: str = ""
    temperature: float = 0.35
    auto_analyze_top_videos: bool = False
    # Validated to the inclusive range 1..10; defaults to 4.
    top_video_analysis_count: int = Field(default=4, ge=1, le=10)
class PlatformTopVideoAnalysisRequest(BaseModel):
    # Pydantic model holding the options of a top-video analysis request.
    # (The code that consumes it is outside this section of the file.)
    model_profile_id: str = ""
    # Validated to the inclusive range 1..12; defaults to 5.
    top_video_count: int = Field(default=5, ge=1, le=12)
    min_score: float = 0
    temperature: float = 0.25
class PlatformSimilaritySearchRequest(BaseModel):
    # Pydantic model holding the options of a similarity search request.
    # (The code that consumes it is outside this section of the file.)
    source_account_id: str = ""
    # Defaults to a fresh empty list per instance.
    candidate_urls: list[str] = Field(default_factory=list)
    seed_linked_accounts: bool = True
    search_public_pages: bool = True
    model_profile_id: str = ""
    # Validated to the inclusive range 1..20; defaults to 8.
    max_candidates: int = Field(default=8, ge=1, le=20)
    extra_requirements: str = ""
class PlatformBenchmarkLinksRequest(BaseModel):
    # Pydantic model describing account links to record; targets may be given
    # as account ids and/or profile URLs (both lists default to empty).
    # (The code that consumes it is outside this section of the file.)
    target_account_ids: list[str] = Field(default_factory=list)
    target_profile_urls: list[str] = Field(default_factory=list)
    # Same default value as the ``relation_type`` column default in the
    # ``*_account_relations`` table created by ``ensure_schema``.
    relation_type: str = "benchmark"
    note: str = ""
    search_id: str = ""
class PlatformTrackingAccountRequest(BaseModel):
    # Pydantic model for a tracked-account request.
    # ``tracked_account_id`` has no default and is therefore required.
    tracked_account_id: str
    assistant_id: str = ""
    note: str = ""
class PlatformTrackingCursorRequest(BaseModel):
    # Pydantic model for a tracking-cursor update.
    # ``last_seen_at`` has no default and is therefore required; it is a plain
    # string here (NOTE(review): presumably an ISO timestamp - confirm).
    last_seen_at: str
class PlatformManualPageCapture(BaseModel):
    # One manually supplied page: its URL, title and a JSON-like payload.
    # Used as the element type of
    # ``PlatformAccountSyncRequest.manual_creator_pages``.
    url: str = ""
    title: str = ""
    # Defaults to a fresh empty dict per instance.
    payload: dict[str, Any] = Field(default_factory=dict)
class PlatformAccountSyncRequest(BaseModel):
    # Pydantic model with the inputs of an account sync. Every field is
    # optional; ``_upsert_account_source`` reads project_id, profile_url,
    # handle, title, creator_center_urls and discovery_note from it.
    project_id: str = ""
    profile_url: str = ""
    handle: str = ""
    title: str = ""
    session_cookie: str = ""
    # Defaults to a fresh empty list per instance.
    creator_center_urls: list[str] = Field(default_factory=list)
    # ``None`` (the default) means no manual profile payload was supplied.
    manual_profile_payload: dict[str, Any] | None = None
    manual_creator_pages: list[PlatformManualPageCapture] = Field(default_factory=list)
    discovery_note: str = ""
def register_domestic_platform_routes(app: Any, legacy: Any, *, platform: str, label: str) -> None:
table_prefix = platform
def now() -> str:
    """Return the current timestamp string by delegating to ``legacy.utc_now()``."""
    return legacy.utc_now()
def make_id(prefix: str) -> str:
    """Return a new identifier for ``prefix`` by delegating to ``legacy.make_id``."""
    return legacy.make_id(prefix)
def _safe_json_dumps(value: Any) -> str:
return json.dumps(value or {}, ensure_ascii=False)
def _parse_json(raw: str, fallback: Any) -> Any:
cleaned = str(raw or "").strip()
if not cleaned:
return fallback
try:
value = json.loads(cleaned)
return value
except json.JSONDecodeError:
return fallback
def ensure_schema() -> None:
    """Create this platform's tables and indexes when they do not exist yet.

    Every table and index name is prefixed with ``table_prefix``. All
    statements use ``IF NOT EXISTS``, so running this repeatedly is harmless
    for already-created objects. The doubled braces (``{{}}``) inside the
    f-string produce a literal ``{}`` as the SQL column default.

    After running the script, two columns that are not part of the
    ``CREATE TABLE`` statement for ``*_analysis_reports`` are added with
    ``ALTER TABLE`` when they are missing.
    """
    schema = f"""
CREATE TABLE IF NOT EXISTS {table_prefix}_analysis_reports (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
account_source_id TEXT NOT NULL,
focus_text TEXT NOT NULL DEFAULT '',
prompt_text TEXT NOT NULL DEFAULT '',
context_json TEXT NOT NULL DEFAULT '{{}}',
created_at TEXT NOT NULL,
FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
FOREIGN KEY(account_source_id) REFERENCES content_sources(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_{table_prefix}_analysis_reports_account_created
ON {table_prefix}_analysis_reports(account_source_id, created_at DESC);
CREATE TABLE IF NOT EXISTS {table_prefix}_analysis_suggestions (
id TEXT PRIMARY KEY,
report_id TEXT NOT NULL,
model_profile_id TEXT NOT NULL DEFAULT '',
model_label TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'ok',
suggestion_text TEXT NOT NULL DEFAULT '',
parsed_json TEXT NOT NULL DEFAULT '{{}}',
created_at TEXT NOT NULL,
FOREIGN KEY(report_id) REFERENCES {table_prefix}_analysis_reports(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS {table_prefix}_similarity_searches (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
source_account_id TEXT NOT NULL,
prompt_text TEXT NOT NULL DEFAULT '',
context_json TEXT NOT NULL DEFAULT '{{}}',
created_at TEXT NOT NULL,
FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
FOREIGN KEY(source_account_id) REFERENCES content_sources(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS {table_prefix}_similarity_candidates (
id TEXT PRIMARY KEY,
search_id TEXT NOT NULL,
candidate_account_id TEXT,
candidate_profile_url TEXT NOT NULL DEFAULT '',
heuristic_score REAL NOT NULL DEFAULT 0,
agent_score REAL NOT NULL DEFAULT 0,
rationale_text TEXT NOT NULL DEFAULT '',
dimensions_json TEXT NOT NULL DEFAULT '{{}}',
raw_output_json TEXT NOT NULL DEFAULT '{{}}',
rank_index INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL,
FOREIGN KEY(search_id) REFERENCES {table_prefix}_similarity_searches(id) ON DELETE CASCADE,
FOREIGN KEY(candidate_account_id) REFERENCES content_sources(id) ON DELETE SET NULL
);
CREATE INDEX IF NOT EXISTS idx_{table_prefix}_similarity_candidates_search_rank
ON {table_prefix}_similarity_candidates(search_id, rank_index ASC);
CREATE TABLE IF NOT EXISTS {table_prefix}_account_relations (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
source_account_id TEXT NOT NULL,
target_account_id TEXT,
target_profile_url TEXT NOT NULL DEFAULT '',
relation_type TEXT NOT NULL DEFAULT 'benchmark',
note TEXT NOT NULL DEFAULT '',
search_id TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL,
FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
FOREIGN KEY(source_account_id) REFERENCES content_sources(id) ON DELETE CASCADE,
FOREIGN KEY(target_account_id) REFERENCES content_sources(id) ON DELETE SET NULL
);
CREATE INDEX IF NOT EXISTS idx_{table_prefix}_account_relations_source
ON {table_prefix}_account_relations(source_account_id, created_at DESC);
CREATE TABLE IF NOT EXISTS {table_prefix}_tracked_accounts (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
tracked_account_id TEXT NOT NULL,
assistant_id TEXT,
note TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
UNIQUE(user_id, tracked_account_id),
FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
FOREIGN KEY(tracked_account_id) REFERENCES content_sources(id) ON DELETE CASCADE,
FOREIGN KEY(assistant_id) REFERENCES assistants(id) ON DELETE SET NULL
);
CREATE INDEX IF NOT EXISTS idx_{table_prefix}_tracked_accounts_user_updated
ON {table_prefix}_tracked_accounts(user_id, updated_at DESC);
CREATE TABLE IF NOT EXISTS {table_prefix}_tracking_cursors (
user_id TEXT PRIMARY KEY,
last_seen_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE
);
"""
    with legacy.db.session() as conn:
        conn.executescript(schema)
        # Lightweight migration: inspect the columns that currently exist on
        # the reports table and add the two JSON list columns if absent.
        existing_columns = {row["name"] for row in conn.execute(f"PRAGMA table_info({table_prefix}_analysis_reports)").fetchall()}
        if "model_profile_ids_json" not in existing_columns:
            conn.execute(
                f"ALTER TABLE {table_prefix}_analysis_reports ADD COLUMN model_profile_ids_json TEXT NOT NULL DEFAULT '[]'"
            )
        if "linked_account_ids_json" not in existing_columns:
            conn.execute(
                f"ALTER TABLE {table_prefix}_analysis_reports ADD COLUMN linked_account_ids_json TEXT NOT NULL DEFAULT '[]'"
            )
# Run the schema setup immediately, as part of the enclosing scope's execution.
ensure_schema()
def _content_source_rows(user_id: str, platform_value: str, kind: str = "") -> list[dict[str, Any]]:
    """Fetch a user's ``content_sources`` rows for one platform, newest first.

    Rows are ordered by ``updated_at`` then ``created_at`` (both descending).
    When ``kind`` is non-empty, only rows whose ``source_kind`` equals it are
    returned; the filtering is done in Python after the query.
    """
    fetched = legacy.db.fetch_all(
        "SELECT * FROM content_sources WHERE user_id = ? AND platform = ? ORDER BY updated_at DESC, created_at DESC",
        (user_id, platform_value),
    )
    if not kind:
        return fetched
    return [entry for entry in fetched if entry.get("source_kind") == kind]
def _content_source_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Return the payload for ``row`` by delegating to ``legacy.content_source_payload``."""
    return legacy.content_source_payload(row)
def _source_metadata(row: dict[str, Any]) -> dict[str, Any]:
    """Return the ``metadata`` entry of the row's payload.

    Falls back to an empty dict when the payload has no ``metadata`` key.
    """
    return _content_source_payload(row).get("metadata", {})
def _first_non_empty(*values: Any) -> str:
for value in values:
if value is None:
continue
if isinstance(value, str):
stripped = value.strip()
if stripped:
return stripped
elif value not in ("", [], {}, ()):
return str(value)
return ""
def _dedupe_strings(values: list[str]) -> list[str]:
result: list[str] = []
seen: set[str] = set()
for value in values:
item = str(value or "").strip()
if not item:
continue
key = item.lower()
if key in seen:
continue
seen.add(key)
result.append(item)
return result
def _compact_text(value: Any, limit: int = 500) -> str:
text = str(value or "").strip()
if len(text) <= limit:
return text
return f"{text[: limit - 1]}"
def _normalize_id_list(values: list[str] | None) -> list[str]:
    """Return the non-blank ids from ``values``, stripped and de-duplicated.

    ``None`` is treated as an empty list.
    """
    cleaned: list[str] = []
    for raw in values or []:
        text = str(raw or "").strip()
        if text:
            cleaned.append(text)
    return _dedupe_strings(cleaned)
def _parse_count(value: Any) -> float:
if value is None:
return 0.0
if isinstance(value, (int, float)):
return float(value)
text = str(value).strip().lower().replace(",", "")
if not text:
return 0.0
multiplier = 1.0
if text.endswith("w") or text.endswith(""):
multiplier = 10_000.0
text = text[:-1]
elif text.endswith("亿"):
multiplier = 100_000_000.0
text = text[:-1]
match = re.search(r"-?\d+(?:\.\d+)?", text)
if not match:
return 0.0
try:
return float(match.group()) * multiplier
except ValueError:
return 0.0
def _extract_hashtags(*texts: str) -> list[str]:
    """Collect ``#tag`` names (without the ``#``) from the given texts.

    Empty texts are skipped; the result is de-duplicated by
    ``_dedupe_strings``.
    """
    hashtag_pattern = re.compile(r"#([\w\u4e00-\u9fff]+)")
    found = [tag for text in texts if text for tag in hashtag_pattern.findall(text)]
    return _dedupe_strings(found)
def _extract_keywords(*texts: str) -> list[str]:
    """Extract keyword candidates from the given texts.

    For each non-empty text this collects hashtags, runs of 2-8 CJK
    characters, and ASCII words of 3-21 characters that start with a letter.
    Generic stop words, plus the current ``platform`` and ``label``, are
    removed, and the result is de-duplicated by ``_dedupe_strings``.
    """
    # Candidates are compared in lowercase, so the stop words must be
    # lowercase too; otherwise a mixed-case platform/label never matches.
    stop_words = {
        "视频",
        "作品",
        "账号",
        "内容",
        "发布",
        "更多",
        "关注",
        "用户",
        "creator",
        "profile",
        platform.lower(),
        label.lower(),
    }
    candidates: list[str] = []
    for text in texts:
        if not text:
            continue
        candidates.extend(_extract_hashtags(text))
        candidates.extend(re.findall(r"[\u4e00-\u9fff]{2,8}", text))
        candidates.extend(re.findall(r"[A-Za-z][A-Za-z0-9_]{2,20}", text))
    return _dedupe_strings([item for item in candidates if item.lower() not in stop_words])
def _normalize_timestamp(value: Any) -> str:
if value in (None, ""):
return ""
if isinstance(value, (int, float)):
timestamp = float(value)
if timestamp > 1_000_000_000_000:
timestamp /= 1000.0
if timestamp <= 0:
return ""
return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()
text = str(value or "").strip()
if not text:
return ""
if re.fullmatch(r"\d{10,13}", text):
return _normalize_timestamp(float(text))
return text
def _flatten_json(value: Any, prefix: str = "") -> list[tuple[str, str, str]]:
    """Flatten nested JSON-like data into ``(path, type name, text)`` rows.

    Dict keys are joined with ``.`` and list positions are written as
    ``[index]``. Each leaf becomes one row whose text is limited to 2000
    characters by ``_compact_text``; a bare leaf with no prefix uses ``$`` as
    its path. Empty dicts and lists contribute no rows.
    """
    if isinstance(value, dict):
        children = [
            (f"{prefix}.{key}" if prefix else str(key), child)
            for key, child in value.items()
        ]
    elif isinstance(value, list):
        children = [(f"{prefix}[{index}]", child) for index, child in enumerate(value)]
    else:
        return [(prefix or "$", type(value).__name__, _compact_text(value, 2000))]
    flattened: list[tuple[str, str, str]] = []
    for child_path, child in children:
        flattened.extend(_flatten_json(child, child_path))
    return flattened
def _walk_json(value: Any) -> list[dict[str, Any]]:
items: list[dict[str, Any]] = []
if isinstance(value, dict):
items.append(value)
for child in value.values():
items.extend(_walk_json(child))
elif isinstance(value, list):
for child in value:
items.extend(_walk_json(child))
return items
def _default_creator_center_urls() -> list[str]:
    """Return the built-in creator-center URLs for the current ``platform``.

    Platforms without an entry get an empty list. The table is rebuilt on
    every call, so callers receive a fresh list each time.
    """
    defaults_by_platform = {
        "kuaishou": [
            "https://creator.kuaishou.com/creator/home",
            "https://creator.kuaishou.com/creator/content/works",
            "https://creator.kuaishou.com/creator/data/overview",
        ],
        "xiaohongshu": [
            "https://creator.xiaohongshu.com/publish/publish",
        ],
        "bilibili": [
            "https://member.bilibili.com/platform/home",
        ],
        "wechat_video": [
            "https://channels.weixin.qq.com/platform",
        ],
    }
    if platform in defaults_by_platform:
        return defaults_by_platform[platform]
    return []
def _profile_candidate_score(candidate: dict[str, Any]) -> int:
score = 0
interesting_keys = {
"nickname",
"name",
"user_name",
"author_name",
"handle",
"uid",
"user_id",
"kwaiId",
"kwai_id",
"bio",
"description",
"signature",
"follower_count",
"fans_count",
"fans",
"avatar_url",
"head_url",
}
score += sum(1 for key in interesting_keys if key in candidate)
if "profile" in candidate and isinstance(candidate["profile"], dict):
score += 2
return score
def _extract_profile_candidates(payload: Any) -> list[dict[str, Any]]:
    """Collect the dicts inside ``payload`` that look like profile objects.

    Every dict reached by ``_walk_json`` with a profile score of at least 2
    is kept. A nested ``profile`` dict that reaches the same score is also
    appended directly after being seen on its parent.
    """
    min_score = 2
    matches: list[dict[str, Any]] = []
    for node in _walk_json(payload):
        if _profile_candidate_score(node) >= min_score:
            matches.append(node)
        nested = node.get("profile")
        if not isinstance(nested, dict):
            continue
        if _profile_candidate_score(nested) >= min_score:
            matches.append(nested)
    return matches
def _normalize_profile_candidate(
    candidate: dict[str, Any],
    *,
    fallback_url: str = "",
    fallback_title: str = "",
    fallback_handle: str = "",
) -> dict[str, Any]:
    """Map a raw profile-like dict onto the normalized profile shape.

    Several alternative source keys are tried for each output field; the
    ``fallback_*`` arguments are used when the candidate offers nothing.

    Returns a dict with ``nickname``, ``signature``, ``avatar_url``,
    ``profile_url``, ``handle``, ``stats`` (followers/likes/plays/videos as
    floats), ``tags`` and ``keywords``.
    """
    avatar = (
        candidate.get("avatar_url")
        or candidate.get("avatar")
        or candidate.get("head_url")
        or candidate.get("headurl")
        or candidate.get("user_avatar")
        or candidate.get("profile_image_url")
    )
    if isinstance(avatar, dict):
        url_list = avatar.get("url_list")
        avatar = _first_non_empty(
            # Guard against an empty list: indexing it would raise IndexError.
            url_list[0] if isinstance(url_list, list) and url_list else "",
            avatar.get("url"),
            avatar.get("src"),
        )
    nickname = _first_non_empty(
        candidate.get("nickname"),
        candidate.get("name"),
        candidate.get("user_name"),
        candidate.get("author_name"),
        candidate.get("display_name"),
        fallback_title,
        fallback_handle,
    )
    signature = _first_non_empty(
        candidate.get("signature"),
        candidate.get("bio"),
        candidate.get("description"),
        candidate.get("desc"),
        candidate.get("intro"),
        candidate.get("introduction"),
    )
    explicit_tags = candidate.get("tags") or candidate.get("content_tags") or candidate.get("keywords") or []
    if not isinstance(explicit_tags, list):
        # A single scalar tag is treated as a one-element list.
        explicit_tags = [explicit_tags]
    handle = _first_non_empty(
        candidate.get("handle"),
        candidate.get("user_id"),
        candidate.get("uid"),
        candidate.get("kwai_id"),
        candidate.get("kwaiId"),
        candidate.get("short_id"),
        fallback_handle,
    )
    return {
        "nickname": nickname,
        "signature": signature,
        "avatar_url": _first_non_empty(avatar),
        "profile_url": _first_non_empty(candidate.get("profile_url"), candidate.get("share_url"), fallback_url),
        "handle": handle,
        "stats": {
            "followers": _parse_count(
                candidate.get("follower_count")
                or candidate.get("fans_count")
                or candidate.get("fans")
                or candidate.get("followers")
            ),
            "likes": _parse_count(
                candidate.get("like_count")
                or candidate.get("liked_count")
                or candidate.get("total_like")
            ),
            "plays": _parse_count(
                candidate.get("play_count")
                or candidate.get("view_count")
                or candidate.get("views")
            ),
            "videos": _parse_count(
                candidate.get("video_count")
                or candidate.get("works_count")
                or candidate.get("published_count")
                or candidate.get("aweme_count")
            ),
        },
        # Only scalar tags are kept; nested structures are ignored.
        "tags": _dedupe_strings(
            [str(item) for item in explicit_tags if isinstance(item, (str, int, float))]
            + _extract_hashtags(signature, nickname)
        ),
        "keywords": _extract_keywords(nickname, signature, handle),
    }
def _video_candidate_score(candidate: dict[str, Any]) -> int:
    """Score how much ``candidate`` looks like a video/work object.

    Each group of alternative keys adds its weight when at least one of the
    keys holds a non-empty value; a dict under ``stats`` or ``statistics``
    adds one more point.
    """
    weighted_groups = (
        (2, ("title", "name", "caption")),
        (2, ("share_url", "video_url", "play_url", "url")),
        (2, ("video_id", "aweme_id", "item_id", "note_id", "works_id")),
        (1, ("description", "desc", "summary", "text")),
        (1, ("cover_url", "published_at", "publish_time", "create_time")),
        (1, ("duration_sec", "duration", "play_count", "view_count")),
    )
    total = 0
    for weight, keys in weighted_groups:
        if _first_non_empty(*(candidate.get(key) for key in keys)):
            total += weight
    if isinstance(candidate.get("stats") or candidate.get("statistics"), dict):
        total += 1
    return total
def _extract_video_candidates(payload: Any) -> list[dict[str, Any]]:
    """Return the dicts inside ``payload`` whose video score is at least 4."""
    min_score = 4
    return [node for node in _walk_json(payload) if _video_candidate_score(node) >= min_score]
def _video_metric_bundle(candidate: dict[str, Any]) -> dict[str, float]:
    """Extract play/like/comment/share counts for a video candidate.

    Top-level keys on ``candidate`` take precedence; the nested ``stats``
    dict (or ``statistics`` when ``stats`` is missing or empty) is consulted
    next. Values are parsed with ``_parse_count``.
    """
    nested = candidate.get("stats")
    if not isinstance(nested, dict):
        nested = {}
    alternate = candidate.get("statistics")
    if not nested and isinstance(alternate, dict):
        nested = alternate

    def metric(*lookups: tuple[dict[str, Any], str]) -> float:
        # Mirror an ``a or b or c`` chain: first truthy value, else the last one.
        picked = None
        for source, key in lookups:
            picked = source.get(key)
            if picked:
                break
        return _parse_count(picked)

    return {
        "play": metric(
            (candidate, "play_count"),
            (candidate, "view_count"),
            (nested, "play_count"),
            (nested, "view_count"),
        ),
        "like": metric(
            (candidate, "like_count"),
            (candidate, "digg_count"),
            (nested, "like_count"),
            (nested, "digg_count"),
        ),
        "comment": metric((candidate, "comment_count"), (nested, "comment_count")),
        "share": metric((candidate, "share_count"), (nested, "share_count")),
    }
def _heuristic_video_performance_score(stats: dict[str, Any]) -> float:
    """Compute a 0-100 engagement score from a stats dict.

    Accepts either short keys (``play``) or long keys (``play_count``).
    The score is play/5000 + like/100 + comment/20 + share/10, capped at 100
    and rounded to one decimal; it is 0.0 when no metric is positive.
    """

    def metric(short_key: str, long_key: str) -> float:
        return _parse_count(stats.get(short_key) or stats.get(long_key))

    play = metric("play", "play_count")
    like = metric("like", "like_count")
    comment = metric("comment", "comment_count")
    share = metric("share", "share_count")
    if all(amount <= 0 for amount in (play, like, comment, share)):
        return 0.0
    weighted = (play / 5000.0) + (like / 100.0) + (comment / 20.0) + (share / 10.0)
    capped = min(100.0, weighted)
    return round(capped, 1)
def _normalize_video_candidate(candidate: dict[str, Any]) -> dict[str, Any]:
    """Map a raw video-like dict onto the normalized video shape.

    Returns a dict with ``external_id``, ``title`` (with a placeholder when
    none is found), ``description``, ``share_url``, ``cover_url``,
    ``duration_sec``, ``published_at``, ``tags``, ``stats``,
    ``performance_score`` and the untouched input under ``raw``.
    """
    cover = candidate.get("cover_url") or candidate.get("cover") or candidate.get("poster") or candidate.get("image") or candidate.get("thumbnail")
    if isinstance(cover, dict):
        cover = _first_non_empty(
            cover.get("url"),
            cover.get("src"),
            cover.get("url_list", [""])[0] if isinstance(cover.get("url_list"), list) and cover.get("url_list") else "",
        )
    tags = candidate.get("tags") or candidate.get("content_tags") or candidate.get("keywords") or []
    if not isinstance(tags, list):
        # A single scalar tag is treated as a one-element list.
        tags = [tags]
    stats = _video_metric_bundle(candidate)
    title = _first_non_empty(candidate.get("title"), candidate.get("name"), candidate.get("caption"), candidate.get("desc"))
    description = _first_non_empty(candidate.get("description"), candidate.get("desc"), candidate.get("summary"), candidate.get("text"), title)
    try:
        duration_sec = float(candidate.get("duration_sec") or candidate.get("duration") or 0)
    except (TypeError, ValueError):
        # Scraped payloads may carry non-numeric durations (e.g. "01:23" or a
        # nested object); fall back to 0 rather than aborting the extraction.
        duration_sec = 0.0
    return {
        "external_id": _first_non_empty(candidate.get("video_id"), candidate.get("aweme_id"), candidate.get("item_id"), candidate.get("note_id"), candidate.get("works_id")),
        "title": title or "未命名作品",
        "description": description,
        "share_url": _first_non_empty(candidate.get("share_url"), candidate.get("video_url"), candidate.get("play_url"), candidate.get("url")),
        "cover_url": _first_non_empty(cover),
        "duration_sec": duration_sec,
        "published_at": _normalize_timestamp(candidate.get("published_at") or candidate.get("publish_time") or candidate.get("create_time")),
        # Only scalar tags are kept; nested structures are ignored.
        "tags": _dedupe_strings([str(item) for item in tags if isinstance(item, (str, int, float))]),
        "stats": stats,
        "performance_score": _heuristic_video_performance_score(stats),
        "raw": candidate,
    }
def _extract_videos(payloads: list[Any]) -> list[dict[str, Any]]:
    """Extract, de-duplicate and rank normalized videos from ``payloads``.

    Videos are identified by share URL, then external id, then title; the
    first occurrence of each identity is kept. The result is sorted by
    performance score and then ``published_at`` text, both descending.
    """
    by_identity: dict[str, dict[str, Any]] = {}
    for payload in payloads:
        for raw in _extract_video_candidates(payload):
            video = _normalize_video_candidate(raw)
            identity = video["share_url"] or video["external_id"] or video["title"]
            if identity and identity not in by_identity:
                by_identity[identity] = video

    def rank(video: dict[str, Any]) -> tuple[float, str]:
        return (float(video.get("performance_score") or 0), str(video.get("published_at") or ""))

    return sorted(by_identity.values(), key=rank, reverse=True)
def _pick_best_profile(
    payloads: list[Any],
    *,
    fallback_url: str = "",
    fallback_title: str = "",
    fallback_handle: str = "",
) -> dict[str, Any]:
    """Choose the most complete normalized profile found in ``payloads``.

    Candidates come from ``_extract_profile_candidates``; when none are
    found and the first payload is a dict, that payload is used as the only
    candidate. Completeness is weighted: nickname 3, signature 2, handle 2,
    followers 1, avatar 1. The earliest candidate wins ties. With no
    candidates at all, a profile built purely from the fallbacks is returned.
    """

    def normalize(raw: dict[str, Any]) -> dict[str, Any]:
        return _normalize_profile_candidate(
            raw,
            fallback_url=fallback_url,
            fallback_title=fallback_title,
            fallback_handle=fallback_handle,
        )

    def completeness(profile: dict[str, Any]) -> int:
        weighted_presence = (
            (3, profile["nickname"]),
            (2, profile["signature"]),
            (2, profile["handle"]),
            (1, profile["stats"]["followers"]),
            (1, profile["avatar_url"]),
        )
        return sum(weight for weight, present in weighted_presence if present)

    pool = [found for payload in payloads for found in _extract_profile_candidates(payload)]
    if not pool and payloads and isinstance(payloads[0], dict):
        pool = [payloads[0]]
    winner = normalize({})
    winner_score = -1
    for raw in pool:
        profile = normalize(raw)
        profile_score = completeness(profile)
        if profile_score > winner_score:
            winner, winner_score = profile, profile_score
    return winner
def _fields_payload(payload: Any) -> tuple[list[dict[str, Any]], int]:
    """Flatten ``payload`` into field records.

    Returns ``(fields, total)`` where ``fields`` holds at most the first 300
    flattened rows as dicts and ``total`` is the untruncated row count.
    """
    max_fields = 300
    rows = _flatten_json(payload)
    fields = []
    for path, type_name, text in rows[:max_fields]:
        fields.append(
            {
                "field_path": path,
                "field_type": type_name,
                "field_value_text": text,
            }
        )
    return fields, len(rows)
def _snapshot_summary_from_payload(payload: Any) -> dict[str, Any]:
    """Build a short summary of ``payload``.

    The summary maps the first 8 flattened field paths to their text values
    and adds ``field_count`` when there is at least one field and no field
    path already uses that name.
    """
    fields, total = _fields_payload(payload)
    summary: dict[str, Any] = {entry["field_path"]: entry["field_value_text"] for entry in fields[:8]}
    if total and "field_count" not in summary:
        summary["field_count"] = total
    return summary
def _build_snapshot(snapshot_type: str, source_url: str, payload: Any) -> dict[str, Any]:
    """Assemble a snapshot record for ``payload``.

    The record gets a fresh id prefixed with ``{platform}_snapshot``, the
    current timestamp as ``collected_at``, the flattened fields, their total
    count, a short summary and the raw payload itself.
    """
    fields, total = _fields_payload(payload)
    snapshot: dict[str, Any] = {"collected_at": now()}
    snapshot_id = make_id(f"{platform}_snapshot")
    summary = _snapshot_summary_from_payload(payload)
    return {
        "id": snapshot_id,
        "snapshot_type": snapshot_type,
        "source_url": source_url,
        "field_count": total,
        "collected_at": snapshot["collected_at"],
        "summary": summary,
        "raw_payload": payload,
        "fields": fields,
    }
def _creator_center_state(metadata: dict[str, Any]) -> dict[str, Any]:
state = metadata.get("creator_center")
return state if isinstance(state, dict) else {}
def _list_snapshots(account_row: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the account's stored snapshots, newest ``collected_at`` first.

    Non-dict entries are dropped; a non-list ``snapshots`` value yields an
    empty list.
    """
    stored = _creator_center_state(_source_metadata(account_row)).get("snapshots") or []
    if not isinstance(stored, list):
        return []
    return sorted(
        (entry for entry in stored if isinstance(entry, dict)),
        key=lambda entry: str(entry.get("collected_at") or ""),
        reverse=True,
    )
def _snapshot_brief(snapshot: dict[str, Any]) -> dict[str, Any]:
return {
"id": snapshot.get("id", ""),
"snapshot_type": snapshot.get("snapshot_type", ""),
"source_url": snapshot.get("source_url", ""),
"field_count": snapshot.get("field_count", 0),
"collected_at": snapshot.get("collected_at", ""),
"summary": snapshot.get("summary") or {},
}
def _latest_snapshot(account_row: dict[str, Any], snapshot_type: str) -> dict[str, Any] | None:
    """Return the newest snapshot of ``snapshot_type``, or ``None`` if absent."""
    for snapshot in _list_snapshots(account_row):
        if snapshot.get("snapshot_type") == snapshot_type:
            return snapshot
    return None
def _snapshot_detail(account_row: dict[str, Any], snapshot_id: str) -> dict[str, Any]:
    """Return the full view of one stored snapshot, including payload and fields.

    Raises:
        HTTPException: 404 when no snapshot with ``snapshot_id`` exists (or
            the matching entry is empty).
    """
    match = None
    for candidate in _list_snapshots(account_row):
        if candidate.get("id") == snapshot_id:
            match = candidate
            break
    if not match:
        raise HTTPException(status_code=404, detail="Snapshot not found")
    detail: dict[str, Any] = {
        key: match.get(key, default)
        for key, default in (
            ("id", ""),
            ("snapshot_type", ""),
            ("source_url", ""),
            ("field_count", 0),
            ("collected_at", ""),
        )
    }
    detail["summary"] = match.get("summary") or {}
    detail["raw_payload"] = match.get("raw_payload") or {}
    detail["fields"] = match.get("fields") or []
    return detail
async def _collect_public_profile(
    source_url: str,
    manual_payload: dict[str, Any] | None,
    *,
    fallback_title: str = "",
    fallback_handle: str = "",
) -> dict[str, Any]:
    """Gather public profile data from a manual payload or by fetching the page.

    A non-empty ``manual_payload`` is used as-is and nothing is fetched.
    Otherwise, when a URL is given, the page is fetched and its embedded JSON
    blobs become the payloads; any failure is recorded in ``errors`` rather
    than raised.

    Returns a dict with the chosen ``profile``, the raw ``payload`` (the
    single payload, up to 8 payloads under ``items``, or a minimal
    placeholder built from the fallbacks), the ``errors`` list and the final
    ``source_url``.
    """
    collected: list[Any] = []
    problems: list[str] = []
    page_url = source_url.strip()
    if manual_payload:
        collected.append(manual_payload)
    elif page_url:
        try:
            final_url, html = await _fetch_html(page_url)
            # Keep the post-redirect URL even if blob extraction fails below.
            page_url = final_url
            for blob in _extract_json_blobs_from_html(html):
                pass
            collected.extend(
                blob["payload"]
                for blob in _extract_json_blobs_from_html(html)
                if isinstance(blob.get("payload"), (dict, list))
            ) if False else collected.extend(
                [blob["payload"] for blob in _extract_json_blobs_from_html(html) if isinstance(blob.get("payload"), (dict, list))]
            ) if False else None
        except Exception as exc:
            problems.append(f"public_profile_fetch_failed: {exc}")
    profile = _pick_best_profile(
        collected,
        fallback_url=page_url,
        fallback_title=fallback_title,
        fallback_handle=fallback_handle,
    )
    if len(collected) == 1:
        raw_payload: Any = collected[0]
    elif collected:
        raw_payload = {"items": collected[:8]}
    elif page_url:
        raw_payload = {
            "profile_url": page_url,
            "title": fallback_title,
            "handle": fallback_handle,
        }
    else:
        raw_payload = {}
    return {
        "profile": profile,
        "payload": raw_payload,
        "errors": problems,
        "source_url": page_url,
    }
async def _collect_creator_center_pages(
    urls: list[str],
    cookie: str,
    manual_pages: list[PlatformManualPageCapture],
    *,
    fallback_title: str = "",
    fallback_handle: str = "",
) -> dict[str, Any]:
    """Gather creator-center page data from manual captures and fetched pages.

    Manual pages are always included. Pages are fetched only when ``cookie``
    is non-blank, using ``urls`` or the platform defaults. Per-URL failures
    are recorded in ``errors`` rather than raised.

    Returns a dict with the chosen ``profile``, an aggregated ``payload``
    (empty when there are no pages), the ``pages`` list and ``errors``.
    """
    captured_pages: list[dict[str, Any]] = []
    problems: list[str] = []
    collected: list[Any] = []
    for manual in manual_pages:
        manual_payload = manual.payload if isinstance(manual.payload, dict) else {}
        captured_pages.append({"url": manual.url, "title": manual.title, "payload": manual_payload})
        collected.append(manual_payload)
    targets = _dedupe_strings(urls or _default_creator_center_urls())
    if not cookie.strip():
        # Without a session cookie nothing is fetched.
        targets = []
    for target in targets:
        try:
            final_url, html = await _fetch_html(target, cookie=cookie)
            blobs = _extract_json_blobs_from_html(html)
            extracted = [blob["payload"] for blob in blobs if isinstance(blob.get("payload"), (dict, list))]
            if len(extracted) == 1:
                page_payload = extracted[0]
            elif not extracted and html.strip():
                # No embedded JSON: keep a short excerpt of the HTML instead.
                page_payload = {"html_excerpt": _compact_text(html, 1600)}
            else:
                page_payload = {"items": extracted[:8]}
            captured_pages.append({"url": final_url, "title": "", "payload": page_payload})
            if extracted:
                collected.extend(extracted)
            elif page_payload:
                collected.append(page_payload)
        except Exception as exc:
            problems.append(f"creator_center_fetch_failed[{target}]: {exc}")
    profile = _pick_best_profile(
        collected,
        fallback_title=fallback_title,
        fallback_handle=fallback_handle,
    )
    if captured_pages:
        aggregated_payload: dict[str, Any] = {
            "pages": captured_pages,
            "page_count": len(captured_pages),
        }
    else:
        aggregated_payload = {}
    return {
        "profile": profile,
        "payload": aggregated_payload,
        "pages": captured_pages,
        "errors": problems,
    }
def _resolve_project_id(requested_project_id: str, owner_id: str, existing: dict[str, Any] | None) -> str:
    """Decide which project an account source belongs to.

    Precedence: the explicitly requested project (which must belong to
    ``owner_id``), then the project already stored on ``existing``, then the
    owner's most recently updated project.

    Raises:
        HTTPException: 400 when the requested project is not found for this
            owner, or when no project can be determined at all.
    """
    wanted = requested_project_id.strip()
    if wanted:
        requested_row = legacy.db.fetch_one(
            "SELECT * FROM projects WHERE id = ? AND user_id = ? LIMIT 1",
            (wanted, owner_id),
        )
        if requested_row:
            return requested_row["id"]
        raise HTTPException(status_code=400, detail="归属项目不存在或不属于当前账号")
    if existing:
        inherited = str(existing.get("project_id") or "")
        if inherited.strip():
            return inherited
    latest_row = legacy.db.fetch_one(
        "SELECT * FROM projects WHERE user_id = ? ORDER BY updated_at DESC LIMIT 1",
        (owner_id,),
    )
    if not latest_row:
        raise HTTPException(status_code=400, detail="请先创建或选择归属项目")
    return latest_row["id"]
def _find_existing_account(user_id: str, profile_url: str, handle: str) -> dict[str, Any] | None:
    """Look up the user's existing creator account on this platform.

    An exact (stripped) ``source_url`` match is preferred; otherwise a
    case-insensitive ``handle`` match is tried. Returns ``None`` when
    neither matches.
    """
    accounts = _content_source_rows(user_id, platform, "creator_account")
    wanted_url = profile_url.strip()
    wanted_handle = handle.strip().lower()
    if wanted_url:
        by_url = next(
            (row for row in accounts if str(row.get("source_url") or "").strip() == wanted_url),
            None,
        )
        if by_url is not None:
            return by_url
    if wanted_handle:
        return next(
            (row for row in accounts if str(row.get("handle") or "").strip().lower() == wanted_handle),
            None,
        )
    return None
def _upsert_account_source(
    owner: dict[str, Any],
    sync_request: PlatformAccountSyncRequest,
    public_data: dict[str, Any],
    creator_data: dict[str, Any],
) -> dict[str, Any]:
    """Insert or update the ``content_sources`` row for a creator account.

    Steps:
      1. Find an existing account by profile URL / handle and resolve the
         owning project.
      2. Merge the public and creator-center profiles with the request and
         any existing metadata into one resolved profile.
      3. Build new snapshots, prepend them to the stored ones and keep the
         12 most recent entries.
      4. Write the merged metadata with an UPDATE or INSERT.

    Returns the freshly re-read ``content_sources`` row.

    Raises:
        HTTPException: propagated from ``_resolve_project_id``.
    """
    existing = _find_existing_account(owner["id"], sync_request.profile_url, sync_request.handle)
    project_id = _resolve_project_id(sync_request.project_id, owner["id"], existing)
    existing_metadata = _source_metadata(existing) if existing else {}
    existing_state = _creator_center_state(existing_metadata)
    existing_snapshots = existing_state.get("snapshots") or []
    existing_snapshots = [item for item in existing_snapshots if isinstance(item, dict)]
    public_profile = public_data.get("profile") or {}
    creator_profile = creator_data.get("profile") or {}
    # ``or {}`` also covers a stats key that is present but None.
    public_stats = public_profile.get("stats") or {}
    creator_stats = creator_profile.get("stats") or {}
    # The same derived keywords feed both ``tags`` and ``keywords``.
    derived_keywords = _extract_keywords(
        public_profile.get("nickname") or "",
        creator_profile.get("nickname") or "",
        public_profile.get("signature") or "",
        creator_profile.get("signature") or "",
    )
    resolved_profile = {
        "nickname": _first_non_empty(public_profile.get("nickname"), creator_profile.get("nickname"), sync_request.title, sync_request.handle),
        "signature": _first_non_empty(public_profile.get("signature"), creator_profile.get("signature"), existing_metadata.get("bio"), existing_metadata.get("description")),
        "avatar_url": _first_non_empty(public_profile.get("avatar_url"), creator_profile.get("avatar_url"), existing_metadata.get("avatar_url")),
        "profile_url": _first_non_empty(public_data.get("source_url"), sync_request.profile_url, public_profile.get("profile_url"), creator_profile.get("profile_url"), existing.get("source_url") if existing else ""),
        "handle": _first_non_empty(sync_request.handle, public_profile.get("handle"), creator_profile.get("handle"), existing.get("handle") if existing else ""),
        # Public numbers win; creator-center numbers fill the gaps.
        "stats": {
            key: public_stats.get(key) or creator_stats.get(key) or 0
            for key in ("followers", "likes", "plays", "videos")
        },
        "tags": _dedupe_strings(
            (existing_metadata.get("tags") or [])
            + (public_profile.get("tags") or [])
            + (creator_profile.get("tags") or [])
            + derived_keywords
        ),
        "keywords": _dedupe_strings(
            (existing_metadata.get("keywords") or [])
            + (public_profile.get("keywords") or [])
            + (creator_profile.get("keywords") or [])
            + derived_keywords
        ),
    }
    new_snapshots: list[dict[str, Any]] = []
    if public_data.get("payload") not in ({}, None, "") or resolved_profile["profile_url"]:
        # With no collected payload, snapshot the resolved profile itself.
        public_payload = public_data.get("payload") or {
            "profile_url": resolved_profile["profile_url"],
            "nickname": resolved_profile["nickname"],
            "handle": resolved_profile["handle"],
            "signature": resolved_profile["signature"],
            "avatar_url": resolved_profile["avatar_url"],
            "stats": resolved_profile["stats"],
        }
        new_snapshots.append(_build_snapshot("public_profile", resolved_profile["profile_url"], public_payload))
    if creator_data.get("payload"):
        creator_url = creator_data.get("pages", [{}])[0].get("url", "") if creator_data.get("pages") else ""
        new_snapshots.append(_build_snapshot("creator_center", creator_url, creator_data["payload"]))
    # Newest first; only the 12 most recent snapshots are retained.
    merged_snapshots = [*new_snapshots, *existing_snapshots][:12]
    latest_public_snapshot = next((item for item in merged_snapshots if item.get("snapshot_type") == "public_profile"), None)
    latest_creator_snapshot = next((item for item in merged_snapshots if item.get("snapshot_type") == "creator_center"), None)
    errors = [*public_data.get("errors", []), *creator_data.get("errors", [])]
    metadata = {
        **existing_metadata,
        "bio": resolved_profile["signature"],
        "description": resolved_profile["signature"],
        "avatar_url": resolved_profile["avatar_url"],
        "tags": resolved_profile["tags"],
        "keywords": resolved_profile["keywords"],
        "profile_stats": resolved_profile["stats"],
        # Separate the messages so several errors stay readable.
        "last_sync_error": "; ".join([str(item) for item in errors if str(item).strip()]),
        "source_mode": "creator_center" if latest_creator_snapshot else "public",
        "last_public_sync_at": latest_public_snapshot.get("collected_at", "") if latest_public_snapshot else existing_metadata.get("last_public_sync_at", ""),
        "last_creator_sync_at": latest_creator_snapshot.get("collected_at", "") if latest_creator_snapshot else existing_metadata.get("last_creator_sync_at", ""),
        "discovery_note": sync_request.discovery_note or existing_metadata.get("discovery_note", ""),
        "creator_center": {
            "snapshots": merged_snapshots,
            "sync_errors": errors,
            "creator_center_urls": _dedupe_strings(sync_request.creator_center_urls or _default_creator_center_urls()),
            "last_synced_at": now(),
        },
    }
    account_id = existing["id"] if existing else make_id(f"{platform}_acct")
    created_at = existing["created_at"] if existing else now()
    title = _first_non_empty(sync_request.title, resolved_profile["nickname"], existing.get("title") if existing else "", resolved_profile["handle"], label)
    handle = _first_non_empty(sync_request.handle, resolved_profile["handle"], existing.get("handle") if existing else "")
    source_url = resolved_profile["profile_url"]
    if existing:
        legacy.db.execute(
            """
UPDATE content_sources
SET project_id = ?, handle = ?, source_url = ?, title = ?, metadata_json = ?, updated_at = ?
WHERE id = ?
""",
            (
                project_id,
                handle,
                source_url,
                title,
                _safe_json_dumps(metadata),
                now(),
                account_id,
            ),
        )
    else:
        legacy.db.execute(
            """
INSERT INTO content_sources (
id, user_id, project_id, source_kind, platform, handle, source_url, title, local_path,
metadata_json, created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
            (
                account_id,
                owner["id"],
                project_id,
                "creator_account",
                platform,
                handle,
                source_url,
                title,
                "",
                _safe_json_dumps(metadata),
                created_at,
                now(),
            ),
        )
    # Re-read so the caller gets the row exactly as stored.
    row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (account_id,))
    assert row is not None
    return row
def _upsert_account_videos(account_row: dict[str, Any], videos: list[dict[str, Any]]) -> list[dict[str, Any]]:
if not videos:
return []
project_id = account_row.get("project_id", "")
source_account_url = str(account_row.get("source_url") or "").strip()
existing_rows = _linked_video_sources(account_row)
existing_by_url: dict[str, dict[str, Any]] = {}
existing_by_external_id: dict[str, dict[str, Any]] = {}
for row in existing_rows:
metadata = _source_metadata(row)
source_url = str(row.get("source_url") or "").strip()
external_id = str(metadata.get("external_id") or "").strip()
if source_url:
existing_by_url[source_url] = row
if external_id:
existing_by_external_id[external_id] = row
saved_rows: list[dict[str, Any]] = []
for index, video in enumerate(videos, start=1):
share_url = str(video.get("share_url") or "").strip()
external_id = str(video.get("external_id") or "").strip()
if not share_url and not external_id:
continue
metadata = {
"summary": video.get("description") or "",
"description": video.get("description") or "",
"cover_url": video.get("cover_url") or "",
"published_at": video.get("published_at") or "",
"tags": video.get("tags") or [],
"content_type": "video",
"duration_sec": float(video.get("duration_sec") or 0),
"external_id": external_id,
"origin_content_source_id": account_row["id"],
"source_account_url": source_account_url,
"stats": video.get("stats") or {},
"performance_score": float(video.get("performance_score") or 0),
"raw_payload": video.get("raw") or {},
}
existing = (existing_by_url.get(share_url) if share_url else None) or (existing_by_external_id.get(external_id) if external_id else None)
if existing:
merged_metadata = {
**_source_metadata(existing),
**metadata,
}
legacy.db.execute(
"""
UPDATE content_sources
SET source_url = ?, title = ?, metadata_json = ?, updated_at = ?
WHERE id = ?
""",
(
share_url or str(existing.get("source_url") or ""),
video.get("title") or existing.get("title") or f"{label} 作品 {index}",
_safe_json_dumps(merged_metadata),
now(),
existing["id"],
),
)
row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (existing["id"],))
else:
row_id = make_id(f"{platform}_video")
created_at = now()
legacy.db.execute(
"""
INSERT INTO content_sources (
id, user_id, project_id, source_kind, platform, handle, source_url, title, local_path,
metadata_json, created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
row_id,
account_row["user_id"],
project_id,
"video_link",
platform,
account_row.get("handle", ""),
share_url,
video.get("title") or f"{label} 作品 {index}",
"",
_safe_json_dumps(metadata),
created_at,
created_at,
),
)
row = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row_id,))
if row:
saved_rows.append(row)
if share_url:
existing_by_url[share_url] = row
if external_id:
existing_by_external_id[external_id] = row
return saved_rows
def _require_account(account_id: str, user_id: str) -> dict[str, Any]:
    """Return the user's ``creator_account`` row on this platform, or raise 404."""
    lookup_sql = (
        "SELECT * FROM content_sources WHERE id = ? AND user_id = ? AND source_kind = 'creator_account' AND platform = ?"
    )
    found = legacy.db.fetch_one(lookup_sql, (account_id, user_id, platform))
    if found:
        return found
    raise HTTPException(status_code=404, detail=f"{label} account not found")
def _linked_video_sources(account_row: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the ``video_link`` rows that belong to the given account.

    Candidates are the user's video sources in the same project and platform,
    newest first. A row is linked when its metadata points back at the account
    id, or when its recorded ``source_account_url`` equals the account's
    non-empty source URL.
    """
    project_id = account_row.get("project_id", "")
    rows = legacy.db.fetch_all(
        "SELECT * FROM content_sources WHERE user_id = ? AND project_id = ? AND source_kind = 'video_link' AND platform = ? ORDER BY updated_at DESC, created_at DESC",
        (account_row["user_id"], project_id, platform),
    )
    account_id = account_row["id"]
    source_url = str(account_row.get("source_url") or "").strip()
    linked: list[dict[str, Any]] = []
    for row in rows:
        metadata = _source_metadata(row)
        if metadata.get("origin_content_source_id") == account_id:
            linked.append(row)
        # Only match by URL when the account actually has one; otherwise an
        # empty URL would equal every video stored with an empty
        # ``source_account_url`` and pull in other accounts' videos.
        elif source_url and metadata.get("source_account_url") == source_url:
            linked.append(row)
    return linked
def _jobs_for_source(source_id: str) -> list[dict[str, Any]]:
    """Fetch every job attached to a content source, newest first."""
    jobs_sql = "SELECT * FROM jobs WHERE content_source_id = ? ORDER BY created_at DESC"
    params = (source_id,)
    return legacy.db.fetch_all(jobs_sql, params)
def _latest_job_for_source(source_id: str) -> dict[str, Any] | None:
    """Fetch the most recently created job for a content source, if any."""
    latest_sql = "SELECT * FROM jobs WHERE content_source_id = ? ORDER BY created_at DESC LIMIT 1"
    params = (source_id,)
    return legacy.db.fetch_one(latest_sql, params)
def _extract_performance_score(job_row: dict[str, Any] | None) -> float:
if not job_row:
return 0.0
result_map = _parse_json(job_row.get("result_json") or "{}", {})
artifacts_map = _parse_json(job_row.get("artifacts_json") or "{}", {})
candidates = [
result_map.get("performance_score"),
(result_map.get("analysis") or {}).get("performance_score"),
(result_map.get("scores") or {}).get("performance_score"),
artifacts_map.get("performance_score"),
(artifacts_map.get("scores") or {}).get("performance_score"),
]
for value in candidates:
try:
return float(value)
except (TypeError, ValueError):
continue
return 0.0
def _extract_metrics(job_row: dict[str, Any] | None) -> dict[str, Any]:
if not job_row:
return {}
result_map = _parse_json(job_row.get("result_json") or "{}", {})
artifacts_map = _parse_json(job_row.get("artifacts_json") or "{}", {})
return (
result_map.get("metrics")
or artifacts_map.get("metrics")
or result_map.get("stats")
or artifacts_map.get("stats")
or {}
)
def _video_payload(source_row: dict[str, Any]) -> dict[str, Any]:
    """Build the API representation of one video content source.

    Statistics come from the latest job's metrics when present and fall back
    to the stats stored in the row metadata. The performance score comes from
    the latest job; when that is not positive, the stored metadata score or a
    heuristic over the resolved stats is used instead.
    """
    payload = _content_source_payload(source_row)
    metadata = payload.get("metadata", {})
    latest_job = _latest_job_for_source(source_row["id"])
    job_fields = latest_job or {}
    metrics = _extract_metrics(latest_job)

    stored_stats = metadata.get("stats")
    if not isinstance(stored_stats, dict):
        stored_stats = {}

    raw_tags = metadata.get("tags") or []
    tags = raw_tags if isinstance(raw_tags, list) else []

    def _stat(name: str) -> Any:
        # Job metrics take precedence over metadata; "<name>_count" over "<name>".
        counted = f"{name}_count"
        return (
            metrics.get(counted)
            or metrics.get(name)
            or stored_stats.get(counted)
            or stored_stats.get(name)
            or 0
        )

    resolved_stats = {name: _stat(name) for name in ("play", "like", "comment", "share")}

    score = _extract_performance_score(latest_job)
    if score <= 0:
        score = _parse_count(metadata.get("performance_score")) or _heuristic_video_performance_score(resolved_stats)

    description = (
        metadata.get("summary")
        or metadata.get("description")
        or job_fields.get("style_summary", "")
    )
    return {
        "id": source_row["id"],
        "aweme_id": str(metadata.get("external_id") or source_row["id"]),
        "title": payload.get("title") or "未命名作品",
        "description": description,
        "share_url": payload.get("source_url", ""),
        "cover_url": metadata.get("cover_url") or "",
        "duration_sec": float(metadata.get("duration_sec") or 0),
        "published_at": metadata.get("published_at") or source_row.get("created_at"),
        "tags": tags,
        "content_type": metadata.get("content_type") or "video",
        "stats": resolved_stats,
        "score": {"performance_score": score},
        "source": payload,
        "latest_job_id": job_fields.get("id", ""),
    }
def _account_payload(account_row: dict[str, Any]) -> dict[str, Any]:
    """Build the API representation of a creator account.

    Includes a video summary over all linked videos: the count, the average
    play and like figures over videos with a positive value, and the first
    eight video payloads.
    """
    payload = _content_source_payload(account_row)
    metadata = payload.get("metadata", {})
    videos = [_video_payload(item) for item in _linked_video_sources(account_row)]

    def _positive_values(stat_name: str) -> list[float]:
        # Zero or missing figures are excluded so they do not drag averages down.
        collected: list[float] = []
        for video in videos:
            amount = float(video["stats"].get(stat_name) or 0)
            if amount > 0:
                collected.append(amount)
        return collected

    def _mean(values: list[float]) -> float:
        return sum(values) / len(values) if values else 0

    play_values = _positive_values("play")
    like_values = _positive_values("like")

    raw_tags = metadata.get("tags") or []
    tags = raw_tags if isinstance(raw_tags, list) else []

    sync_status = "partial"
    if metadata.get("last_sync_error", "") == "":
        sync_status = "ready"

    profile_url = payload.get("source_url", "")
    return {
        "id": account_row["id"],
        "platform": platform,
        "profile_url": profile_url,
        "canonical_profile_url": profile_url,
        "handle": payload.get("handle", ""),
        "nickname": payload.get("title") or payload.get("handle") or "未命名账号",
        "signature": metadata.get("bio") or metadata.get("description") or "",
        "avatar_url": metadata.get("avatar_url") or "",
        "tags": tags,
        "keywords": metadata.get("keywords") or [],
        "profile_stats": metadata.get("profile_stats") or {},
        "sync_status": sync_status,
        "video_summary": {
            "count": len(videos),
            "avg_play": _mean(play_values),
            "avg_like": _mean(like_values),
            "videos": videos[:8],
        },
        "project_id": payload.get("project_id", ""),
        "created_at": payload.get("created_at", ""),
        "updated_at": payload.get("updated_at", ""),
    }
def _relation_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Build the API representation of an account-relation row.

    When the relation points at a stored account, its display name is
    resolved from that row; otherwise ``target_nickname`` is empty.
    """
    target_nickname = ""
    if row.get("target_account_id"):
        target = legacy.db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (row["target_account_id"],))
        if target:
            # Only the nickname is needed, so derive it from the row itself
            # (same fallback chain as the account payload) rather than
            # building the full account payload with all videos and jobs.
            target_source = _content_source_payload(target)
            target_nickname = target_source.get("title") or target_source.get("handle") or "未命名账号"
    return {
        "id": row["id"],
        "source_account_id": row["source_account_id"],
        "target_account_id": row.get("target_account_id", "") or "",
        "target_profile_url": row.get("target_profile_url", ""),
        "target_nickname": target_nickname,
        "relation_type": row.get("relation_type", "benchmark"),
        "note": row.get("note", ""),
        "search_id": row.get("search_id", ""),
        "created_at": row["created_at"],
    }
def _model_profile_payload(row: dict[str, Any]) -> dict[str, Any]:
return {
"id": row["id"],
"name": row["name"],
"model_name": row["model_name"],
"base_url": row["base_url"],
"is_default": bool(row.get("is_default", 0)),
}
def _report_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Build the API representation of an analysis report with its suggestions.

    Suggestions are loaded from the platform's suggestions table, oldest first.
    """
    suggestion_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_analysis_suggestions WHERE report_id = ? ORDER BY created_at ASC",
        (row["id"],),
    )
    suggestions: list[dict[str, Any]] = []
    for item in suggestion_rows:
        suggestions.append(
            {
                "id": item["id"],
                "status": item.get("status", "ok"),
                "model_profile_id": item.get("model_profile_id", ""),
                "model_label": item.get("model_label", ""),
                "suggestion_text": item.get("suggestion_text", ""),
                "parsed_json": _parse_json(item.get("parsed_json") or "{}", {}),
                "created_at": item.get("created_at", ""),
            }
        )

    requested_profiles = _parse_json(row.get("model_profile_ids_json") or "[]", [])
    linked_accounts = _parse_json(row.get("linked_account_ids_json") or "[]", [])
    return {
        "id": row["id"],
        "focus_text": row.get("focus_text", ""),
        "model_profile_ids": requested_profiles,
        "linked_account_ids": linked_accounts,
        "suggestions": suggestions,
        "created_at": row["created_at"],
    }
def _workspace_payload(account_row: dict[str, Any]) -> dict[str, Any]:
    """Assemble the workspace view for an account.

    Combines the account payload, the latest public and creator-center
    snapshots, the six most recent reports, all relations, the five most
    recent similarity searches and the model profiles visible to the owner.
    """
    account_key = (account_row["id"],)
    report_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_analysis_reports WHERE account_source_id = ? ORDER BY created_at DESC LIMIT 6",
        account_key,
    )
    relation_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? ORDER BY created_at DESC",
        account_key,
    )
    search_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_similarity_searches WHERE source_account_id = ? ORDER BY created_at DESC LIMIT 5",
        account_key,
    )
    public_snapshot = _latest_snapshot(account_row, "public_profile")
    creator_snapshot = _latest_snapshot(account_row, "creator_center")

    account_view = _account_payload(account_row)
    public_brief = _snapshot_brief(public_snapshot) if public_snapshot else None
    creator_brief = _snapshot_brief(creator_snapshot) if creator_snapshot else None
    report_views = [_report_payload(report) for report in report_rows]
    relation_views = [_relation_payload(relation) for relation in relation_rows]

    search_views: list[dict[str, Any]] = []
    for search in search_rows:
        search_views.append(
            {
                "id": search["id"],
                "prompt_text": search.get("prompt_text", ""),
                "context": _parse_json(search.get("context_json") or "{}", {}),
                "created_at": search["created_at"],
            }
        )

    # Shared profiles (no owner) plus the ones owned by this user.
    profile_rows = legacy.db.fetch_all(
        """
        SELECT *
        FROM model_profiles
        WHERE owner_account_id IS NULL OR owner_account_id = ?
        ORDER BY is_default DESC, created_at ASC
        """,
        (account_row["user_id"],),
    )
    profile_views = [_model_profile_payload(profile) for profile in profile_rows]

    return {
        "account": account_view,
        "latest_public_snapshot": public_brief,
        "latest_creator_snapshot": creator_brief,
        "recent_reports": report_views,
        "linked_accounts": relation_views,
        "recent_similarity_searches": search_views,
        "available_model_profiles": profile_views,
    }
async def _call_reasoning_model(user_id: str, prompt: str, *, system_prompt: str, model_profile_id: str = "", temperature: float = 0.3) -> tuple[str, dict[str, Any]]:
    """Run one prompt against a model profile and return ``(raw_output, parsed)``.

    ``parsed`` is the JSON object extracted from the output, or ``{}`` when
    the extraction does not yield a dict. An empty ``model_profile_id`` is
    passed on as ``None``.
    """
    requested_profile = model_profile_id or None
    profile = legacy.model_profile_for_account(user_id, requested_profile)
    raw_output = await legacy.call_model(
        profile,
        system_prompt=system_prompt,
        user_prompt=prompt,
        temperature=temperature,
    )
    extracted = legacy.parse_json_object(raw_output)
    if not isinstance(extracted, dict):
        extracted = {}
    return raw_output, extracted
def _resolve_model_profiles(user_id: str, requested_ids: list[str] | None) -> list[dict[str, Any]]:
    """Resolve requested model-profile ids to distinct profile rows.

    Ids that fail to resolve are skipped. When none resolve, the profile
    returned for ``None`` is used as the single fallback, provided it has a
    non-empty id.
    """
    resolved: list[dict[str, Any]] = []
    used_ids: set[str] = set()
    for requested_id in _normalize_id_list(requested_ids):
        try:
            candidate = legacy.model_profile_for_account(user_id, requested_id)
        except Exception:
            # Best effort: an unresolvable id must not abort the request.
            continue
        candidate_id = str(candidate.get("id") or "").strip()
        if candidate_id and candidate_id not in used_ids:
            used_ids.add(candidate_id)
            resolved.append(candidate)
    if resolved:
        return resolved

    default_profile = legacy.model_profile_for_account(user_id, None)
    if str(default_profile.get("id") or "").strip():
        return [default_profile]
    return []
def _linked_account_context(
    account_row: dict[str, Any],
    requested_linked_account_ids: list[str] | None,
    *,
    include_linked_accounts: bool,
    limit: int = 5,
) -> list[dict[str, Any]]:
    """Describe the accounts related to ``account_row`` for use as model context.

    Relations are read newest first. When specific ids are requested,
    relations that target a different stored account are dropped. When
    ``include_linked_accounts`` is false, only explicitly requested targets
    are kept. At most ``limit`` items are returned.

    Raises:
        HTTPException: 404 (via ``_require_account``) when a kept relation
            targets an account that cannot be loaded for this user.
    """
    normalized_requested = set(_normalize_id_list(requested_linked_account_ids))
    relations = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? ORDER BY created_at DESC",
        (account_row["id"],),
    )
    items: list[dict[str, Any]] = []
    for relation in relations:
        # Building an account payload is expensive, so stop once enough
        # relations have been collected instead of slicing afterwards.
        if limit > 0 and len(items) >= limit:
            break
        target_account_id = str(relation.get("target_account_id") or "").strip()
        if normalized_requested and target_account_id and target_account_id not in normalized_requested:
            continue
        if not include_linked_accounts and not (normalized_requested and target_account_id in normalized_requested):
            continue
        target_payload: dict[str, Any] | None = None
        if target_account_id:
            target_account = _require_account(target_account_id, account_row["user_id"])
            # Computed once and shared by both fields below.
            target_payload = _account_payload(target_account)
        items.append(
            {
                "relation_id": relation["id"],
                "relation_type": relation.get("relation_type", "benchmark"),
                "note": relation.get("note", ""),
                "target_account_id": target_account_id,
                "target_profile_url": relation.get("target_profile_url", ""),
                "target_nickname": target_payload.get("nickname", "") if target_payload else "",
                "account": target_payload,
            }
        )
    return items[:limit]
def _recent_similarity_candidates_context(account_row: dict[str, Any], limit: int = 6) -> list[dict[str, Any]]:
    """Return candidates from the account's most recent similarity searches.

    Each item is the candidate's stored raw output, completed with values
    from the candidate row for any of the standard keys it lacks.
    """
    rows = legacy.db.fetch_all(
        f"""
        SELECT candidates.*
        FROM {table_prefix}_similarity_candidates AS candidates
        JOIN {table_prefix}_similarity_searches AS searches
        ON searches.id = candidates.search_id
        WHERE searches.source_account_id = ?
        ORDER BY searches.created_at DESC, candidates.rank_index ASC
        LIMIT ?
        """,
        (account_row["id"], limit),
    )
    collected: list[dict[str, Any]] = []
    for candidate in rows:
        entry = _parse_json(candidate.get("raw_output_json") or "{}", {})
        fallbacks = (
            ("candidate_account_id", candidate.get("candidate_account_id", "") or ""),
            ("candidate_profile_url", candidate.get("candidate_profile_url", "")),
            ("candidate_nickname", ""),
            ("rationale_text", candidate.get("rationale_text", "")),
            ("heuristic_score", candidate.get("heuristic_score", 0)),
            ("agent_score", candidate.get("agent_score", 0)),
        )
        # Values already present in the raw output win over the row columns.
        for key, fallback in fallbacks:
            entry.setdefault(key, fallback)
        collected.append(entry)
    return collected
async def _analyze_top_videos_for_account(
    account_row: dict[str, Any],
    user_id: str,
    *,
    model_profile_id: str = "",
    top_video_count: int = 5,
    min_score: float = 0,
    temperature: float = 0.25,
) -> list[dict[str, Any]]:
    """Ask the model to break down the account's best-scoring videos.

    Videos are ordered by performance score (then publish time), highest
    first, filtered to those at or above ``min_score``, and capped at
    ``top_video_count`` clamped to the range 1..12. The model is called once
    per video, sequentially.
    """
    all_videos = [_video_payload(row) for row in _linked_video_sources(account_row)]
    by_score = sorted(
        all_videos,
        key=lambda item: (item["score"]["performance_score"], item.get("published_at") or ""),
        reverse=True,
    )
    eligible = [
        video
        for video in by_score
        if float(video["score"]["performance_score"] or 0) >= float(min_score or 0)
    ]
    cap = max(1, min(top_video_count, 12))

    analyses: list[dict[str, Any]] = []
    for video in eligible[:cap]:
        video_json = json.dumps(video, ensure_ascii=False, indent=2)
        prompt = (
            f"请拆解这条{label}作品为什么值得关注,输出 summary、borrow_points、risks。"
            f"\n\n输入:\n{video_json}"
        )
        output, parsed = await _call_reasoning_model(
            user_id,
            prompt,
            system_prompt="你是短视频内容拆解助手。尽量输出 JSON字段包括 summary、borrow_points、risks。",
            model_profile_id=model_profile_id,
            temperature=temperature,
        )
        # Fall back to the raw output when the model returned no summary field.
        headline = parsed.get("summary") or parsed.get("headline_summary") or output
        analyses.append(
            {
                "id": make_id(f"{platform}_va"),
                "video_id": video["id"],
                "video_title": video["title"],
                "status": "ok",
                "summary_text": str(headline)[:240],
                "parsed_json": parsed,
                "performance_score": video["score"]["performance_score"],
                "created_at": now(),
            }
        )
    return analyses
def _similarity_tokens(payload: dict[str, Any], extra_text: str = "") -> set[str]:
    """Return the lower-cased keyword set describing an account payload.

    Keywords are extracted from the nickname, signature, tags, keywords and
    ``extra_text``.
    """

    def _as_text(value: Any) -> str:
        # ``tags`` / ``keywords`` originate from stored metadata and are not
        # guaranteed to be lists of strings: keep a plain string intact and
        # ignore items that cannot be meaningfully rendered as text.
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple, set)):
            return " ".join(
                str(item) for item in value if isinstance(item, (str, int, float)) and not isinstance(item, bool)
            )
        return ""

    return {
        token.lower()
        for token in _extract_keywords(
            payload.get("nickname") or "",
            payload.get("signature") or "",
            _as_text(payload.get("tags")),
            _as_text(payload.get("keywords")),
            extra_text,
        )
        if token
    }
def _candidate_title_from_url(url: str) -> str:
cleaned = str(url or "").strip().rstrip("/")
tail = cleaned.rsplit("/", 1)[-1] if cleaned else ""
tail = tail.split("?", 1)[0].replace("-", " ").replace("_", " ").strip()
return tail or cleaned or "候选账号"
async def _create_sync_job_for_account(account_row: dict[str, Any], assistant_id: str = "") -> dict[str, Any]:
    """Create and queue a content-sync job for an account source.

    Raises:
        HTTPException: 400 when the account is not attached to a project.
    """
    project_id = account_row.get("project_id") or ""
    if project_id == "":
        raise HTTPException(status_code=400, detail="Account source is not attached to a project")

    owner_id = account_row["user_id"]
    kb = legacy.resolve_target_kb(owner_id, None, project_id)
    source_payload = _content_source_payload(account_row)
    profile = legacy.model_profile_for_account(owner_id, None)

    account_url = source_payload.get("source_url", "")
    display_name = source_payload.get("title") or source_payload.get("handle") or label
    # Per-account "max_items" from metadata, defaulting to 5 when unset.
    max_items = int(source_payload.get("metadata", {}).get("max_items") or 5)
    job_artifacts = {
        "source_account_url": account_url,
        "platform": platform,
        "handle": source_payload.get("handle", ""),
        "max_items": max_items,
        "skip_existing": True,
        "auto_trigger_analysis": True,
    }

    job_row = legacy.create_job_record(
        account_id=owner_id,
        project_id=project_id,
        knowledge_base_id=kb["id"],
        content_source_id=account_row["id"],
        assistant_id=assistant_id or None,
        source_type="creator_account",
        line_type="analysis",
        workflow_key="content_source_sync_pipeline",
        title=f"{display_name} 内容同步",
        language="auto",
        source_url=account_url,
        artifacts=job_artifacts,
        analysis_model_profile_id=profile["id"],
    )
    queued_job = await legacy.trigger_orchestrated_job(job_row)
    return legacy.job_payload(queued_job)
def _tracking_cursor(user_id: str) -> dict[str, Any] | None:
    """Fetch the user's tracking-cursor row, or ``None`` when there is none."""
    cursor_sql = f"SELECT * FROM {table_prefix}_tracking_cursors WHERE user_id = ?"
    params = (user_id,)
    return legacy.db.fetch_one(cursor_sql, params)
def _set_tracking_cursor(user_id: str, last_seen_at: str) -> dict[str, Any]:
    """Store ``last_seen_at`` as the user's tracking cursor and return the row.

    Updates the existing cursor row when there is one, otherwise inserts it.
    """
    already_present = bool(_tracking_cursor(user_id))
    stamp = now()
    if already_present:
        statement = f"UPDATE {table_prefix}_tracking_cursors SET last_seen_at = ?, updated_at = ? WHERE user_id = ?"
        params = (last_seen_at, stamp, user_id)
    else:
        statement = f"INSERT INTO {table_prefix}_tracking_cursors (user_id, last_seen_at, updated_at) VALUES (?, ?, ?)"
        params = (user_id, last_seen_at, stamp)
    legacy.db.execute(statement, params)
    # Re-read so the caller sees exactly what was persisted.
    return legacy.db.fetch_one(
        f"SELECT * FROM {table_prefix}_tracking_cursors WHERE user_id = ?",
        (user_id,),
    )
def _tracked_account_payload(tracked_row: dict[str, Any]) -> dict[str, Any]:
    """Build the API representation of a tracked-account row.

    Resolves the optional assistant's display name and embeds the full
    payload of the tracked account.

    Raises:
        HTTPException: 404 (via ``_require_account``) when the tracked
            account cannot be loaded for the row's user.
    """
    assistant_id = tracked_row.get("assistant_id")
    assistant_name = ""
    if assistant_id:
        assistant_row = legacy.db.fetch_one("SELECT * FROM assistants WHERE id = ?", (tracked_row["assistant_id"],))
        if assistant_row:
            assistant_name = legacy.assistant_payload(assistant_row).get("name", "")

    tracked_account_id = tracked_row["tracked_account_id"]
    account_row = _require_account(tracked_account_id, tracked_row["user_id"])
    return {
        "id": tracked_row["id"],
        "platform": platform,
        "tracked_account_id": tracked_account_id,
        "assistant_id": tracked_row.get("assistant_id", "") or "",
        "assistant_name": assistant_name,
        "note": tracked_row.get("note", ""),
        "created_at": tracked_row.get("created_at", ""),
        "updated_at": tracked_row["updated_at"],
        "account": _account_payload(account_row),
    }
def _list_tracked_accounts(user_id: str) -> list[dict[str, Any]]:
    """Return payloads for all accounts the user tracks, most recently updated first."""
    tracked_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? ORDER BY updated_at DESC",
        (user_id,),
    )
    payloads: list[dict[str, Any]] = []
    for tracked_row in tracked_rows:
        payloads.append(_tracked_account_payload(tracked_row))
    return payloads
def _tracking_digest_item(tracked_item: dict[str, Any], video: dict[str, Any]) -> dict[str, Any]:
    """Build one digest entry for a video of a tracked account.

    The summary prefers the latest job's ``style_summary``, then the video
    description, then a fixed placeholder. A video counts as high value when
    its performance score is at least 60.
    """
    latest_job = _latest_job_for_source(video["id"])
    summary = (latest_job or {}).get("style_summary") or video.get("description") or "已发现更新内容"

    # Short teaser snippets: the head of the summary and of the title.
    snippets = (summary[:36], video.get("title", "")[:36])
    borrowing_points = [snippet for snippet in snippets if snippet]

    score = float(video.get("score", {}).get("performance_score") or 0)
    account_view = tracked_item["account"]
    return {
        "tracking_id": tracked_item["id"],
        "platform": platform,
        "tracked_account_id": tracked_item["tracked_account_id"],
        "tracked_account_name": account_view["nickname"],
        "assistant_id": tracked_item.get("assistant_id", "") or "",
        "assistant_name": tracked_item.get("assistant_name", ""),
        "note": tracked_item.get("note", ""),
        "account": account_view,
        "video": video,
        "summary": summary,
        "summary_text": summary,
        "borrowing_points": borrowing_points[:3],
        "is_high_value": score >= 60,
        "created_at": video.get("published_at") or now(),
    }
def _tracking_digest(user_id: str, since_value: str = "", limit: int = 24) -> dict[str, Any]:
    """Collect recent videos from all tracked accounts into a digest.

    Videos whose ``published_at`` is at or before the threshold are left out.
    The threshold is ``since_value`` when given, otherwise the stored cursor.
    Timestamps are compared as strings. Items are returned newest first and
    capped at ``limit``.
    """
    tracked_items = _list_tracked_accounts(user_id)
    cursor = _tracking_cursor(user_id)
    cursor_fields = cursor or {}
    threshold = (since_value or cursor_fields.get("last_seen_at") or "").strip()

    fresh_items: list[dict[str, Any]] = []
    for tracked in tracked_items:
        for video in tracked["account"]["video_summary"]["videos"]:
            published_at = str(video.get("published_at") or "")
            already_seen = bool(threshold and published_at and published_at <= threshold)
            if not already_seen:
                fresh_items.append(_tracking_digest_item(tracked, video))

    newest_first = sorted(fresh_items, key=lambda item: item.get("created_at", ""), reverse=True)
    return {
        "generated_at": now(),
        "since": threshold,
        "items": newest_first[:limit],
        "tracked_accounts": tracked_items,
        "cursor_last_seen_at": cursor_fields.get("last_seen_at", ""),
    }
@app.post(f"/v2/{platform}/accounts/sync")
async def sync_platform_account(
    request: PlatformAccountSyncRequest,
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Sync an account from its public profile and/or creator-center pages.

    Responds with the refreshed workspace plus the combined ``sync_errors``
    reported by both collectors. Responds 400 when the request carries no
    usable input or when nothing could be extracted.
    """
    has_any_input = (
        request.profile_url.strip()
        or request.manual_profile_payload
        or request.manual_creator_pages
    )
    if not has_any_input:
        raise HTTPException(
            status_code=400,
            detail="profile_url、manual_profile_payload 或 manual_creator_pages 至少需要传一个",
        )

    public_data = await _collect_public_profile(
        request.profile_url,
        request.manual_profile_payload,
        fallback_title=request.title,
        fallback_handle=request.handle,
    )
    creator_data = await _collect_creator_center_pages(
        request.creator_center_urls,
        request.session_cookie,
        request.manual_creator_pages,
        fallback_title=request.title,
        fallback_handle=request.handle,
    )

    public_profile = public_data["profile"]
    extracted_anything = (
        public_profile.get("nickname")
        or public_profile.get("profile_url")
        or creator_data["pages"]
    )
    if not extracted_anything:
        raise HTTPException(status_code=400, detail=f"No {label} profile or creator-center data could be extracted")

    account_row = _upsert_account_source(account, request, public_data, creator_data)
    collected_payloads = [
        public_data.get("payload") or {},
        creator_data.get("payload") or {},
    ]
    _upsert_account_videos(account_row, _extract_videos(collected_payloads))

    # Reload the account row after the video upsert before building the response.
    account_row = _require_account(account_row["id"], account["id"])
    response = _workspace_payload(account_row)
    response["sync_errors"] = [*public_data["errors"], *creator_data["errors"]]
    return response
@app.get(f"/v2/{platform}/accounts")
def list_platform_accounts(account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
    """List the caller's creator accounts on this platform."""
    account_rows = _content_source_rows(account["id"], platform, "creator_account")
    listing: list[dict[str, Any]] = []
    for account_row in account_rows:
        listing.append(_account_payload(account_row))
    return listing
@app.get(f"/v2/{platform}/accounts/{{account_id}}/workspace")
def get_platform_account_workspace(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
    """Return the workspace view of one of the caller's accounts (404 if not found)."""
    caller_id = account["id"]
    return _workspace_payload(_require_account(account_id, caller_id))
@app.get(f"/v2/{platform}/accounts/{{account_id}}/analysis-reports")
def list_platform_analysis_reports(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
    """Return the six most recent analysis reports of one of the caller's accounts.

    Raises:
        HTTPException: 404 (via ``_require_account``) when the account is not
            one of the caller's accounts on this platform.
    """
    account_row = _require_account(account_id, account["id"])
    # Query the reports directly (same statement and limit the workspace view
    # uses) instead of assembling the whole workspace just to read one field.
    report_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_analysis_reports WHERE account_source_id = ? ORDER BY created_at DESC LIMIT 6",
        (account_row["id"],),
    )
    return [_report_payload(row) for row in report_rows]
@app.get(f"/v2/{platform}/accounts/{{account_id}}/snapshots")
def list_platform_snapshots(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
    """List brief descriptions of all snapshots stored for an account."""
    account_row = _require_account(account_id, account["id"])
    briefs: list[dict[str, Any]] = []
    for snapshot in _list_snapshots(account_row):
        briefs.append(_snapshot_brief(snapshot))
    return briefs
@app.get(f"/v2/{platform}/accounts/{{account_id}}/snapshots/{{snapshot_id}}")
def get_platform_snapshot_detail(
    account_id: str,
    snapshot_id: str,
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Return the detail view of one snapshot of one of the caller's accounts."""
    caller_id = account["id"]
    owning_account = _require_account(account_id, caller_id)
    return _snapshot_detail(owning_account, snapshot_id)
@app.get(f"/v2/{platform}/accounts/{{account_id}}/creator-fields")
def get_platform_creator_fields(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
    """Return the detail of the account's latest creator-center snapshot.

    Responds 404 when the account has no creator-center snapshot.
    """
    account_row = _require_account(account_id, account["id"])
    creator_snapshot = _latest_snapshot(account_row, "creator_center")
    if creator_snapshot:
        snapshot_id = str(creator_snapshot.get("id") or "")
        return _snapshot_detail(account_row, snapshot_id)
    raise HTTPException(status_code=404, detail="No creator-center snapshot found")
@app.get(f"/v2/{platform}/accounts/{{account_id}}/videos")
def list_platform_account_videos(
    account_id: str,
    limit: int = Query(default=80, ge=1, le=200),
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """List an account's videos ordered by performance score, highest first.

    Also reports up to 12 ids of videos scoring at least 60 and the ids of
    the 12 most recently published videos. ``count`` is the total number of
    linked videos, not the number returned.
    """
    high_score_threshold = 60
    highlight_cap = 12

    account_row = _require_account(account_id, account["id"])
    by_score = sorted(
        (_video_payload(row) for row in _linked_video_sources(account_row)),
        key=lambda item: (item["score"]["performance_score"], item.get("published_at") or ""),
        reverse=True,
    )

    top_scored_ids = [
        item["id"]
        for item in by_score
        if float(item["score"]["performance_score"] or 0) >= high_score_threshold
    ]
    by_recency = sorted(by_score, key=lambda item: item.get("published_at") or "", reverse=True)
    latest_ids = [item["id"] for item in by_recency[:highlight_cap]]

    return {
        "items": by_score[:limit],
        "count": len(by_score),
        "meta": {"platform": platform, "account_id": account_id},
        "top_scored_video_ids": top_scored_ids[:highlight_cap],
        "latest_video_ids": latest_ids,
        "high_score_threshold": high_score_threshold,
    }
@app.post(f"/v2/{platform}/accounts/{{account_id}}/analysis")
async def analyze_platform_account(
    account_id: str,
    request: PlatformAnalysisRequest,
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Run a model-backed analysis of an account and persist it as a report.

    Steps:
      1. Build the context: account payload, top videos, linked accounts,
         recent similarity candidates and snapshot information.
      2. Insert the report row, then call every selected model profile in
         turn and store one suggestion per profile.
      3. Optionally analyse the account's top videos with the first selected
         profile.

    Raises:
        HTTPException: 404 (via ``_require_account``) when the account is not
            one of the caller's accounts on this platform.
    """
    account_row = _require_account(account_id, account["id"])
    # Only the account payload is needed here; building the full workspace
    # would also load reports, relations, searches and model profiles.
    account_payload = _account_payload(account_row)
    latest_public_snapshot = _latest_snapshot(account_row, "public_profile")
    latest_creator_snapshot = _latest_snapshot(account_row, "creator_center")
    profiles = _resolve_model_profiles(account["id"], request.model_profile_ids)
    selected_model_profile_ids = [str(profile.get("id") or "").strip() for profile in profiles if str(profile.get("id") or "").strip()]
    requested_linked_account_ids = _normalize_id_list(request.linked_account_ids)
    linked_accounts = _linked_account_context(
        account_row,
        requested_linked_account_ids,
        include_linked_accounts=bool(request.include_linked_accounts),
        limit=6,
    )
    recent_similarity_candidates = (
        _recent_similarity_candidates_context(account_row, limit=6)
        if request.include_recent_similar_candidates
        else []
    )
    context = {
        "account": account_payload,
        # The account payload carries at most eight videos, hence the cap of 8.
        "top_videos": account_payload["video_summary"]["videos"][: max(1, min(request.max_videos, 8))],
        "linked_accounts": linked_accounts,
        "recent_similar_candidates": recent_similarity_candidates,
        "creator_center": {
            "snapshot_count": len(_list_snapshots(account_row)),
            "latest_public_snapshot": _snapshot_brief(latest_public_snapshot) if latest_public_snapshot else None,
            "latest_creator_snapshot": _snapshot_detail(account_row, str(latest_creator_snapshot.get("id") or "")) if latest_creator_snapshot else None,
        },
        "request_options": {
            "include_linked_accounts": bool(request.include_linked_accounts),
            "include_recent_similar_candidates": bool(request.include_recent_similar_candidates),
            "auto_analyze_top_videos": bool(request.auto_analyze_top_videos),
            "top_video_analysis_count": int(request.top_video_analysis_count),
            "max_videos": int(request.max_videos),
        },
        "requested_model_profile_ids": _normalize_id_list(request.model_profile_ids),
        "selected_model_profile_ids": selected_model_profile_ids,
        "extra_focus": request.extra_focus,
    }
    prompt = (
        f"请从新媒体商业化运营视角,分析这个{label}账号,输出执行摘要、可借鉴点、风险提醒和下一步动作。"
        f"\n\n输入:\n{json.dumps(context, ensure_ascii=False, indent=2)}"
    )
    report_id = make_id(f"{platform}_report")
    # The report row is written before any model call is made.
    legacy.db.execute(
        f"""INSERT INTO {table_prefix}_analysis_reports
        (id, user_id, account_source_id, focus_text, model_profile_ids_json, linked_account_ids_json, prompt_text, context_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (
            report_id,
            account["id"],
            account_row["id"],
            request.extra_focus or "",
            _safe_json_dumps(selected_model_profile_ids),
            _safe_json_dumps([item.get("target_account_id", "") for item in linked_accounts if item.get("target_account_id")]),
            prompt,
            _safe_json_dumps(context),
            now(),
        ),
    )
    suggestions: list[dict[str, Any]] = []
    for profile in profiles:
        output, parsed = await _call_reasoning_model(
            account["id"],
            prompt,
            system_prompt="你是新媒体账号分析顾问。尽量输出 JSON字段包括 executive_summary、borrow_points、risks、next_actions。",
            model_profile_id=str(profile.get("id") or ""),
            temperature=request.temperature,
        )
        suggestion_id = make_id(f"{platform}_suggestion")
        model_label = f"{profile.get('name', '')} · {profile.get('model_name', '')}".strip(" ·")
        legacy.db.execute(
            f"INSERT INTO {table_prefix}_analysis_suggestions (id, report_id, model_profile_id, model_label, status, suggestion_text, parsed_json, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (
                suggestion_id,
                report_id,
                profile["id"],
                model_label,
                "ok",
                # Stored suggestion text is truncated to 4000 characters.
                output[:4000],
                _safe_json_dumps(parsed),
                now(),
            ),
        )
        suggestions.append(
            {
                "id": suggestion_id,
                "status": "ok",
                "model_profile_id": profile["id"],
                "model_label": model_label,
                "suggestion_text": output[:4000],
                "parsed_json": parsed,
                "created_at": now(),
            }
        )
    # Prefer videos scoring at least 45; when none of the context videos
    # reaches that bar, drop the floor so the analysis still has input.
    top_video_min_score = 45.0
    if not any(float(item.get("score", {}).get("performance_score") or 0) >= top_video_min_score for item in context["top_videos"]):
        top_video_min_score = 0.0
    top_video_analyses = (
        await _analyze_top_videos_for_account(
            account_row,
            account["id"],
            model_profile_id=selected_model_profile_ids[0] if selected_model_profile_ids else "",
            top_video_count=request.top_video_analysis_count,
            min_score=top_video_min_score,
            # Clamp the temperature into 0.15..0.4 for the per-video breakdowns.
            temperature=min(max(request.temperature, 0.15), 0.4),
        )
        if request.auto_analyze_top_videos and profiles
        else []
    )
    report_row = legacy.db.fetch_one(
        f"SELECT * FROM {table_prefix}_analysis_reports WHERE id = ?",
        (report_id,),
    )
    report_payload = _report_payload(report_row)
    return {
        "report_id": report_id,
        "account_id": account_row["id"],
        "created_at": now(),
        # Prefer what was persisted; fall back to the in-memory suggestions.
        "suggestions": report_payload["suggestions"] or suggestions,
        "context": context,
        "top_video_analyses": top_video_analyses,
    }
@app.post(f"/v2/{platform}/accounts/{{account_id}}/videos/analyze-top")
async def analyze_platform_top_videos(
    account_id: str,
    request: PlatformTopVideoAnalysisRequest,
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Analyse the top-scoring videos of one of the caller's accounts."""
    caller_id = account["id"]
    account_row = _require_account(account_id, caller_id)
    analysis_options = {
        "model_profile_id": request.model_profile_id,
        "top_video_count": request.top_video_count,
        "min_score": request.min_score,
        "temperature": request.temperature,
    }
    analyses = await _analyze_top_videos_for_account(account_row, caller_id, **analysis_options)
    return {
        "account_id": account_row["id"],
        "analyzed_count": len(analyses),
        "items": analyses,
    }
@app.post(f"/v2/{platform}/similar-searches")
async def create_platform_similarity_search(
    request: PlatformSimilaritySearchRequest,
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Build, rank and persist a pool of similar-account candidates.

    Candidates are gathered from up to three places, in this order: accounts
    already linked to the source account (only when
    ``request.seed_linked_accounts`` is set), the URLs in
    ``request.candidate_urls``, and rows returned by ``_content_source_rows``
    for this platform. Every candidate gets a heuristic score built from its
    token overlap with the source account and with ``extra_requirements``.
    The first candidate seen for a given account id (or lower-cased profile
    URL) wins; later duplicates are dropped. The best
    ``request.max_candidates`` entries are stored next to the search record.

    Returns:
        ``{"id": ..., "search_id": ...}``; both keys hold the new search id.
    """
    owner_id = account["id"]
    source_row = _require_account(request.source_account_id, owner_id)
    source_payload = _account_payload(source_row)
    source_tokens = _similarity_tokens(source_payload)
    requirement_tokens = {
        keyword.lower()
        for keyword in _extract_keywords(request.extra_requirements or "")
        if keyword
    }
    pool: list[dict[str, Any]] = []
    seen: set[str] = set()

    def _overlaps(tokens: Any) -> tuple[int, int]:
        # (overlap with the source account, overlap with the extra requirements)
        return (
            len(source_tokens.intersection(tokens)),
            len(requirement_tokens.intersection(tokens)),
        )

    def _enqueue(
        account_ref: Any,
        profile_url: Any,
        nickname: Any,
        score: Any,
        rationale: str,
        dimensions: dict[str, Any],
    ) -> None:
        # Key order matters: the whole entry is serialised into raw_output_json.
        entry = {
            "candidate_account_id": account_ref,
            "candidate_profile_url": profile_url,
            "candidate_nickname": nickname,
            "heuristic_score": float(score),
            "agent_score": float(score),
            "rationale_text": rationale,
            "dimensions_json": dimensions,
        }
        # Prefer the account id as identity; fall back to the lower-cased URL.
        key = str(account_ref or "").strip() or str(profile_url or "").strip().lower()
        if key and key not in seen:
            seen.add(key)
            pool.append(entry)

    # 1) Accounts already linked to the source account.
    if request.seed_linked_accounts:
        linked_limit = max(4, request.max_candidates)
        for relation in _linked_account_context(source_row, [], include_linked_accounts=True, limit=linked_limit):
            related = relation.get("account") or {}
            source_overlap, requirement_overlap = _overlaps(_similarity_tokens(related))
            _enqueue(
                relation.get("target_account_id", ""),
                relation.get("target_profile_url") or related.get("profile_url", ""),
                related.get("nickname", "") or relation.get("target_nickname", ""),
                65 + source_overlap * 6 + requirement_overlap * 8,
                "来自已建立的对标关系,优先纳入相似账号候选池。",
                {"source": "linked_account", "source_overlap": source_overlap, "requirement_overlap": requirement_overlap},
            )

    # 2) Profile URLs supplied by the caller; only a title derived from the URL is scored.
    for url in _normalize_id_list(request.candidate_urls):
        title = _candidate_title_from_url(url)
        source_overlap, requirement_overlap = _overlaps(
            _similarity_tokens({"nickname": title, "signature": "", "tags": [], "keywords": []})
        )
        _enqueue(
            "",
            url,
            title,
            52 + source_overlap * 4 + requirement_overlap * 6,
            "来自手动提供的主页链接,已纳入相似账号候选池。",
            {
                "source": "manual_url",
                "source_overlap": source_overlap,
                "requirement_overlap": requirement_overlap,
                "search_public_pages": bool(request.search_public_pages),
            },
        )

    # 3) Other creator accounts of the same user on this platform.
    other_rows = [
        row
        for row in _content_source_rows(owner_id, platform, "creator_account")
        if row["id"] != source_row["id"]
    ]
    local_limit = max(5, request.max_candidates * 2)
    for index, row in enumerate(other_rows[:local_limit], start=1):
        payload = _account_payload(row)
        source_overlap, requirement_overlap = _overlaps(_similarity_tokens(payload))
        # Earlier rows get a small positional bonus that fades to zero at index 50.
        heuristic = source_overlap * 10 + requirement_overlap * 8 + max(0, 50 - index)
        rationale = f"与源账号同平台,标签/关键词重合 {source_overlap},适合作为{label}对标候选。"
        _enqueue(
            row["id"],
            payload.get("profile_url", ""),
            payload.get("nickname", ""),
            heuristic,
            rationale,
            {"source": "local_account", "source_overlap": source_overlap, "requirement_overlap": requirement_overlap},
        )

    # sorted() is stable, so equally scored candidates keep their insertion order.
    ranked = sorted(pool, key=lambda entry: entry["agent_score"], reverse=True)[: request.max_candidates]

    search_id = make_id(f"{platform}_search")
    search_context = {
        "source_account": source_payload,
        "candidate_urls": _normalize_id_list(request.candidate_urls),
        "seed_linked_accounts": bool(request.seed_linked_accounts),
        "search_public_pages": bool(request.search_public_pages),
        "model_profile_id": request.model_profile_id or "",
    }
    legacy.db.execute(
        f"INSERT INTO {table_prefix}_similarity_searches (id, user_id, source_account_id, prompt_text, context_json, created_at) VALUES (?, ?, ?, ?, ?, ?)",
        (
            search_id,
            owner_id,
            source_row["id"],
            request.extra_requirements or "",
            _safe_json_dumps(search_context),
            now(),
        ),
    )
    for rank, entry in enumerate(ranked):
        legacy.db.execute(
            f"""INSERT INTO {table_prefix}_similarity_candidates
(id, search_id, candidate_account_id, candidate_profile_url, heuristic_score, agent_score, rationale_text, dimensions_json, raw_output_json, rank_index, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                make_id(f"{platform}_candidate"),
                search_id,
                entry.get("candidate_account_id") or None,  # URL-only candidates store NULL
                entry.get("candidate_profile_url", ""),
                entry.get("heuristic_score", 0),
                entry.get("agent_score", 0),
                entry.get("rationale_text", ""),
                _safe_json_dumps(entry.get("dimensions_json") or {}),
                _safe_json_dumps(entry),
                rank,
                now(),
            ),
        )
    return {"id": search_id, "search_id": search_id}
@app.get(f"/v2/{platform}/similar-searches/{{search_id}}")
def get_platform_similarity_search(search_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
    """Return a stored similarity search and its ranked candidates.

    The search is only visible to the user who created it. Each candidate is
    rebuilt from its ``raw_output_json`` blob, with the dedicated columns used
    to fill in any key the blob lacks.

    Raises:
        HTTPException: 404 when no search with this id exists for the caller.
    """
    search_row = legacy.db.fetch_one(
        f"SELECT * FROM {table_prefix}_similarity_searches WHERE id = ? AND user_id = ?",
        (search_id, account["id"]),
    )
    if not search_row:
        raise HTTPException(status_code=404, detail="Similarity search not found")

    # (column, default) pairs back-filled into the parsed blob, in this order.
    column_fallbacks = (
        ("candidate_account_id", ""),
        ("candidate_profile_url", ""),
        ("rationale_text", ""),
        ("agent_score", 0),
        ("heuristic_score", 0),
    )

    def _hydrate(row: Any) -> Any:
        candidate = _parse_json(row.get("raw_output_json") or "{}", {})
        for column, default in column_fallbacks:
            candidate.setdefault(column, row.get(column, default))
        return candidate

    stored_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_similarity_candidates WHERE search_id = ? ORDER BY rank_index ASC",
        (search_id,),
    )
    candidates = [_hydrate(row) for row in stored_rows]
    found_id = search_row["id"]
    return {
        "id": found_id,
        "search_id": found_id,
        "source_account_id": search_row["source_account_id"],
        "context": _parse_json(search_row.get("context_json") or "{}", {}),
        "candidates": candidates,
        "created_at": search_row["created_at"],
    }
@app.get(f"/v2/{platform}/accounts/{{account_id}}/benchmark-links")
def list_platform_benchmark_links(account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> list[dict[str, Any]]:
    """List the benchmark relations whose source is ``account_id``, newest first.

    ``_require_account`` is called with the caller's id before the query;
    its result is not needed, only the check it performs.
    """
    _require_account(account_id, account["id"])
    query = f"SELECT * FROM {table_prefix}_account_relations WHERE source_account_id = ? ORDER BY created_at DESC"
    relation_rows = legacy.db.fetch_all(query, (account_id,))
    return list(map(_relation_payload, relation_rows))
@app.post(f"/v2/{platform}/accounts/{{account_id}}/benchmark-links")
def create_platform_benchmark_links(
    account_id: str,
    request: PlatformBenchmarkLinksRequest,
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Create benchmark relations from ``account_id`` to other accounts or URLs.

    One relation row is written per entry in ``request.target_account_ids``
    (resolved through ``_require_account``) and per non-blank entry in
    ``request.target_profile_urls``. All rows share the request's relation
    type (default ``"benchmark"``), note and search id.

    Every target account is resolved *before* the first row is written, so a
    request containing an id that ``_require_account`` rejects fails without
    leaving a partial set of links behind.

    Returns:
        ``{"links": [...]}`` with the payload of each created relation,
        account targets first, then URL targets, each in request order.
    """
    owner_id = account["id"]
    source_account = _require_account(account_id, owner_id)

    # Validate up front: any exception here happens before anything is inserted.
    target_accounts = [
        _require_account(target_account_id, owner_id)
        for target_account_id in request.target_account_ids
    ]
    # Blank / whitespace-only URLs are ignored.
    profile_urls = [
        cleaned
        for cleaned in (str(raw_url or "").strip() for raw_url in request.target_profile_urls)
        if cleaned
    ]

    def _insert_relation(target_account_id: Any, target_profile_url: Any) -> dict[str, Any]:
        # Write one relation row and return its payload as read back from the DB.
        relation_id = make_id(f"{platform}_link")
        legacy.db.execute(
            f"INSERT INTO {table_prefix}_account_relations (id, user_id, source_account_id, target_account_id, target_profile_url, relation_type, note, search_id, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (
                relation_id,
                owner_id,
                source_account["id"],
                target_account_id,
                target_profile_url,
                request.relation_type or "benchmark",
                request.note or "",
                request.search_id or "",
                now(),
            ),
        )
        return _relation_payload(
            legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_account_relations WHERE id = ?", (relation_id,))
        )

    created: list[dict[str, Any]] = []
    for target in target_accounts:
        created.append(_insert_relation(target["id"], target.get("source_url", "")))
    for profile_url in profile_urls:
        # URL-only targets have no local account row, hence the NULL account id.
        created.append(_insert_relation(None, profile_url))
    return {"links": created}
@app.get(f"/v2/{platform}/tracking/accounts")
def list_platform_tracking_accounts(account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
    """Return the caller's tracked accounts plus the tracking cursor position.

    ``cursor_last_seen_at`` is an empty string when no cursor is stored or the
    cursor has no ``last_seen_at`` value.
    """
    owner_id = account["id"]
    cursor = _tracking_cursor(owner_id) or {}
    tracked_items = _list_tracked_accounts(owner_id)
    return {
        "items": tracked_items,
        "cursor_last_seen_at": cursor.get("last_seen_at", ""),
    }
@app.post(f"/v2/{platform}/tracking/accounts")
def create_platform_tracking_account(
    request: PlatformTrackingAccountRequest,
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Start tracking an account, or update the existing tracking entry.

    If the caller already tracks ``request.tracked_account_id`` the entry's
    assistant, note and ``updated_at`` are overwritten; otherwise a new entry
    is inserted. In both cases the row is re-read and returned through
    ``_tracked_account_payload``.
    """
    owner_id = account["id"]
    tracked = _require_account(request.tracked_account_id, owner_id)
    assistant = legacy.resolve_target_assistant(
        owner_id,
        request.assistant_id or None,
        tracked.get("project_id", ""),
    )
    # A missing assistant (or one without an id) is stored as NULL.
    assistant_ref = (assistant or {}).get("id") or None
    note_text = request.note or ""

    existing = legacy.db.fetch_one(
        f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? AND tracked_account_id = ?",
        (owner_id, tracked["id"]),
    )
    if not existing:
        tracking_id = make_id(f"{platform}_track")
        legacy.db.execute(
            f"INSERT INTO {table_prefix}_tracked_accounts (id, user_id, tracked_account_id, assistant_id, note, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
            (
                tracking_id,
                owner_id,
                tracked["id"],
                assistant_ref,
                note_text,
                now(),
                now(),
            ),
        )
    else:
        tracking_id = existing["id"]
        legacy.db.execute(
            f"UPDATE {table_prefix}_tracked_accounts SET assistant_id = ?, note = ?, updated_at = ? WHERE id = ?",
            (assistant_ref, note_text, now(), tracking_id),
        )
    # Both branches finish by reading the stored row back by id.
    stored = legacy.db.fetch_one(f"SELECT * FROM {table_prefix}_tracked_accounts WHERE id = ?", (tracking_id,))
    return _tracked_account_payload(stored)
@app.post(f"/v2/{platform}/tracking/accounts/{{tracked_account_id}}/refresh")
async def refresh_platform_tracked_account(tracked_account_id: str, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
    """Queue a sync job for one tracked account and bump its ``updated_at``.

    Raises:
        HTTPException: 404 when the caller does not track this account.
    """
    owner_id = account["id"]
    tracking = legacy.db.fetch_one(
        f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? AND tracked_account_id = ?",
        (owner_id, tracked_account_id),
    )
    if not tracking:
        raise HTTPException(status_code=404, detail="Tracked account not found")

    target_row = _require_account(tracked_account_id, owner_id)
    # A NULL / missing assistant id is passed on as an empty string.
    assistant_ref = tracking.get("assistant_id", "") or ""
    job = await _create_sync_job_for_account(target_row, assistant_id=assistant_ref)

    tracking_id = tracking["id"]
    legacy.db.execute(
        f"UPDATE {table_prefix}_tracked_accounts SET updated_at = ? WHERE id = ?",
        (now(), tracking_id),
    )
    return {
        "tracking_id": tracking_id,
        "tracked_account_id": tracked_account_id,
        "sync_job_id": job["id"],
        "status": job["status"],
    }
@app.post(f"/v2/{platform}/tracking/refresh")
async def refresh_platform_tracking(account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
    """Queue a sync job for every account the caller tracks.

    Accounts are processed one at a time, most recently updated first. A
    failure on one account is recorded in its result entry and does not stop
    the remaining accounts from being processed.

    Returns:
        ``{"refreshed": int, "failed": int, "items": [...]}``. Each item has
        ``tracking_id`` and ``tracked_account_id`` plus either
        ``sync_job_id``/``status`` on success or ``error`` on failure. Every
        tracked row contributes to exactly one of the two counters.
    """
    tracked_rows = legacy.db.fetch_all(
        f"SELECT * FROM {table_prefix}_tracked_accounts WHERE user_id = ? ORDER BY updated_at DESC",
        (account["id"],),
    )
    refreshed = 0
    failed = 0
    items: list[dict[str, Any]] = []
    # NOTE(review): unlike the single-account refresh endpoint, this loop does
    # not touch updated_at on the tracked row — confirm that is intentional.
    for row in tracked_rows:
        try:
            account_row = _require_account(row["tracked_account_id"], account["id"])
            queued = await _create_sync_job_for_account(account_row, assistant_id=row.get("assistant_id", "") or "")
            # Build the entry inside the try so a malformed job payload is
            # reported as a failure instead of being counted as a success too.
            entry = {
                "tracking_id": row["id"],
                "tracked_account_id": row["tracked_account_id"],
                "sync_job_id": queued["id"],
                "status": queued["status"],
            }
        except Exception as exc:
            # Deliberately broad: this is a best-effort batch and one bad
            # account must not abort the rest.
            failed += 1
            items.append(
                {
                    "tracking_id": row["id"],
                    "tracked_account_id": row["tracked_account_id"],
                    # str() is empty for argument-less exceptions (for example a
                    # bare timeout error); fall back to repr so the message is
                    # never blank.
                    "error": str(exc) or repr(exc),
                }
            )
        else:
            refreshed += 1
            items.append(entry)
    return {"refreshed": refreshed, "failed": failed, "items": items}
@app.post(f"/v2/{platform}/tracking/cursor")
def update_platform_tracking_cursor(request: PlatformTrackingCursorRequest, account: dict[str, Any] = Depends(legacy.require_approved)) -> dict[str, Any]:
    """Store the caller's tracking cursor and echo the saved timestamps.

    Returns:
        The ``last_seen_at`` and ``updated_at`` values of the cursor returned
        by ``_set_tracking_cursor``.
    """
    saved_cursor = _set_tracking_cursor(account["id"], request.last_seen_at)
    return {field: saved_cursor[field] for field in ("last_seen_at", "updated_at")}
@app.get(f"/v2/{platform}/tracking/digest")
def get_platform_tracking_digest(
    since: str = Query(default=""),
    limit: int = Query(default=24, ge=1, le=100),
    account: dict[str, Any] = Depends(legacy.require_approved),
) -> dict[str, Any]:
    """Return the caller's tracking digest as produced by ``_tracking_digest``.

    Args:
        since: Optional lower bound forwarded as ``since_value``; surrounding
            whitespace is removed and a missing value becomes ``""``.
        limit: Value forwarded as ``limit`` (1-100, default 24).
    """
    owner_id = account["id"]
    since_value = (since or "").strip()
    return _tracking_digest(owner_id, since_value=since_value, limit=limit)