fix: collapse duplicate douyin analysis history

This commit is contained in:
kris
2026-03-21 02:26:42 +08:00
parent c4222755b1
commit be94836e3c
2 changed files with 140 additions and 34 deletions

View File

@@ -1598,6 +1598,127 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
})
return payloads
def _normalize_report_text(value: Any) -> str:
text = str(value or "").strip()
if not text:
return ""
return re.sub(r"\s+", " ", text)
def _build_report_payload(report: dict[str, Any]) -> dict[str, Any]:
    """Serialize one ``douyin_analysis_reports`` row together with its suggestions.

    Args:
        report: A report row; must provide ``id``, ``focus_text``,
            ``model_profile_ids_json``, ``linked_account_ids_json`` and
            ``created_at``.

    Returns:
        A dict with the report fields, the JSON columns decoded through
        ``_safe_json_loads``, the suggestions (oldest first) and the
        duplicate bookkeeping fields initialised to "no duplicates"
        (``duplicate_count`` = 1, ``duplicate_report_ids`` = []).
    """
    suggestion_rows = legacy.db.fetch_all(
        "SELECT * FROM douyin_analysis_suggestions WHERE report_id = ? ORDER BY created_at ASC",
        (report["id"],)
    )

    payload: dict[str, Any] = {
        "id": report["id"],
        "focus_text": report["focus_text"],
        # presumably _safe_json_loads returns the fallback for invalid JSON -- confirm
        "model_profile_ids": _safe_json_loads(report["model_profile_ids_json"], []),
        "linked_account_ids": _safe_json_loads(report["linked_account_ids_json"], []),
        "created_at": report["created_at"],
        "duplicate_count": 1,
        "duplicate_report_ids": [],
    }

    serialized: list[dict[str, Any]] = []
    for row in suggestion_rows:
        serialized.append({
            "id": row["id"],
            "model_profile_id": row["model_profile_id"],
            "model_label": row["model_label"],
            "status": row["status"],
            "suggestion_text": row["suggestion_text"],
            "parsed_json": _safe_json_loads(row["parsed_json"], {}),
        })

    # Assigned last so "suggestions" stays the final key of the payload.
    payload["suggestions"] = serialized
    return payload
def _report_signature(report_payload: dict[str, Any]) -> str:
    """Build a comparable text fingerprint for a report payload.

    The first line is the normalized focus text.  Each suggestion then
    contributes one line ``model_profile_id|status|content`` where content is
    the key-sorted JSON dump of ``parsed_json`` when that is a non-empty dict,
    and the whitespace-normalized ``suggestion_text`` otherwise.  Suggestions
    are taken in the order given, so the fingerprint is order-sensitive.

    Args:
        report_payload: A dict with optional ``focus_text`` and ``suggestions``
            entries (each suggestion being a dict).

    Returns:
        The newline-joined fingerprint string.
    """

    def _as_text(raw: Any) -> str:
        # str.join() rejects non-string items; a key that is present but holds
        # None (e.g. a NULL column) or an int must not raise TypeError.
        return "" if raw is None else str(raw)

    parts = [_normalize_report_text(report_payload.get("focus_text"))]
    # ``or []`` also covers an explicit ``"suggestions": None`` entry.
    for suggestion in report_payload.get("suggestions") or []:
        parsed = suggestion.get("parsed_json")
        if isinstance(parsed, dict) and parsed:
            # sort_keys makes the dump independent of dict insertion order.
            normalized_content = json.dumps(parsed, ensure_ascii=False, sort_keys=True)
        else:
            normalized_content = _normalize_report_text(suggestion.get("suggestion_text"))
        parts.append(
            "|".join(
                (
                    _as_text(suggestion.get("model_profile_id")),
                    _as_text(suggestion.get("status")),
                    normalized_content,
                )
            )
        )
    return "\n".join(parts)
def _list_report_payloads(account_id: str, limit: int = 5, dedupe: bool = True) -> list[dict[str, Any]]:
    """Return the newest report payloads of an account, optionally collapsed.

    With ``dedupe`` enabled, reports sharing the same normalized focus text
    are folded into the newest report of that group.  The surviving payload's
    ``duplicate_count`` is the size of the group (itself included) and its
    ``duplicate_report_ids`` lists the folded report ids, newest first.
    Reports with identical content always share a focus text, so this single
    grouping also covers exact duplicates and counts every folded report
    exactly once.

    Args:
        account_id: Account whose reports are listed.
        limit: Maximum number of payloads returned; values <= 0 yield ``[]``.
        dedupe: Collapse same-focus reports when true; otherwise return the
            newest ``limit`` reports unchanged.

    Returns:
        Report payloads ordered by ``created_at`` descending.

    Note:
        Collapsing only looks at a bounded window of recent rows
        (``max(limit * 4, 20)``), so duplicate counts never exceed that window.
    """
    if limit <= 0:
        # A negative limit would otherwise slice from the wrong end.
        return []

    # Over-fetch only when collapsing; without dedupe the extra rows (and the
    # per-row suggestion queries) would be thrown away.
    fetch_limit = max(limit * 4, 20) if dedupe else limit
    rows = legacy.db.fetch_all(
        """
SELECT *
FROM douyin_analysis_reports
WHERE account_id = ?
ORDER BY created_at DESC
LIMIT ?
""",
        (account_id, fetch_limit)
    )
    if not dedupe:
        return [_build_report_payload(row) for row in rows]

    collapsed: list[dict[str, Any]] = []
    newest_by_focus: dict[str, dict[str, Any]] = {}
    for row in rows:
        # Empty and whitespace-only focus texts both mean "default analysis".
        focus_key = _normalize_report_text(row["focus_text"]) or "__default__"
        keeper = newest_by_focus.get(focus_key)
        if keeper is None:
            # Only the representative of a group needs its suggestions loaded.
            keeper = _build_report_payload(row)
            newest_by_focus[focus_key] = keeper
            collapsed.append(keeper)
            continue
        keeper["duplicate_count"] = int(keeper.get("duplicate_count") or 1) + 1
        keeper.setdefault("duplicate_report_ids", []).append(row["id"])
    return collapsed[:limit]
def _delete_report(report_id: str) -> None:
    """Delete a report and the suggestion rows that reference it.

    The suggestion rows are removed first, then the report row itself.
    NOTE(review): the statements are issued one after the other; whether they
    run inside a single transaction depends on ``legacy.db`` -- confirm.
    """
    ordered_statements = (
        "DELETE FROM douyin_analysis_suggestions WHERE report_id = ?",
        "DELETE FROM douyin_analysis_reports WHERE id = ?",
    )
    for statement in ordered_statements:
        legacy.db.execute(statement, (report_id,))
def _find_duplicate_report_payload(
    account_id: str,
    focus_text: str,
    suggestion_payloads: list[dict[str, Any]],
    exclude_report_id: str = ""
) -> dict[str, Any] | None:
    """Look for a stored report whose content matches a freshly produced one.

    Up to 10 of the account's newest reports with exactly the same
    ``focus_text`` (excluding ``exclude_report_id``) are compared, newest
    first, by their ``_report_signature``.

    Args:
        account_id: Account that owns the reports.
        focus_text: Focus text of the new report; matched with SQL ``=``.
        suggestion_payloads: Suggestions of the new report.
        exclude_report_id: Report id to leave out of the search (typically
            the new report itself).

    Returns:
        The payload of the first stored report with an identical signature,
        or ``None`` when there is none.
    """
    recent_rows = legacy.db.fetch_all(
        """
SELECT *
FROM douyin_analysis_reports
WHERE account_id = ? AND focus_text = ? AND id != ?
ORDER BY created_at DESC
LIMIT 10
""",
        (account_id, focus_text, exclude_report_id)
    )
    wanted_signature = _report_signature(
        {"focus_text": focus_text, "suggestions": suggestion_payloads}
    )
    # Lazy generator: stored payloads are only built until the first match.
    stored_payloads = (_build_report_payload(row) for row in recent_rows)
    return next(
        (
            stored
            for stored in stored_payloads
            if _report_signature(stored) == wanted_signature
        ),
        None,
    )
def _build_workspace_payload(account_row: dict[str, Any]) -> dict[str, Any]:
account_payload = _build_account_payload(account_row)
video_workspace = _build_video_workspace_payload(account_row)
@@ -1621,40 +1742,7 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
""",
(account_row["id"],)
)
reports = legacy.db.fetch_all(
"""
SELECT *
FROM douyin_analysis_reports
WHERE account_id = ?
ORDER BY created_at DESC
LIMIT 5
""",
(account_row["id"],)
)
report_payloads = []
for report in reports:
suggestions = legacy.db.fetch_all(
"SELECT * FROM douyin_analysis_suggestions WHERE report_id = ? ORDER BY created_at ASC",
(report["id"],)
)
report_payloads.append({
"id": report["id"],
"focus_text": report["focus_text"],
"model_profile_ids": _safe_json_loads(report["model_profile_ids_json"], []),
"linked_account_ids": _safe_json_loads(report["linked_account_ids_json"], []),
"created_at": report["created_at"],
"suggestions": [
{
"id": suggestion["id"],
"model_profile_id": suggestion["model_profile_id"],
"model_label": suggestion["model_label"],
"status": suggestion["status"],
"suggestion_text": suggestion["suggestion_text"],
"parsed_json": _safe_json_loads(suggestion["parsed_json"], {})
}
for suggestion in suggestions
]
})
report_payloads = _list_report_payloads(account_row["id"], limit=5, dedupe=True)
recent_searches = legacy.db.fetch_all(
"""
SELECT *
@@ -2335,6 +2423,23 @@ def register_douyin_routes(app: Any, legacy: Any) -> None:
}
suggestions = await asyncio.gather(*[_analyze_with_model(profile) for profile in profiles])
duplicate_report = _find_duplicate_report_payload(
account_row["id"],
request.extra_focus,
suggestions,
exclude_report_id=report_id
)
if duplicate_report:
_delete_report(report_id)
return {
"report_id": duplicate_report["id"],
"created_at": duplicate_report["created_at"],
"context": analysis_context,
"suggestions": duplicate_report["suggestions"],
"auto_video_analyses": [],
"duplicate_of_report_id": duplicate_report["id"],
"duplicate_count": duplicate_report.get("duplicate_count", 1)
}
auto_video_analyses: list[dict[str, Any]] = []
if request.auto_analyze_top_videos and profiles:
auto_video_analyses = await _run_top_video_analyses(

View File

@@ -1505,6 +1505,7 @@ function renderPage() {
'<strong>' + escapeHtml(report.focus_text || "默认分析") + '</strong>',
'<span class="pill">' + escapeHtml(formatDateTime(report.created_at)) + '</span>',
'</div>',
Number(report.duplicate_count || 1) > 1 ? '<div class="meta" style="margin-top:8px;">已折叠 ' + escapeHtml(String(Number(report.duplicate_count) - 1)) + ' 条同主题历史</div>' : '',
safeArray(report.suggestions).length ? safeArray(report.suggestions).map(renderAccountSuggestion).join("") : '<p class="empty-state" style="margin-top:10px;">这份报告还没有 suggestion。</p>',
'</div>'
].join("");