diff --git a/collector-service/app/douyin_features.py b/collector-service/app/douyin_features.py index 69e07f2..1875c2f 100644 --- a/collector-service/app/douyin_features.py +++ b/collector-service/app/douyin_features.py @@ -245,11 +245,22 @@ def _video_score_breakdown(video: dict[str, Any]) -> dict[str, Any]: else: age_days = 999.0 - engagement_rate = (like + comment * 2.2 + share * 4.2 + collect * 3.0) / max(play, 1.0) - share_rate = share / max(play, 1.0) - collect_rate = collect / max(play, 1.0) - comment_rate = comment / max(play, 1.0) - like_rate = like / max(play, 1.0) + if play > 0: + rate_denominator = play + else: + rate_denominator = max( + like * 18.0, + comment * 70.0, + share * 95.0, + collect * 55.0, + 1000.0 + ) + + engagement_rate = (like + comment * 2.2 + share * 4.2 + collect * 3.0) / max(rate_denominator, 1.0) + share_rate = share / max(rate_denominator, 1.0) + collect_rate = collect / max(rate_denominator, 1.0) + comment_rate = comment / max(rate_denominator, 1.0) + like_rate = like / max(rate_denominator, 1.0) volume_component = min(36.0, math.log10(play + 1.0) * 9.0) interaction_component = min(28.0, engagement_rate * 100.0) @@ -261,6 +272,17 @@ def _video_score_breakdown(video: dict[str, Any]) -> dict[str, Any]: min(100.0, volume_component + interaction_component + spread_component + freshness_component + baseline_component), 2 ) + popularity_score = round( + min( + 100.0, + math.log10(play + 1.0) * 24.0 + + math.log10(like + 1.0) * 22.0 + + math.log10(comment + 1.0) * 20.0 + + math.log10(share + 1.0) * 18.0 + + math.log10(collect + 1.0) * 16.0 + ), + 2 + ) commercial_score = round( min( 100.0, @@ -288,6 +310,7 @@ def _video_score_breakdown(video: dict[str, Any]) -> dict[str, Any]: return { "performance_score": performance_score, + "popularity_score": popularity_score, "commercial_score": commercial_score, "engagement_rate": round(engagement_rate, 4), "share_rate": round(share_rate, 4), @@ -486,27 +509,79 @@ def _pick_best_profile(candidates: list[dict[str, Any]], fallback_url: str = "") def _normalize_video_candidate(candidate: dict[str, Any]) -> dict[str, Any]: + def _collect_image_urls(node: Any) -> list[str]: + urls: list[str] = [] + + def _visit(value: Any) -> None: + if isinstance(value, str): + text = value.strip() + if text.startswith("http"): + urls.append(text) + return + if isinstance(value, list): + for item in value[:20]: + _visit(item) + return + if not isinstance(value, dict): + return + + for key in ("url", "download_url", "origin_url", "display_url", "cover_url"): + target = value.get(key) + if isinstance(target, str) and target.strip().startswith("http"): + urls.append(target.strip()) + + url_list = value.get("url_list") + if isinstance(url_list, list): + for item in url_list[:5]: + _visit(item) + + for key in ("image", "images", "cover", "display_image", "origin_image"): + child = value.get(key) + if child not in (None, "", [], {}): + _visit(child) + + _visit(node) + return _dedupe_strings(urls) + stats_source = candidate.get("statistics") if isinstance(candidate.get("statistics"), dict) else {} video_source = candidate.get("video") if isinstance(candidate.get("video"), dict) else {} title = _first_non_empty(candidate.get("title"), candidate.get("desc"), candidate.get("share_title")) description = _first_non_empty(candidate.get("desc"), candidate.get("title"), candidate.get("text")) cover = candidate.get("cover") or video_source.get("cover") + image_urls = _collect_image_urls( + [ + candidate.get("images"), + candidate.get("image_infos"), + candidate.get("image_list"), + candidate.get("slides"), + candidate.get("photos"), + candidate.get("photo"), + candidate.get("image_post_info"), + ] + ) if isinstance(cover, dict): cover = _first_non_empty( cover.get("url_list", [""])[0] if isinstance(cover.get("url_list"), list) else "", cover.get("url") ) + duration_raw = float(candidate.get("duration") or video_source.get("duration") or 0) + duration_sec = duration_raw / 1000.0 if duration_raw > 1000 else duration_raw + has_video_media = bool(video_source) or duration_sec > 0.3 + aweme_type = str(candidate.get("aweme_type") or "") + looks_like_image_text = bool(image_urls) and (not has_video_media or aweme_type in {"51", "55", "61", "68", "122", "150"}) + content_type = "image_text" if looks_like_image_text else "video" return { "aweme_id": _first_non_empty(candidate.get("aweme_id"), candidate.get("item_id"), candidate.get("group_id")), "title": title, "description": description, "share_url": _first_non_empty(candidate.get("share_url")), - "cover_url": _first_non_empty(cover), - "duration_sec": float(candidate.get("duration") or video_source.get("duration") or 0) / 1000.0 - if float(candidate.get("duration") or video_source.get("duration") or 0) > 1000 - else float(candidate.get("duration") or video_source.get("duration") or 0), + "cover_url": _first_non_empty(cover, image_urls[0] if image_urls else ""), + "duration_sec": duration_sec, "published_at": _normalize_timestamp(candidate.get("create_time") or candidate.get("publish_time")), "tags": _extract_hashtags(title, description), + "content_type": content_type, + "content_type_label": "图文" if content_type == "image_text" else "视频", + "image_count": len(image_urls), "stats": { "play": _parse_count(stats_source.get("play_count") or candidate.get("play_count")), "like": _parse_count(stats_source.get("digg_count") or candidate.get("digg_count")), @@ -1341,6 +1416,8 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: ) payloads: list[dict[str, Any]] = [] for row in rows: + raw_payload = _safe_json_loads(row["raw_json"], {}) + normalized = _normalize_video_candidate(raw_payload) if isinstance(raw_payload, dict) and raw_payload else {} payloads.append({ "id": row["id"], "aweme_id": row["aweme_id"], @@ -1352,7 +1429,10 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "published_at": row["published_at"], "tags": _safe_json_loads(row["tags_json"], []), "stats": _safe_json_loads(row["stats_json"], {}), - "raw": _safe_json_loads(row["raw_json"], {}) + "content_type": normalized.get("content_type", "video"), + "content_type_label": normalized.get("content_type_label", "视频"), + "image_count": int(normalized.get("image_count") or 0), + "raw": raw_payload }) return payloads @@ -1409,6 +1489,9 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "duration_sec": video["duration_sec"], "published_at": video["published_at"], "tags": video["tags"], + "content_type": video.get("content_type", "video"), + "content_type_label": video.get("content_type_label", "视频"), + "image_count": int(video.get("image_count") or 0), "stats": video["stats"], "score": score } @@ -1417,6 +1500,12 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: return payload def _video_sort_key(video: dict[str, Any], sort_by: str) -> tuple[Any, ...]: + if sort_by in {"popular", "popularity"}: + return ( + float(video.get("score", {}).get("popularity_score") or 0), + float(video.get("score", {}).get("performance_score") or 0), + float(video.get("score", {}).get("commercial_score") or 0) + ) if sort_by == "latest": return ( _parse_iso_datetime(video.get("published_at")) or datetime.fromtimestamp(0, tz=timezone.utc), @@ -1467,6 +1556,8 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: high_score_threshold = 60.0 high_score_videos = [video for video in videos_by_score if float(video["score"]["performance_score"]) >= high_score_threshold] analyzed_count = sum(1 for video in videos if video.get("latest_analysis")) + video_only_count = sum(1 for video in videos if video.get("content_type") == "video") + image_text_count = sum(1 for video in videos if video.get("content_type") == "image_text") return { "items": videos, "top_scored_video_ids": [video["id"] for video in videos_by_score[: min(12, len(videos_by_score))]], @@ -1475,7 +1566,9 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "meta": { "total_count": len(videos), "analyzed_count": analyzed_count, - "high_score_count": len(high_score_videos) + "high_score_count": len(high_score_videos), + "video_count": video_only_count, + "image_text_count": image_text_count } } @@ -2818,9 +2911,10 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: @app.get("/v2/douyin/accounts/{account_id}/videos") def list_douyin_account_videos( account_id: str, - limit: int = 60, + limit: int = 200, sort_by: str = "score", scope: str = "all", + content_type: str = "all", q: str = "", tag: str = "", account: dict[str, Any] = Depends(legacy.require_approved) @@ -2836,6 +2930,13 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: elif normalized_scope == "latest": items = [item_map[video_id] for video_id in workspace["latest_video_ids"] if video_id in item_map] + normalized_content_type = (content_type or "all").strip().lower() + if normalized_content_type in {"video", "image_text"}: + items = [ + item for item in items + if str(item.get("content_type") or "video").strip().lower() == normalized_content_type + ] + query_text = (q or "").strip().lower() if query_text: items = [ @@ -2863,13 +2964,14 @@ def register_douyin_routes(app: Any, legacy: Any) -> None: "account_id": account_row["id"], "sort_by": normalized_sort, "scope": normalized_scope, + "content_type": normalized_content_type, "query": q, "tag": tag, "high_score_threshold": workspace["high_score_threshold"], "meta": workspace["meta"], "top_scored_video_ids": workspace["top_scored_video_ids"], "latest_video_ids": workspace["latest_video_ids"], - "items": items[: max(1, min(limit, 120))] + "items": items[: max(1, min(limit, 1000))] } @app.get("/v2/douyin/accounts/{account_id}/analysis-reports") diff --git a/scripts/douyin-browser-capture/control_panel.mjs b/scripts/douyin-browser-capture/control_panel.mjs index ea7be77..bd9f18f 100644 --- a/scripts/douyin-browser-capture/control_panel.mjs +++ b/scripts/douyin-browser-capture/control_panel.mjs @@ -713,6 +713,52 @@ function renderPage() { display: grid; gap: 14px; } + .video-card-header { + display: flex; + justify-content: space-between; + gap: 12px; + align-items: flex-start; + flex-wrap: wrap; + } + .video-card-title { + display: inline-flex; + align-items: center; + gap: 10px; + flex-wrap: wrap; + } + .rank-badge { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 34px; + height: 34px; + padding: 0 10px; + border-radius: 999px; + background: linear-gradient(135deg, rgba(11, 60, 93, 0.95), rgba(31, 110, 95, 0.9)); + color: white; + font-size: 13px; + font-weight: 700; + box-shadow: 0 8px 18px rgba(11, 60, 93, 0.16); + } + .work-link { + color: var(--ink); + text-decoration: none; + line-height: 1.45; + } + .work-link:hover { + color: var(--accent); + text-decoration: underline; + } + .work-type-pill { + display: inline-flex; + align-items: center; + padding: 6px 10px; + border-radius: 999px; + background: rgba(185, 117, 36, 0.12); + color: #8a5517; + font-size: 12px; + font-weight: 600; + } .video-layout { display: grid; grid-template-columns: 120px 1fr; @@ -728,7 +774,7 @@ function renderPage() { } .toolbar-grid { display: grid; - grid-template-columns: repeat(4, minmax(0, 1fr)); + grid-template-columns: repeat(5, minmax(0, 1fr)); gap: 12px; } .score-badges { @@ -963,7 +1009,7 @@ function renderPage() {

作品工作台

-

这里会把高分作品和最新作品拆开看,并给高分作品自动补运营分析。

+

这里展示完整作品列表,可按发布时间、AI 打分、受欢迎程度与作品类型筛选排序,每条作品下都带分析与跳转链接。

@@ -971,7 +1017,7 @@ function renderPage() {
+