feat: recover live collector source and local model integrations

2026-03-23 04:38:52 +08:00
parent 78085ca074
commit 3d0a898faa
9 changed files with 5660 additions and 0 deletions
--- a/collector-service/app/init.py
+++ b/collector-service/app/init.py
@@ -0,0 +1 @@
+"""Collector service source overlay for legacy pyc-backed app."""
--- a/collector-service/app/core_main.py
+++ b/collector-service/app/core_main.py
--- a/collector-service/app/database.py
+++ b/collector-service/app/database.py
@@ -0,0 +1,340 @@
+from __future__ import annotations
+
+import sqlite3
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any, Iterator
+
+
+def utc_now() -> str:
+    from datetime import datetime, timezone
+
+    return datetime.now(timezone.utc).replace(microsecond=0).isoformat()
+
+
+def dict_factory(cursor: sqlite3.Cursor, row: sqlite3.Row) -> dict[str, Any]:
+    return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}
+
+
+class Database:
+    def __init__(self, path: str) -> None:
+        self.path = Path(path)
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+
+    def connect(self) -> sqlite3.Connection:
+        conn = sqlite3.connect(self.path)
+        conn.row_factory = dict_factory
+        conn.execute("PRAGMA foreign_keys = ON")
+        return conn
+
+    @contextmanager
+    def session(self) -> Iterator[sqlite3.Connection]:
+        conn = self.connect()
+        try:
+            yield conn
+            conn.commit()
+        finally:
+            conn.close()
+
+    def fetch_one(self, sql: str, params: tuple[Any, ...] = ()) -> dict[str, Any] | None:
+        with self.session() as conn:
+            return conn.execute(sql, params).fetchone()
+
+    def fetch_all(self, sql: str, params: tuple[Any, ...] = ()) -> list[dict[str, Any]]:
+        with self.session() as conn:
+            return list(conn.execute(sql, params).fetchall())
+
+    def execute(self, sql: str, params: tuple[Any, ...] = ()) -> None:
+        with self.session() as conn:
+            conn.execute(sql, params)
+
+    def table_exists(self, name: str) -> bool:
+        row = self.fetch_one(
+            "SELECT name FROM sqlite_master WHERE type = 'table' AND name = ?",
+            (name,),
+        )
+        return bool(row)
+
+    def column_exists(self, table: str, column: str) -> bool:
+        with self.session() as conn:
+            rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
+        return any(row["name"] == column for row in rows)
+
+    def init_schema(self) -> None:
+        schema = """
+        CREATE TABLE IF NOT EXISTS accounts (
+            id TEXT PRIMARY KEY,
+            username TEXT NOT NULL UNIQUE,
+            password_hash TEXT NOT NULL,
+            password_salt TEXT NOT NULL,
+            display_name TEXT NOT NULL,
+            role TEXT NOT NULL,
+            approval_status TEXT NOT NULL,
+            approved_by TEXT,
+            approved_at TEXT,
+            preferred_analysis_model_id TEXT,
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL
+        );
+
+        CREATE TABLE IF NOT EXISTS auth_tokens (
+            token TEXT PRIMARY KEY,
+            account_id TEXT NOT NULL,
+            created_at TEXT NOT NULL,
+            FOREIGN KEY(account_id) REFERENCES accounts(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS model_profiles (
+            id TEXT PRIMARY KEY,
+            owner_account_id TEXT,
+            name TEXT NOT NULL,
+            provider TEXT NOT NULL,
+            base_url TEXT NOT NULL,
+            api_key TEXT NOT NULL DEFAULT '',
+            model_name TEXT NOT NULL,
+            is_system INTEGER NOT NULL DEFAULT 0,
+            is_default INTEGER NOT NULL DEFAULT 0,
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL,
+            FOREIGN KEY(owner_account_id) REFERENCES accounts(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS knowledge_bases (
+            id TEXT PRIMARY KEY,
+            user_id TEXT NOT NULL,
+            project_id TEXT,
+            name TEXT NOT NULL,
+            description TEXT NOT NULL DEFAULT '',
+            sync_status TEXT NOT NULL DEFAULT 'ready',
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL,
+            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS knowledge_documents (
+            id TEXT PRIMARY KEY,
+            knowledge_base_id TEXT NOT NULL,
+            title TEXT NOT NULL,
+            source_type TEXT NOT NULL,
+            source_url TEXT NOT NULL DEFAULT '',
+            transcript_text TEXT NOT NULL DEFAULT '',
+            style_summary TEXT NOT NULL DEFAULT '',
+            combined_text TEXT NOT NULL DEFAULT '',
+            analysis_json TEXT NOT NULL DEFAULT '{}',
+            storyboard_json TEXT NOT NULL DEFAULT '[]',
+            source_artifact_json TEXT NOT NULL DEFAULT '{}',
+            analysis_model_profile_id TEXT NOT NULL DEFAULT '',
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL,
+            FOREIGN KEY(knowledge_base_id) REFERENCES knowledge_bases(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS assistants (
+            id TEXT PRIMARY KEY,
+            user_id TEXT NOT NULL,
+            project_id TEXT,
+            name TEXT NOT NULL,
+            description TEXT NOT NULL DEFAULT '',
+            system_prompt TEXT NOT NULL DEFAULT '',
+            generation_goal TEXT NOT NULL DEFAULT '',
+            config_json TEXT NOT NULL DEFAULT '{}',
+            model_profile_id TEXT NOT NULL DEFAULT '',
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL,
+            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS assistant_knowledge_bases (
+            assistant_id TEXT NOT NULL,
+            knowledge_base_id TEXT NOT NULL,
+            PRIMARY KEY (assistant_id, knowledge_base_id),
+            FOREIGN KEY(assistant_id) REFERENCES assistants(id) ON DELETE CASCADE,
+            FOREIGN KEY(knowledge_base_id) REFERENCES knowledge_bases(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS jobs (
+            id TEXT PRIMARY KEY,
+            user_id TEXT NOT NULL,
+            project_id TEXT,
+            parent_job_id TEXT,
+            assistant_id TEXT,
+            knowledge_base_id TEXT NOT NULL,
+            content_source_id TEXT,
+            source_type TEXT NOT NULL,
+            line_type TEXT NOT NULL DEFAULT 'analysis',
+            workflow_key TEXT NOT NULL DEFAULT '',
+            orchestrator TEXT NOT NULL DEFAULT 'n8n',
+            provider_name TEXT NOT NULL DEFAULT '',
+            provider_task_id TEXT NOT NULL DEFAULT '',
+            source_url TEXT,
+            title TEXT NOT NULL,
+            language TEXT NOT NULL DEFAULT 'auto',
+            status TEXT NOT NULL,
+            transcript_text TEXT NOT NULL DEFAULT '',
+            style_summary TEXT NOT NULL DEFAULT '',
+            upload_status TEXT NOT NULL DEFAULT 'pending',
+            error TEXT NOT NULL DEFAULT '',
+            artifacts_json TEXT NOT NULL DEFAULT '{}',
+            result_json TEXT NOT NULL DEFAULT '{}',
+            analysis_model_profile_id TEXT NOT NULL DEFAULT '',
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL,
+            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
+            FOREIGN KEY(assistant_id) REFERENCES assistants(id) ON DELETE SET NULL,
+            FOREIGN KEY(knowledge_base_id) REFERENCES knowledge_bases(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS projects (
+            id TEXT PRIMARY KEY,
+            user_id TEXT NOT NULL,
+            name TEXT NOT NULL,
+            description TEXT NOT NULL DEFAULT '',
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL,
+            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS content_sources (
+            id TEXT PRIMARY KEY,
+            user_id TEXT NOT NULL,
+            project_id TEXT,
+            source_kind TEXT NOT NULL,
+            platform TEXT NOT NULL DEFAULT '',
+            handle TEXT NOT NULL DEFAULT '',
+            source_url TEXT NOT NULL DEFAULT '',
+            title TEXT NOT NULL DEFAULT '',
+            local_path TEXT NOT NULL DEFAULT '',
+            metadata_json TEXT NOT NULL DEFAULT '{}',
+            created_at TEXT NOT NULL,
+            updated_at TEXT NOT NULL,
+            FOREIGN KEY(user_id) REFERENCES accounts(id) ON DELETE CASCADE,
+            FOREIGN KEY(project_id) REFERENCES projects(id) ON DELETE SET NULL
+        );
+
+        CREATE TABLE IF NOT EXISTS job_events (
+            id TEXT PRIMARY KEY,
+            job_id TEXT NOT NULL,
+            event_type TEXT NOT NULL,
+            payload_json TEXT NOT NULL DEFAULT '{}',
+            created_at TEXT NOT NULL,
+            FOREIGN KEY(job_id) REFERENCES jobs(id) ON DELETE CASCADE
+        );
+
+        CREATE TABLE IF NOT EXISTS app_updates (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            platform TEXT NOT NULL,
+            channel TEXT NOT NULL,
+            version_code INTEGER NOT NULL,
+            version_name TEXT NOT NULL,
+            min_supported_code INTEGER NOT NULL,
+            apk_url TEXT NOT NULL,
+            apk_sha256 TEXT NOT NULL DEFAULT '',
+            notes TEXT NOT NULL DEFAULT '',
+            force_update INTEGER NOT NULL DEFAULT 0,
+            is_active INTEGER NOT NULL DEFAULT 1,
+            published_at INTEGER NOT NULL,
+            created_by TEXT NOT NULL
+        );
+        """
+        with self.session() as conn:
+            conn.executescript(schema)
+        self.migrate_schema()
+
+    def migrate_schema(self) -> None:
+        table_columns: dict[str, dict[str, str]] = {
+            "knowledge_bases": {
+                "project_id": "TEXT",
+            },
+            "knowledge_documents": {
+                "analysis_json": "TEXT NOT NULL DEFAULT '{}'",
+                "storyboard_json": "TEXT NOT NULL DEFAULT '[]'",
+                "source_artifact_json": "TEXT NOT NULL DEFAULT '{}'",
+            },
+            "assistants": {
+                "project_id": "TEXT",
+                "config_json": "TEXT NOT NULL DEFAULT '{}'",
+            },
+            "jobs": {
+                "project_id": "TEXT",
+                "parent_job_id": "TEXT",
+                "content_source_id": "TEXT",
+                "line_type": "TEXT NOT NULL DEFAULT 'analysis'",
+                "workflow_key": "TEXT NOT NULL DEFAULT ''",
+                "orchestrator": "TEXT NOT NULL DEFAULT 'n8n'",
+                "provider_name": "TEXT NOT NULL DEFAULT ''",
+                "provider_task_id": "TEXT NOT NULL DEFAULT ''",
+                "result_json": "TEXT NOT NULL DEFAULT '{}'",
+            },
+        }
+
+        for table, columns in table_columns.items():
+            if not self.table_exists(table):
+                continue
+            for column, definition in columns.items():
+                if self.column_exists(table, column):
+                    continue
+                self.execute(f"ALTER TABLE {table} ADD COLUMN {column} {definition}")
+
+        self.ensure_default_projects()
+
+    def ensure_default_projects(self) -> None:
+        if not self.table_exists("projects"):
+            return
+
+        accounts = self.fetch_all("SELECT id, username FROM accounts ORDER BY created_at ASC")
+        for account in accounts:
+            project = self.fetch_one(
+                "SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC LIMIT 1",
+                (account["id"],),
+            )
+            if not project:
+                project_id = f"proj_{account['id']}"
+                now = utc_now()
+                self.execute(
+                    """
+                    INSERT INTO projects (id, user_id, name, description, created_at, updated_at)
+                    VALUES (?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        project_id,
+                        account["id"],
+                        f"{account['username']} 默认项目",
+                        "系统自动创建的默认项目",
+                        now,
+                        now,
+                    ),
+                )
+                project = self.fetch_one("SELECT * FROM projects WHERE id = ?", (project_id,))
+
+            if not project:
+                continue
+
+            if self.column_exists("knowledge_bases", "project_id"):
+                self.execute(
+                    """
+                    UPDATE knowledge_bases
+                    SET project_id = ?
+                    WHERE user_id = ? AND (project_id IS NULL OR project_id = '')
+                    """,
+                    (project["id"], account["id"]),
+                )
+
+            if self.column_exists("assistants", "project_id"):
+                self.execute(
+                    """
+                    UPDATE assistants
+                    SET project_id = ?
+                    WHERE user_id = ? AND (project_id IS NULL OR project_id = '')
+                    """,
+                    (project["id"], account["id"]),
+                )
+
+            if self.column_exists("jobs", "project_id"):
+                self.execute(
+                    """
+                    UPDATE jobs
+                    SET project_id = ?
+                    WHERE user_id = ? AND (project_id IS NULL OR project_id = '')
+                    """,
+                    (project["id"], account["id"]),
+                )
--- a/collector-service/app/douyin_features.py
+++ b/collector-service/app/douyin_features.py
--- a/collector-service/app/integrations.py
+++ b/collector-service/app/integrations.py
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+import mimetypes
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+
+def _join_url(base_url: str, path: str) -> str:
+    base = base_url.rstrip("/")
+    if path.startswith("http://") or path.startswith("https://"):
+        return path
+    return f"{base}/{path.lstrip('/')}"
+
+
+def _unwrap_response(payload: Any) -> dict[str, Any]:
+    if not isinstance(payload, dict):
+        return {"value": payload}
+    if payload.get("success") is True and "data" in payload:
+        data = payload.get("data")
+        if isinstance(data, dict):
+            return data
+        return {"value": data}
+    return payload
+
+
+class N8NClient:
+    def __init__(
+        self,
+        *,
+        base_url: str,
+        workflow_paths: dict[str, str],
+        shared_secret: str = "",
+        timeout: float = 60.0,
+    ) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.workflow_paths = workflow_paths
+        self.shared_secret = shared_secret.strip()
+        self.timeout = timeout
+
+    @property
+    def enabled(self) -> bool:
+        return bool(self.base_url)
+
+    async def trigger(self, workflow_key: str, payload: dict[str, Any]) -> dict[str, Any]:
+        workflow_path = self.workflow_paths.get(workflow_key, "").strip()
+        if not workflow_path:
+            raise ValueError(f"workflow path not configured for {workflow_key}")
+        try:
+            workflow_path = workflow_path.format(**payload)
+        except KeyError:
+            pass
+        headers: dict[str, str] = {}
+        if self.shared_secret:
+            headers["X-Orchestrator-Secret"] = self.shared_secret
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(
+                _join_url(self.base_url, workflow_path),
+                json=payload,
+                headers=headers,
+            )
+            response.raise_for_status()
+            if not response.content:
+                return {"triggered": True}
+            return _unwrap_response(response.json())
+
+
+class CutVideoClient:
+    def __init__(
+        self,
+        *,
+        base_url: str,
+        api_key: str = "",
+        timeout: float = 120.0,
+        upload_timeout: float = 1800.0,
+    ) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.api_key = api_key.strip()
+        self.timeout = timeout
+        self.upload_timeout = upload_timeout
+
+    @property
+    def enabled(self) -> bool:
+        return bool(self.base_url)
+
+    def _headers(self) -> dict[str, str]:
+        headers: dict[str, str] = {}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        return headers
+
+    async def submit_job(self, payload: dict[str, Any]) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(
+                _join_url(self.base_url, "/api/jobs"),
+                json=payload,
+                headers=self._headers(),
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+    async def upload_source_file(self, source_path: Path, *, folder_name: str = "") -> dict[str, Any]:
+        content_type = mimetypes.guess_type(source_path.name)[0] or "application/octet-stream"
+        headers = self._headers()
+        data = {"folder_name": folder_name} if folder_name else {}
+        async with httpx.AsyncClient(timeout=self.upload_timeout) as client:
+            with source_path.open("rb") as handle:
+                response = await client.post(
+                    _join_url(self.base_url, "/api/uploads"),
+                    data=data,
+                    files={"files": (source_path.name, handle, content_type)},
+                    headers=headers,
+                )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+    async def get_task(self, task_id: str) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                _join_url(self.base_url, f"/api/tasks/{task_id}"),
+                headers=self._headers(),
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+    async def get_run(self, run_id: str) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                _join_url(self.base_url, f"/api/runs/{run_id}"),
+                headers=self._headers(),
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+
+class AsrHttpClient:
+    def __init__(
+        self,
+        *,
+        base_url: str,
+        transcribe_path: str = "/transcribe",
+        field_name: str = "wav",
+        timeout: float = 120.0,
+    ) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.transcribe_path = transcribe_path
+        self.field_name = field_name.strip() or "wav"
+        self.timeout = timeout
+
+    @property
+    def enabled(self) -> bool:
+        return bool(self.base_url)
+
+    async def transcribe_audio(self, audio_path: Path) -> dict[str, Any]:
+        content_type = mimetypes.guess_type(audio_path.name)[0] or "application/octet-stream"
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            with audio_path.open("rb") as handle:
+                response = await client.post(
+                    _join_url(self.base_url, self.transcribe_path),
+                    files={self.field_name: (audio_path.name, handle, content_type)},
+                )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+
+class HuobaoDramaClient:
+    def __init__(self, *, base_url: str, timeout: float = 180.0) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+
+    @property
+    def enabled(self) -> bool:
+        return bool(self.base_url)
+
+    async def create_drama(self, payload: dict[str, Any]) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(
+                _join_url(self.base_url, "/api/v1/dramas"),
+                json=payload,
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+    async def generate_image(self, payload: dict[str, Any]) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(
+                _join_url(self.base_url, "/api/v1/images"),
+                json=payload,
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+    async def get_image(self, image_id: str) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                _join_url(self.base_url, f"/api/v1/images/{image_id}"),
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+    async def generate_video(self, payload: dict[str, Any]) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(
+                _join_url(self.base_url, "/api/v1/videos"),
+                json=payload,
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
+
+    async def get_video(self, video_id: str) -> dict[str, Any]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                _join_url(self.base_url, f"/api/v1/videos/{video_id}"),
+            )
+            response.raise_for_status()
+            return _unwrap_response(response.json())
--- a/collector-service/app/legacy_runtime.py
+++ b/collector-service/app/legacy_runtime.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import importlib.machinery
+import importlib.util
+import sys
+import types
+from pathlib import Path
+from typing import Any
+
+BASE_DIR = Path(__file__).resolve().parent
+PYCACHE_DIR = BASE_DIR / "__pycache__"
+LEGACY_PYC_DIR = BASE_DIR / "_legacy_pyc"
+SUPPORTED_PYTHON = (3, 11)
+
+_LEGACY_MODULE: Any | None = None
+
+
+def _ensure_supported_runtime() -> None:
+    if sys.version_info[:2] != SUPPORTED_PYTHON:
+        version = ".".join(map(str, sys.version_info[:3]))
+        required = ".".join(map(str, SUPPORTED_PYTHON))
+        raise RuntimeError(
+            f"Legacy collector bytecode requires Python {required}. Current runtime: {version}."
+        )
+
+
+def _ensure_package() -> None:
+    package = sys.modules.get("app")
+    if package is None:
+        package = types.ModuleType("app")
+        package.__path__ = [str(BASE_DIR)]
+        sys.modules["app"] = package
+
+
+def _load_sourceless_module(module_name: str, pyc_path: Path) -> Any:
+    loader = importlib.machinery.SourcelessFileLoader(module_name, str(pyc_path))
+    spec = importlib.util.spec_from_loader(module_name, loader)
+    if spec is None:
+        raise RuntimeError(f"Unable to create spec for {module_name}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    loader.exec_module(module)
+    return module
+
+
+def load_legacy_main() -> Any:
+    global _LEGACY_MODULE
+    if _LEGACY_MODULE is not None:
+        return _LEGACY_MODULE
+
+    _ensure_supported_runtime()
+    _ensure_package()
+
+    for name in ("database", "fastgpt", "openai_compat"):
+        full_name = f"app.{name}"
+        if full_name not in sys.modules:
+            pyc_dir = LEGACY_PYC_DIR if (LEGACY_PYC_DIR / f"{name}.cpython-311.pyc").exists() else PYCACHE_DIR
+            _load_sourceless_module(full_name, pyc_dir / f"{name}.cpython-311.pyc")
+
+    legacy_name = "app.main_legacy"
+    if legacy_name in sys.modules:
+        _LEGACY_MODULE = sys.modules[legacy_name]
+        return _LEGACY_MODULE
+
+    main_pyc_dir = LEGACY_PYC_DIR if (LEGACY_PYC_DIR / "main.cpython-311.pyc").exists() else PYCACHE_DIR
+    _LEGACY_MODULE = _load_sourceless_module(legacy_name, main_pyc_dir / "main.cpython-311.pyc")
+    _LEGACY_MODULE.__package__ = "app"
+    return _LEGACY_MODULE
--- a/collector-service/app/main.py
+++ b/collector-service/app/main.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from .douyin_features import register_douyin_routes
+
+try:
+    from . import core_main as core
+except Exception:
+    # Keep a bytecode-backed fallback so the app can still boot if the
+    # recovered source baseline is incomplete in this workspace.
+    from .legacy_runtime import load_legacy_main
+
+    core = load_legacy_main()
+
+app = core.app
+
+register_douyin_routes(app, core)
--- a/collector-service/app/openai_compat.py
+++ b/collector-service/app/openai_compat.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from typing import Any
+
+import httpx
+
+
+class OpenAICompatClient:
+    def __init__(self, timeout: float = 180.0) -> None:
+        self.timeout = timeout
+
+    async def chat_completion(
+        self,
+        *,
+        base_url: str,
+        api_key: str,
+        model: str,
+        system_prompt: str,
+        user_prompt: str,
+        temperature: float = 0.7,
+    ) -> str:
+        url = base_url.rstrip("/") + "/chat/completions"
+        headers = {"Content-Type": "application/json"}
+        if api_key.strip():
+            headers["Authorization"] = f"Bearer {api_key.strip()}"
+        payload: dict[str, Any] = {
+            "model": model,
+            "temperature": temperature,
+            "messages": [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+        }
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(url, headers=headers, json=payload)
+            response.raise_for_status()
+            data = response.json()
+        choices = data.get("choices") or []
+        if not choices:
+            return ""
+        message = choices[0].get("message") or {}
+        content = message.get("content") or ""
+        if isinstance(content, list):
+            return "\n".join(str(item.get("text", "")) for item in content if isinstance(item, dict)).strip()
+        return str(content).strip()
--- a/collector-service/run_source_overlay.sh
+++ b/collector-service/run_source_overlay.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PORT="${PORT:-18083}"
+HOST="${HOST:-127.0.0.1}"
+
+# Mirror the current live collector runtime so we can verify the source overlay
+# against the same database and external integrations without touching 8081.
+export DATA_DIR="${DATA_DIR:-/Users/kris/code/StoryForge-gitea/data/collector}"
+export DATABASE_PATH="${DATABASE_PATH:-$DATA_DIR/storyforge.db}"
+export DEFAULT_EXTERNAL_BASE_URL="${DEFAULT_EXTERNAL_BASE_URL:-https://test.hyzq.net/storyforge}"
+export LOCAL_OPENAI_BASE_URL="${LOCAL_OPENAI_BASE_URL:-http://host.docker.internal:8317/v1}"
+export LOCAL_OPENAI_MODEL="${LOCAL_OPENAI_MODEL:-GLM-5}"
+export LOCAL_OPENAI_API_KEY="${LOCAL_OPENAI_API_KEY:-}"
+export YTDLP_BIN="${YTDLP_BIN:-yt-dlp}"
+export FFMPEG_BIN="${FFMPEG_BIN:-ffmpeg}"
+export WHISPER_BIN="${WHISPER_BIN:-}"
+export WHISPER_MODEL="${WHISPER_MODEL:-$DATA_DIR/models/ggml-base.en.bin}"
+export ASR_HTTP_BASE_URL="${ASR_HTTP_BASE_URL:-http://host.docker.internal:8088}"
+export ASR_HTTP_TRANSCRIBE_PATH="${ASR_HTTP_TRANSCRIBE_PATH:-/transcribe}"
+export ASR_HTTP_FIELD_NAME="${ASR_HTTP_FIELD_NAME:-wav}"
+export ASR_HTTP_TIMEOUT_SEC="${ASR_HTTP_TIMEOUT_SEC:-120}"
+export N8N_BASE_URL="${N8N_BASE_URL:-http://n8n:5678}"
+export N8N_ANALYSIS_WEBHOOK_PATH="${N8N_ANALYSIS_WEBHOOK_PATH:-/webhook/storyforge-analysis}"
+export N8N_REAL_CUT_WEBHOOK_PATH="${N8N_REAL_CUT_WEBHOOK_PATH:-/webhook/storyforge-real-cut}"
+export N8N_AI_VIDEO_WEBHOOK_PATH="${N8N_AI_VIDEO_WEBHOOK_PATH:-/webhook/storyforge-ai-video}"
+export N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH="${N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH:-/webhook/storyforge-content-source-sync}"
+export ORCHESTRATOR_SHARED_SECRET="${ORCHESTRATOR_SHARED_SECRET:-storyforge-local-secret}"
+export CUTVIDEO_BASE_URL="${CUTVIDEO_BASE_URL:-http://192.168.31.18:7860}"
+export CUTVIDEO_API_KEY="${CUTVIDEO_API_KEY:-}"
+export CUTVIDEO_BASE_CONFIG="${CUTVIDEO_BASE_CONFIG:-example.job.yaml}"
+export CUTVIDEO_POLL_INTERVAL_SEC="${CUTVIDEO_POLL_INTERVAL_SEC:-10}"
+export CUTVIDEO_MAX_WAIT_SEC="${CUTVIDEO_MAX_WAIT_SEC:-1800}"
+export CUTVIDEO_UPLOAD_TIMEOUT_SEC="${CUTVIDEO_UPLOAD_TIMEOUT_SEC:-1800}"
+export HUOBAO_BASE_URL="${HUOBAO_BASE_URL:-http://host.docker.internal:5678}"
+export HUOBAO_POLL_INTERVAL_SEC="${HUOBAO_POLL_INTERVAL_SEC:-10}"
+export HUOBAO_MAX_WAIT_SEC="${HUOBAO_MAX_WAIT_SEC:-900}"
+
+cd "$ROOT_DIR"
+exec ./.venv311/bin/python -m uvicorn app.main:app --host "$HOST" --port "$PORT"
				`@@ -0,0 +1 @@`
				`"""Collector service source overlay for legacy pyc-backed app."""`