Files
storyforge/collector-service/app/main.py

3066 lines
122 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import asyncio
import httpx
import json
import os
import re
import secrets
import shutil
import socket
import subprocess
import sys
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import urljoin, urlparse
from fastapi import Body, Depends, FastAPI, File, Form, Header, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field
from .database import Database, utc_now
from .douyin_features import register_douyin_routes
from .integrations import AsrHttpClient, CutVideoClient, HuobaoDramaClient, N8NClient
from .openai_compat import OpenAICompatClient
# --- Filesystem layout -------------------------------------------------------
# BASE_DIR is two directory levels above the directory containing this file.
BASE_DIR = Path(__file__).resolve().parents[2]
# All runtime data lives under DATA_DIR (overridable through the environment).
DATA_DIR = Path(os.getenv("DATA_DIR", BASE_DIR / "data" / "collector"))
DOWNLOADS_DIR = DATA_DIR / "downloads"
JOBS_DIR = DATA_DIR / "jobs"
MODELS_DIR = DATA_DIR / "models"
DB_PATH = os.getenv("DATABASE_PATH", str(DATA_DIR / "storyforge.db"))
DEFAULT_EXTERNAL_BASE_URL = os.getenv("DEFAULT_EXTERNAL_BASE_URL", "https://test.hyzq.net/storyforge")
# --- OpenAI-compatible model endpoint defaults --------------------------------
LOCAL_OPENAI_BASE_URL = os.getenv("LOCAL_OPENAI_BASE_URL", "http://127.0.0.1:8317/v1")
LOCAL_OPENAI_MODEL = os.getenv("LOCAL_OPENAI_MODEL", "GLM-5")
LOCAL_OPENAI_API_KEY = os.getenv("LOCAL_OPENAI_API_KEY", "")
# --- External command-line tools ----------------------------------------------
YTDLP_BIN = os.getenv("YTDLP_BIN", "yt-dlp")
FFMPEG_BIN = os.getenv("FFMPEG_BIN", "ffmpeg")
WHISPER_BIN = os.getenv("WHISPER_BIN", "")
WHISPER_MODEL = os.getenv("WHISPER_MODEL", str(MODELS_DIR / "ggml-base.en.bin"))
# --- HTTP speech-recognition backend ------------------------------------------
ASR_HTTP_BASE_URL = os.getenv("ASR_HTTP_BASE_URL", "")
ASR_HTTP_TRANSCRIBE_PATH = os.getenv("ASR_HTTP_TRANSCRIBE_PATH", "/transcribe")
ASR_HTTP_FIELD_NAME = os.getenv("ASR_HTTP_FIELD_NAME", "wav")
ASR_HTTP_TIMEOUT_SEC = float(os.getenv("ASR_HTTP_TIMEOUT_SEC", "120"))
# --- n8n orchestrator webhooks --------------------------------------------------
# NOTE(review): this default uses port 5670 while HUOBAO_BASE_URL below defaults
# to 5678 — confirm the two defaults are not swapped or colliding in deployment.
N8N_BASE_URL = os.getenv("N8N_BASE_URL", "http://127.0.0.1:5670")
N8N_ANALYSIS_WEBHOOK_PATH = os.getenv("N8N_ANALYSIS_WEBHOOK_PATH", "/webhook/storyforge-analysis")
N8N_REAL_CUT_WEBHOOK_PATH = os.getenv("N8N_REAL_CUT_WEBHOOK_PATH", "/webhook/storyforge-real-cut")
N8N_AI_VIDEO_WEBHOOK_PATH = os.getenv("N8N_AI_VIDEO_WEBHOOK_PATH", "/webhook/storyforge-ai-video")
N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH = os.getenv("N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH", "/webhook/storyforge-content-source-sync")
# An empty secret disables the orchestrator check in require_orchestrator().
ORCHESTRATOR_SHARED_SECRET = os.getenv("ORCHESTRATOR_SHARED_SECRET", "")
# --- CutVideo and Huobao integrations -------------------------------------------
CUTVIDEO_BASE_URL = os.getenv("CUTVIDEO_BASE_URL", "")
CUTVIDEO_API_KEY = os.getenv("CUTVIDEO_API_KEY", "")
HUOBAO_BASE_URL = os.getenv("HUOBAO_BASE_URL", "http://127.0.0.1:5678")
CUTVIDEO_BASE_CONFIG = os.getenv("CUTVIDEO_BASE_CONFIG", "example.job.yaml")
# The int()/float() conversions raise ValueError at import time when an
# environment variable holds a non-numeric value.
CUTVIDEO_POLL_INTERVAL_SEC = int(os.getenv("CUTVIDEO_POLL_INTERVAL_SEC", "10"))
CUTVIDEO_MAX_WAIT_SEC = int(os.getenv("CUTVIDEO_MAX_WAIT_SEC", "1800"))
CUTVIDEO_UPLOAD_TIMEOUT_SEC = int(os.getenv("CUTVIDEO_UPLOAD_TIMEOUT_SEC", "1800"))
HUOBAO_POLL_INTERVAL_SEC = int(os.getenv("HUOBAO_POLL_INTERVAL_SEC", "10"))
HUOBAO_MAX_WAIT_SEC = int(os.getenv("HUOBAO_MAX_WAIT_SEC", "900"))
# Create the data directories at import time; DOWNLOADS_DIR must exist before
# it is mounted as a static directory further down.
for path in (DATA_DIR, DOWNLOADS_DIR, JOBS_DIR, MODELS_DIR):
    path.mkdir(parents=True, exist_ok=True)
# Module-level singletons shared by the helpers and route handlers in this file.
db = Database(DB_PATH)
openai_client = OpenAICompatClient()
# Speech-recognition client configured from the ASR_HTTP_* settings.
asr_http_client = AsrHttpClient(
    base_url=ASR_HTTP_BASE_URL,
    transcribe_path=ASR_HTTP_TRANSCRIBE_PATH,
    field_name=ASR_HTTP_FIELD_NAME,
    timeout=ASR_HTTP_TIMEOUT_SEC,
)
# Maps each workflow key stored on a job to the n8n webhook path that starts it.
n8n_client = N8NClient(
    base_url=N8N_BASE_URL,
    workflow_paths={
        "analysis_pipeline": N8N_ANALYSIS_WEBHOOK_PATH,
        "real_cut_pipeline": N8N_REAL_CUT_WEBHOOK_PATH,
        "ai_video_pipeline": N8N_AI_VIDEO_WEBHOOK_PATH,
        "content_source_sync_pipeline": N8N_CONTENT_SOURCE_SYNC_WEBHOOK_PATH,
    },
    shared_secret=ORCHESTRATOR_SHARED_SECRET,
)
cutvideo_client = CutVideoClient(
    base_url=CUTVIDEO_BASE_URL,
    api_key=CUTVIDEO_API_KEY,
    upload_timeout=CUTVIDEO_UPLOAD_TIMEOUT_SEC,
)
huobao_client = HuobaoDramaClient(base_url=HUOBAO_BASE_URL)
# The ASGI application object.
app = FastAPI(title="StoryForge Collector Service", version="0.2.0")
# NOTE(review): FastAPI's CORS documentation says wildcard origins/methods/headers
# should not be combined with allow_credentials=True. Authentication in this file
# uses a Bearer header (see require_auth), so credentialed CORS may be unnecessary
# — confirm against the front-end before tightening.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serves everything under DOWNLOADS_DIR at /downloads with no authentication.
app.mount("/downloads", StaticFiles(directory=str(DOWNLOADS_DIR)), name="downloads")
# Payload model carrying the fields for account registration
# (the consuming handler is outside this chunk).
class RegisterAccountRequest(BaseModel):
    username: str
    password: str  # plain text as received; see create_password_hash()
    display_name: str = ""
# Payload model carrying login credentials.
class LoginRequest(BaseModel):
    username: str
    password: str
# Payload model describing a model profile: endpoint URL, optional API key,
# and model name.
class ModelProfileRequest(BaseModel):
    name: str
    base_url: str
    api_key: str = ""
    model_name: str
    is_default: bool = False
# Payload model naming a single model profile by id.
class PreferredModelRequest(BaseModel):
    model_profile_id: str
# Payload model for creating a knowledge base.
class KnowledgeBaseCreateRequest(BaseModel):
    name: str
    description: str = ""
    project_id: str = ""  # empty string means "not specified"
# Payload model for submitting a video URL; every target id is optional.
class ExploreVideoLinkRequest(BaseModel):
    video_url: str
    title: str | None = None
    project_id: str | None = None
    knowledge_base_id: str | None = None
    assistant_id: str | None = None
    analysis_model_profile_id: str | None = None
    language: str = "auto"
# Payload model for submitting raw text (title + content); target ids are optional.
class ExploreTextRequest(BaseModel):
    title: str
    content: str
    project_id: str | None = None
    knowledge_base_id: str | None = None
    assistant_id: str | None = None
    analysis_model_profile_id: str | None = None
# Payload model for creating an assistant; only the name is required.
class AssistantCreateRequest(BaseModel):
    name: str
    description: str = ""
    system_prompt: str = ""
    generation_goal: str = ""
    # default_factory gives every instance its own list.
    knowledge_base_ids: list[str] = Field(default_factory=list)
    project_id: str = ""
    model_profile_id: str = ""
# Payload model for updating an assistant. Every field defaults to None —
# presumably None means "leave unchanged"; verify against the update handler.
class AssistantUpdateRequest(BaseModel):
    name: str | None = None
    description: str | None = None
    system_prompt: str | None = None
    generation_goal: str | None = None
    knowledge_base_ids: list[str] | None = None
    project_id: str | None = None
    model_profile_id: str | None = None
# Payload model for a copy-generation request. The platform and audience
# defaults are Chinese literals and are part of the runtime contract.
class GenerateCopyRequest(BaseModel):
    brief: str
    platform: str = "抖音"
    audience: str = "创业者"
    extra_requirements: str = ""
    knowledge_base_ids: list[str] = Field(default_factory=list)
# Payload model describing an app release. Field names are camelCase, unlike
# the other models in this file; they are part of the wire format.
class PublishAppUpdateRequest(BaseModel):
    platform: str = "android"
    channel: str = "stable"
    versionCode: int
    versionName: str
    minSupportedCode: int
    apkUrl: str
    apkSha256: str = ""
    notes: str = ""
    forceUpdate: bool = False
    isActive: bool = True
# Payload model for creating a project.
class ProjectCreateRequest(BaseModel):
    name: str
    description: str = ""
# Payload model for registering a content source; the fields mirror the
# keyword arguments of create_content_source().
class ContentSourceCreateRequest(BaseModel):
    project_id: str = ""
    source_kind: str
    platform: str = ""
    handle: str = ""
    source_url: str = ""
    title: str = ""
    local_path: str = ""
    metadata: dict[str, Any] = Field(default_factory=dict)
# Payload model for syncing items from a content source.
class ContentSourceSyncRequest(BaseModel):
    project_id: str = ""
    knowledge_base_id: str = ""
    assistant_id: str = ""
    content_source_id: str = ""
    platform: str = ""
    handle: str = ""
    source_url: str = ""
    title: str = ""
    analysis_model_profile_id: str = ""
    language: str = "auto"
    # Validated to the inclusive range 1..20.
    max_items: int = Field(default=5, ge=1, le=20)
    skip_existing: bool = True
    auto_trigger_analysis: bool = True
# Payload model for a real-cut job. The numeric fields carry no range
# validation in this model.
class RealCutJobRequest(BaseModel):
    project_id: str = ""
    title: str
    input_dir: str = ""
    source_job_id: str = ""
    base_config: str = ""
    # The default objective is a Chinese runtime string.
    objective: str = "保留高信息密度片段,输出适合短视频平台的粗剪结果"
    target_duration_sec: int = 60
    target_aspect_ratio: str = "9:16"
    ideal_segment_duration_sec: int = 8
    max_segment_duration_sec: int = 18
    transcript_backend: str = "auto"
    transcript_device: str = "cuda"
    review_enabled: bool = False
    dry_run: bool = False
# Payload model for an AI video generation job; title and brief are required.
class AiVideoJobRequest(BaseModel):
    project_id: str = ""
    assistant_id: str = ""
    knowledge_base_id: str = ""
    source_job_id: str = ""
    title: str
    brief: str
    style: str = "realistic"
    shots: int = 4
    image_provider: str = "openai"
    image_model: str = ""
    video_provider: str = "doubao"
    video_model: str = ""
    aspect_ratio: str = "9:16"
    duration: int = 5  # unit not stated here — presumably seconds; TODO confirm
# Payload model for creating a review record; all fields are optional.
class ReviewCreateRequest(BaseModel):
    project_id: str = ""
    source_job_id: str = ""
    assistant_id: str = ""
    title: str = ""
    platform: str = "douyin"
    content_type: str = "video"
    publish_url: str = ""
    published_at: str = ""  # free-form string; no date validation in this model
    metrics: dict[str, Any] = Field(default_factory=dict)
    verdict: str = ""
    highlights: str = ""
    next_actions: str = ""
    notes: str = ""
# Payload model for updating a review record. Every field defaults to None —
# presumably None means "leave unchanged"; verify against the update handler.
class ReviewUpdateRequest(BaseModel):
    title: str | None = None
    platform: str | None = None
    content_type: str | None = None
    publish_url: str | None = None
    published_at: str | None = None
    metrics: dict[str, Any] | None = None
    verdict: str | None = None
    highlights: str | None = None
    next_actions: str | None = None
    notes: str | None = None
    assistant_id: str | None = None
# Payload model for internal step calls. Both snake_case and camelCase job id
# fields are accepted, matching the two spellings trigger_orchestrated_job()
# sends to n8n.
class InternalStepRequest(BaseModel):
    job_id: str = ""
    jobId: str = ""
    payload: dict[str, Any] = Field(default_factory=dict)
# Payload model for a job status update; the fields mirror the keyword
# arguments of update_job_state().
class JobStatusUpdateRequest(BaseModel):
    status: str
    error: str = ""
    provider_name: str = ""
    provider_task_id: str = ""
    artifacts: dict[str, Any] = Field(default_factory=dict)
    result: dict[str, Any] = Field(default_factory=dict)
def now_ts() -> int:
    """Return the current time as whole seconds since the Unix epoch."""
    current = datetime.now(tz=timezone.utc)
    return int(current.timestamp())
def make_id(prefix: str) -> str:
    """Return ``<prefix>_<32 hex chars>`` using a random UUID4."""
    random_part = uuid.uuid4().hex
    return "{}_{}".format(prefix, random_part)
def hash_password(password: str, salt: str) -> str:
    """Derive a hex digest from a password and salt.

    Uses PBKDF2-HMAC-SHA256 with 120,000 iterations over the UTF-8 encoded
    password and salt.
    """
    import hashlib

    password_bytes = password.encode("utf-8")
    salt_bytes = salt.encode("utf-8")
    derived = hashlib.pbkdf2_hmac("sha256", password_bytes, salt_bytes, 120_000)
    return derived.hex()
def create_password_hash(password: str) -> tuple[str, str]:
    """Hash a password under a fresh random salt.

    Returns:
        ``(hashed_password, salt)`` where the salt is 32 hex characters.
    """
    fresh_salt = secrets.token_hex(16)
    digest = hash_password(password, fresh_salt)
    return digest, fresh_salt
def verify_password(password: str, hashed: str, salt: str) -> bool:
    """Return True when the password hashes to ``hashed`` under ``salt``.

    The comparison is constant-time.
    """
    candidate = hash_password(password, salt)
    return secrets.compare_digest(candidate, hashed)
def mask_api_key(value: str) -> str:
    """Mask an API key for display.

    Empty input yields ``""``. Keys of up to 8 characters are fully replaced by
    asterisks. Longer keys keep their first and last four characters around
    ``***``.
    """
    if not value:
        return ""
    length = len(value)
    if length > 8:
        head, tail = value[:4], value[-4:]
        return f"{head}***{tail}"
    return "*" * length
def normalize_model_profile(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``model_profiles`` row into its API representation.

    The raw ``api_key`` is never returned; only a masked form is exposed. The
    integer flags ``is_system`` and ``is_default`` become booleans.
    """
    profile: dict[str, Any] = {}
    profile["id"] = row["id"]
    profile["owner_account_id"] = row.get("owner_account_id")
    for required in ("name", "provider", "base_url"):
        profile[required] = row[required]
    profile["api_key_masked"] = mask_api_key(row.get("api_key", ""))
    profile["model_name"] = row["model_name"]
    for flag in ("is_system", "is_default"):
        profile[flag] = bool(row.get(flag, 0))
    profile["created_at"] = row["created_at"]
    profile["updated_at"] = row["updated_at"]
    return profile
def normalize_account(row: dict[str, Any]) -> dict[str, Any]:
    """Convert an ``accounts`` row into its API representation.

    Only the listed fields are copied, so any other column on the row is left
    out. A missing or null preferred model id becomes ``""``.
    """
    account = {
        field: row[field]
        for field in ("id", "username", "display_name", "role", "approval_status")
    }
    account["approved_by"] = row.get("approved_by")
    account["approved_at"] = row.get("approved_at")
    account["preferred_analysis_model_id"] = row.get("preferred_analysis_model_id") or ""
    account["created_at"] = row["created_at"]
    account["updated_at"] = row["updated_at"]
    return account
def model_profile_for_account(account_id: str, requested_id: str | None) -> dict[str, Any]:
    """Pick the model profile an account should use.

    Resolution order:
      1. ``requested_id``, when it names a profile visible to the account;
      2. the account's ``preferred_analysis_model_id``, when visible;
      3. the first default profile visible to the account, system profiles
         first, then oldest first.

    A profile is visible when its ``owner_account_id`` is NULL or equals
    ``account_id``.

    Raises:
        HTTPException: 500 when no usable profile exists.
    """
    visibility = "(owner_account_id IS NULL OR owner_account_id = ?)"

    def lookup(profile_id: str) -> dict[str, Any] | None:
        # Fetch by id, restricted to profiles the account may see.
        return db.fetch_one(
            f"SELECT * FROM model_profiles WHERE id = ? AND {visibility}",
            (profile_id, account_id),
        )

    if requested_id:
        row = lookup(requested_id)
        if row:
            return row
    account = db.fetch_one("SELECT preferred_analysis_model_id FROM accounts WHERE id = ?", (account_id,))
    preferred_id = (account or {}).get("preferred_analysis_model_id") or ""
    if preferred_id:
        row = lookup(preferred_id)
        if row:
            return row
    # The fallback applies the same ownership filter as the lookups above.
    # Without it, another account's private default profile, including its
    # API key, could be selected.
    row = db.fetch_one(
        f"SELECT * FROM model_profiles WHERE is_default = 1 AND {visibility} "
        "ORDER BY is_system DESC, created_at ASC LIMIT 1",
        (account_id,),
    )
    if not row:
        raise HTTPException(status_code=500, detail="No model profile configured")
    return row
def project_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``projects`` row into its API representation.

    ``description`` defaults to ``""`` when the key is absent.
    """
    description = row.get("description", "")
    return dict(
        id=row["id"],
        user_id=row["user_id"],
        name=row["name"],
        description=description,
        created_at=row["created_at"],
        updated_at=row["updated_at"],
    )
def content_source_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``content_sources`` row into its API representation.

    ``metadata_json`` is decoded into ``metadata``. Empty or undecodable JSON
    yields ``{}``. Optional text columns default to ``""``.
    """
    raw_metadata = row.get("metadata_json") or "{}"
    try:
        decoded = json.loads(raw_metadata)
    except json.JSONDecodeError:
        decoded = {}

    payload: dict[str, Any] = {
        "id": row["id"],
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "source_kind": row["source_kind"],
    }
    for optional in ("platform", "handle", "source_url", "title", "local_path"):
        payload[optional] = row.get(optional, "")
    payload["metadata"] = decoded
    payload["created_at"] = row["created_at"]
    payload["updated_at"] = row["updated_at"]
    return payload
def job_event_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``job_events`` row into its API representation.

    ``payload_json`` is decoded with ``parse_json_object``.
    """
    decoded_payload = parse_json_object(row.get("payload_json") or "{}")
    event = {key: row[key] for key in ("id", "job_id", "event_type")}
    event["payload"] = decoded_payload
    event["created_at"] = row["created_at"]
    return event
def ensure_default_project(account_id: str, username: str = "默认用户") -> dict[str, Any]:
    """Return the account's oldest project, creating one when none exists.

    A newly created project is named after ``username`` and the freshly
    inserted row is read back and returned.
    """
    existing = db.fetch_one(
        "SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC LIMIT 1",
        (account_id,),
    )
    if not existing:
        timestamp = utc_now()
        new_id = make_id("project")
        values = (
            new_id,
            account_id,
            f"{username} 默认项目",
            "系统自动创建",
            timestamp,
            timestamp,
        )
        db.execute(
            """
            INSERT INTO projects (id, user_id, name, description, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        return db.fetch_one("SELECT * FROM projects WHERE id = ?", (new_id,))
    return existing
def resolve_target_project(account_id: str, requested_project_id: str | None, username: str = "默认用户") -> dict[str, Any]:
    """Resolve the project a request should operate on.

    Without a requested id the account's default project is used and created
    on demand. With an id, the project must belong to the account.

    Raises:
        HTTPException: 404 when the requested project is not owned by the account.
    """
    if not requested_project_id:
        return ensure_default_project(account_id, username=username)
    owned = db.fetch_one(
        "SELECT * FROM projects WHERE id = ? AND user_id = ?",
        (requested_project_id, account_id),
    )
    if not owned:
        raise HTTPException(status_code=404, detail="Project not found")
    return owned
def resolve_target_assistant(account_id: str, requested_assistant_id: str | None, project_id: str = "") -> dict[str, Any] | None:
    """Look up an assistant owned by the account.

    Returns:
        ``None`` when no assistant id was requested, otherwise the row.

    Raises:
        HTTPException: 404 when the assistant is not found for this account;
            400 when both the assistant and the caller name a project and
            the two differ.
    """
    if not requested_assistant_id:
        return None
    found = db.fetch_one(
        "SELECT * FROM assistants WHERE id = ? AND user_id = ?",
        (requested_assistant_id, account_id),
    )
    if not found:
        raise HTTPException(status_code=404, detail="Assistant not found")
    bound_project = found.get("project_id")
    mismatch = bool(project_id) and bool(bound_project) and bound_project != project_id
    if mismatch:
        raise HTTPException(status_code=400, detail="Assistant does not belong to target project")
    return found
def append_job_event(job_id: str, event_type: str, payload: dict[str, Any] | None = None) -> None:
    """Insert one row into ``job_events``.

    The payload is stored as JSON with non-ASCII characters kept as-is.
    ``None`` is stored as ``{}``.
    """
    event_id = make_id("evt")
    serialized = json.dumps(payload or {}, ensure_ascii=False)
    record = (event_id, job_id, event_type, serialized, utc_now())
    db.execute(
        """
        INSERT INTO job_events (id, job_id, event_type, payload_json, created_at)
        VALUES (?, ?, ?, ?, ?)
        """,
        record,
    )
def parse_json_object(raw_text: str) -> dict[str, Any]:
    """Best-effort decode of text into a JSON object.

    The stripped text is decoded as JSON first. If that fails, the span from
    the first ``{`` to the last ``}`` is tried, which covers a JSON object
    wrapped in surrounding prose or code fences. Anything that does not decode
    to a dict yields ``{}``.
    """
    text = raw_text.strip()
    if not text:
        return {}
    try:
        decoded = json.loads(text)
    except json.JSONDecodeError:
        # Greedy match: take the outermost brace-delimited span.
        embedded = re.search(r"\{.*\}", text, re.S)
        if embedded is None:
            return {}
        try:
            decoded = json.loads(embedded.group(0))
        except json.JSONDecodeError:
            return {}
    return decoded if isinstance(decoded, dict) else {}
def knowledge_base_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``knowledge_bases`` row into its API representation.

    Adds two counts, each fetched with its own query: the number of documents
    in the knowledge base and the number of assistants linked to it.
    """
    kb_id = row["id"]

    def count_rows(sql: str) -> Any:
        return db.fetch_one(sql, (kb_id,))["count"]

    documents = count_rows(
        "SELECT COUNT(*) AS count FROM knowledge_documents WHERE knowledge_base_id = ?"
    )
    linked_assistants = count_rows(
        "SELECT COUNT(*) AS count FROM assistant_knowledge_bases WHERE knowledge_base_id = ?"
    )
    payload: dict[str, Any] = {
        "id": kb_id,
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "name": row["name"],
        "description": row.get("description", ""),
        "sync_status": row.get("sync_status", "ready"),
    }
    payload["document_count"] = documents
    payload["linked_assistant_count"] = linked_assistants
    payload["created_at"] = row["created_at"]
    payload["updated_at"] = row["updated_at"]
    return payload
def assistant_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert an ``assistants`` row into its API representation.

    Includes the ids of linked knowledge bases in ascending order and the
    decoded ``config_json``.
    """
    links = db.fetch_all(
        "SELECT knowledge_base_id FROM assistant_knowledge_bases WHERE assistant_id = ? ORDER BY knowledge_base_id ASC",
        (row["id"],),
    )
    payload: dict[str, Any] = {
        "id": row["id"],
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "name": row["name"],
    }
    for text_field in ("description", "system_prompt", "generation_goal"):
        payload[text_field] = row.get(text_field, "")
    payload["knowledge_base_ids"] = [link["knowledge_base_id"] for link in links]
    payload["config"] = parse_json_object(row.get("config_json") or "{}")
    payload["model_profile_id"] = row.get("model_profile_id", "")
    payload["created_at"] = row["created_at"]
    payload["updated_at"] = row["updated_at"]
    return payload
def review_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a review row into its API representation.

    When the row references a source job or an assistant that still exists,
    that record is embedded under ``source_job`` or ``assistant``. Otherwise
    the key holds ``None``.
    """
    metrics = parse_json_object(row.get("metrics_json") or "{}")

    embedded_job = None
    source_job_id = row.get("source_job_id")
    if source_job_id:
        job_row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (source_job_id,))
        embedded_job = job_payload(job_row) if job_row else None

    embedded_assistant = None
    assistant_id = row.get("assistant_id")
    if assistant_id:
        assistant_row = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,))
        embedded_assistant = assistant_payload(assistant_row) if assistant_row else None

    payload: dict[str, Any] = {"id": row["id"], "user_id": row["user_id"]}
    for key in ("project_id", "source_job_id", "assistant_id", "title"):
        payload[key] = row.get(key, "")
    payload["platform"] = row.get("platform", "douyin")
    payload["content_type"] = row.get("content_type", "video")
    payload["publish_url"] = row.get("publish_url", "")
    payload["published_at"] = row.get("published_at", "")
    payload["metrics"] = metrics
    for key in ("verdict", "highlights", "next_actions", "notes"):
        payload[key] = row.get(key, "")
    payload["source_job"] = embedded_job
    payload["assistant"] = embedded_assistant
    payload["created_at"] = row["created_at"]
    payload["updated_at"] = row["updated_at"]
    return payload
def document_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a knowledge-document row into its API representation.

    ``analysis_json`` and ``source_artifact_json`` are decoded as objects.
    ``storyboard_json`` is decoded as-is and falls back to ``[]`` when it is
    not valid JSON.
    """
    analysis = parse_json_object(row.get("analysis_json") or "{}")
    artifacts = parse_json_object(row.get("source_artifact_json") or "{}")
    try:
        storyboards = json.loads(row.get("storyboard_json") or "[]")
    except json.JSONDecodeError:
        storyboards = []

    payload: dict[str, Any] = {
        key: row[key] for key in ("id", "knowledge_base_id", "title", "source_type")
    }
    for text_field in ("source_url", "transcript_text", "style_summary", "combined_text"):
        payload[text_field] = row.get(text_field, "")
    payload["analysis"] = analysis
    payload["storyboards"] = storyboards
    payload["source_artifacts"] = artifacts
    payload["analysis_model_profile_id"] = row.get("analysis_model_profile_id", "")
    payload["created_at"] = row["created_at"]
    payload["updated_at"] = row["updated_at"]
    return payload
def job_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``jobs`` row into its API representation.

    ``artifacts_json`` and ``result_json`` are decoded. Invalid JSON yields
    ``{}``. Optional columns receive the defaults visible in the body.
    """

    def decode(blob: str) -> Any:
        try:
            return json.loads(blob)
        except json.JSONDecodeError:
            return {}

    artifacts = decode(row.get("artifacts_json") or "{}")
    result = decode(row.get("result_json") or "{}")

    payload: dict[str, Any] = {
        "id": row["id"],
        "user_id": row["user_id"],
        "project_id": row.get("project_id", ""),
        "parent_job_id": row.get("parent_job_id", ""),
        "assistant_id": row.get("assistant_id"),
        "knowledge_base_id": row["knowledge_base_id"],
        "content_source_id": row.get("content_source_id", ""),
        "source_type": row["source_type"],
    }
    payload.update(
        line_type=row.get("line_type", "analysis"),
        workflow_key=row.get("workflow_key", ""),
        orchestrator=row.get("orchestrator", "n8n"),
        provider_name=row.get("provider_name", ""),
        provider_task_id=row.get("provider_task_id", ""),
        source_url=row.get("source_url"),
        title=row["title"],
        language=row.get("language", "auto"),
        status=row["status"],
        transcript_text=row.get("transcript_text", ""),
        style_summary=row.get("style_summary", ""),
        upload_status=row.get("upload_status", "pending"),
        error=row.get("error", ""),
        artifacts=artifacts,
        result=result,
        analysis_model_profile_id=row.get("analysis_model_profile_id", ""),
        created_at=row["created_at"],
        updated_at=row["updated_at"],
    )
    return payload
def require_auth(authorization: str | None = Header(default=None)) -> dict[str, Any]:
    """FastAPI dependency: resolve the account behind a Bearer token.

    Raises:
        HTTPException: 401 when the header is missing or malformed, the token
            is unknown, or the token's account no longer exists.
    """
    has_bearer = bool(authorization) and authorization.startswith("Bearer ")
    if not has_bearer:
        raise HTTPException(status_code=401, detail="Missing bearer token")
    # Everything after the first space is the token.
    _, _, remainder = authorization.partition(" ")
    token = remainder.strip()
    token_row = db.fetch_one("SELECT * FROM auth_tokens WHERE token = ?", (token,))
    if not token_row:
        raise HTTPException(status_code=401, detail="Invalid token")
    owner = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (token_row["account_id"],))
    if not owner:
        raise HTTPException(status_code=401, detail="Account not found")
    return owner
def require_approved(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]:
    """FastAPI dependency: allow only accounts whose approval status is ``approved``.

    Raises:
        HTTPException: 403 for any other approval status.
    """
    if account["approval_status"] == "approved":
        return account
    raise HTTPException(status_code=403, detail="Account pending approval")
def require_super_admin(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]:
    """FastAPI dependency: allow only accounts whose role is ``super_admin``.

    Raises:
        HTTPException: 403 for any other role.
    """
    if account["role"] == "super_admin":
        return account
    raise HTTPException(status_code=403, detail="Super admin required")
def require_orchestrator(x_orchestrator_secret: str | None = Header(default=None)) -> bool:
    """FastAPI dependency: verify the orchestrator's shared-secret header.

    When ``ORCHESTRATOR_SHARED_SECRET`` is empty the check is disabled and
    every caller is accepted. This fail-open behaviour is kept for deployments
    that run without a secret.

    Raises:
        HTTPException: 401 when a secret is configured and the presented
            header is missing or does not match.
    """
    if not ORCHESTRATOR_SHARED_SECRET:
        return True
    # Compare as bytes in constant time. compare_digest rejects non-ASCII str,
    # and a plain != leaks timing information about the secret.
    presented = (x_orchestrator_secret or "").encode("utf-8")
    expected = ORCHESTRATOR_SHARED_SECRET.encode("utf-8")
    if not secrets.compare_digest(presented, expected):
        raise HTTPException(status_code=401, detail="Invalid orchestrator secret")
    return True
def create_content_source(
    *,
    account_id: str,
    project_id: str,
    source_kind: str,
    platform: str = "",
    handle: str = "",
    source_url: str = "",
    title: str = "",
    local_path: str = "",
    metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Insert a ``content_sources`` row and return it as read back from the database.

    ``metadata`` is stored as JSON and ``None`` is stored as ``{}``. The
    creation and update timestamps are set to the same value.
    """
    new_id = make_id("source")
    timestamp = utc_now()
    metadata_json = json.dumps(metadata or {}, ensure_ascii=False)
    values = (
        new_id,
        account_id,
        project_id,
        source_kind,
        platform,
        handle,
        source_url,
        title,
        local_path,
        metadata_json,
        timestamp,
        timestamp,
    )
    db.execute(
        """
        INSERT INTO content_sources (
            id, user_id, project_id, source_kind, platform, handle,
            source_url, title, local_path, metadata_json, created_at, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
    return db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (new_id,))
def merge_json_field(current_raw: str | None, updates: dict[str, Any]) -> str:
    """Shallow-merge ``updates`` into a stored JSON object and re-serialize it.

    Top-level keys in ``updates`` overwrite existing keys. Nested objects are
    replaced, not merged.
    """
    merged = parse_json_object(current_raw or "{}")
    for key, value in updates.items():
        merged[key] = value
    return json.dumps(merged, ensure_ascii=False)
def update_content_source_metadata(source_id: str, updates: dict[str, Any]) -> dict[str, Any]:
    """Merge ``updates`` into a content source's metadata and return the refreshed row.

    Raises:
        HTTPException: 404 when the content source does not exist.
    """
    select_sql = "SELECT * FROM content_sources WHERE id = ?"
    existing = db.fetch_one(select_sql, (source_id,))
    if not existing:
        raise HTTPException(status_code=404, detail="Content source not found")
    merged_metadata = merge_json_field(existing.get("metadata_json") or "{}", updates)
    db.execute(
        "UPDATE content_sources SET metadata_json = ?, updated_at = ? WHERE id = ?",
        (merged_metadata, utc_now(), source_id),
    )
    return db.fetch_one(select_sql, (source_id,))
def update_job_state(
    job_id: str,
    *,
    status: str,
    error: str = "",
    provider_name: str | None = None,
    provider_task_id: str | None = None,
    artifacts: dict[str, Any] | None = None,
    result: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Update a job's status and record a ``job.<status>`` event.

    ``artifacts`` and ``result`` are shallow-merged into the stored JSON.
    ``provider_name`` and ``provider_task_id`` keep their stored values when
    passed as ``None``. ``error`` is always overwritten, so leaving it at the
    default clears any earlier error.

    Returns:
        The job row as read back after the update.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    current = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
    if not current:
        raise HTTPException(status_code=404, detail="Job not found")

    artifact_updates = artifacts or {}
    result_updates = result or {}
    artifacts_json = merge_json_field(current.get("artifacts_json") or "{}", artifact_updates)
    result_json = merge_json_field(current.get("result_json") or "{}", result_updates)

    # None means "keep what is stored"; an explicit "" overwrites it.
    if provider_name is None:
        effective_provider = current.get("provider_name", "")
    else:
        effective_provider = provider_name
    if provider_task_id is None:
        effective_task_id = current.get("provider_task_id", "")
    else:
        effective_task_id = provider_task_id

    db.execute(
        """
        UPDATE jobs
        SET status = ?, error = ?, provider_name = ?, provider_task_id = ?,
            artifacts_json = ?, result_json = ?, updated_at = ?
        WHERE id = ?
        """,
        (
            status,
            error,
            effective_provider,
            effective_task_id,
            artifacts_json,
            result_json,
            utc_now(),
            job_id,
        ),
    )
    # The event records only what this call supplied, not the merged totals.
    event_body = {
        "provider_name": effective_provider,
        "provider_task_id": effective_task_id,
        "error": error,
        "artifacts": artifact_updates,
        "result": result_updates,
    }
    append_job_event(job_id, f"job.{status}", event_body)
    return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
def job_context_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Build the job payload together with its related records.

    On top of ``job_payload(row)`` the result carries the project, assistant,
    knowledge base, content source and parent job, each ``None`` when not
    referenced or not found. It also carries the list of child jobs and the
    job's events, both ordered by creation time.
    """
    payload = job_payload(row)
    payload.update(
        parent_job=None,
        child_jobs=[],
        project=None,
        assistant=None,
        knowledge_base=None,
        content_source=None,
        events=[],
    )
    job_id = row["id"]

    project_id = row.get("project_id")
    if project_id:
        project_row = db.fetch_one("SELECT * FROM projects WHERE id = ?", (project_id,))
        if project_row:
            payload["project"] = project_payload(project_row)

    assistant_id = row.get("assistant_id")
    if assistant_id:
        assistant_row = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,))
        if assistant_row:
            payload["assistant"] = assistant_payload(assistant_row)

    # The knowledge base is looked up unconditionally; the key is required on the row.
    kb_row = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (row["knowledge_base_id"],))
    if kb_row:
        payload["knowledge_base"] = knowledge_base_payload(kb_row)

    source_id = row.get("content_source_id")
    if source_id:
        source_row = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (source_id,))
        if source_row:
            payload["content_source"] = content_source_payload(source_row)

    parent_id = row.get("parent_job_id")
    if parent_id:
        parent_row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (parent_id,))
        if parent_row:
            payload["parent_job"] = job_payload(parent_row)

    child_rows = db.fetch_all("SELECT * FROM jobs WHERE parent_job_id = ? ORDER BY created_at ASC", (job_id,))
    payload["child_jobs"] = [job_payload(child) for child in child_rows]

    event_rows = db.fetch_all("SELECT * FROM job_events WHERE job_id = ? ORDER BY created_at ASC", (job_id,))
    payload["events"] = [job_event_payload(event) for event in event_rows]
    return payload
async def trigger_orchestrated_job(job_row: dict[str, Any]) -> dict[str, Any]:
    """Hand a job to its n8n workflow and record the hand-off on the job.

    Steps, in order: log a ``workflow.trigger.requested`` event, mark the job
    ``queued``, call the n8n webhook, store the returned ``executionId`` as the
    provider task id, and log ``workflow.trigger.accepted``.

    Returns:
        The job row as read back after the trigger.

    Raises:
        HTTPException: 503 when the n8n client is not enabled.
    """
    workflow_key = job_row.get("workflow_key") or "analysis_pipeline"
    if not n8n_client.enabled:
        raise HTTPException(status_code=503, detail="n8n is not configured")

    job_id = job_row["id"]
    append_job_event(job_id, "workflow.trigger.requested", {"workflow_key": workflow_key})
    update_job_state(
        job_id,
        status="queued",
        provider_name="n8n",
        provider_task_id="",
        result={"n8n_trigger": {"requested": True}},
    )

    # Each value is sent under both camelCase and snake_case keys.
    line_type = job_row.get("line_type", "analysis")
    webhook_body = {
        "jobId": job_id,
        "job_id": job_id,
        "workflowKey": workflow_key,
        "workflow_key": workflow_key,
        "lineType": line_type,
        "line_type": line_type,
    }
    trigger_result = await n8n_client.trigger(workflow_key, webhook_body)

    provider_task_id = str(trigger_result.get("executionId") or "")
    # Re-read result_json so the merge includes what update_job_state() wrote above.
    stored = db.fetch_one("SELECT result_json FROM jobs WHERE id = ?", (job_id,))
    merged_result = merge_json_field(
        stored.get("result_json") or "{}",
        {"n8n_trigger": trigger_result},
    )
    db.execute(
        """
        UPDATE jobs
        SET provider_name = ?, provider_task_id = ?, result_json = ?, updated_at = ?
        WHERE id = ?
        """,
        ("n8n", provider_task_id, merged_result, utc_now(), job_id),
    )
    append_job_event(
        job_id,
        "workflow.trigger.accepted",
        {"provider_task_id": provider_task_id, "trigger_result": trigger_result},
    )
    return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
async def call_model(profile: dict[str, Any], system_prompt: str, user_prompt: str, temperature: float = 0.4) -> str:
    """Run one chat completion against the given model profile.

    This is a best-effort call. When the request fails or returns empty
    content, a canned style summary built from the first 220 characters of
    ``user_prompt`` is returned instead of raising.

    Args:
        profile: Model profile row; ``base_url`` and ``model_name`` are
            required, ``api_key`` is optional.
        system_prompt: System message for the completion.
        user_prompt: User message for the completion.
        temperature: Sampling temperature passed to the client.

    Returns:
        The stripped model output, or the fallback text.
    """
    import logging

    try:
        content = await openai_client.chat_completion(
            base_url=profile["base_url"],
            api_key=profile.get("api_key", ""),
            model=profile["model_name"],
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=temperature,
        )
        if content:
            return content.strip()
    except Exception:
        # Keep the fallback, but record the failure so a broken endpoint or
        # profile is visible in the logs. The API key is never logged.
        logging.getLogger(__name__).warning(
            "Model call failed for model %r; using fallback summary",
            profile.get("model_name"),
            exc_info=True,
        )
    excerpt = user_prompt.strip().replace("\n", " ")[:220]
    return f"风格摘要:内容以强结论开头,节奏偏短句,强调冲突转折和行动指令。素材摘要:{excerpt}"
async def summarize_style(profile: dict[str, Any], transcript_text: str, title: str) -> str:
    """Ask the model for a style breakdown of a transcript.

    Builds a Chinese prompt from the title and full transcript and sends it
    through ``call_model`` at temperature 0.3.
    """
    persona = "你是短视频文案拆解师,输出简洁、结构化、适合沉淀进知识库。"
    request_parts = [
        f"标题:{title}\n\n",
        f"素材全文:\n{transcript_text}\n\n",
        "请提炼这段素材的文案风格、结构节奏、开头钩子、情绪推进、收尾 CTA并给出可复用的学习结论。",
    ]
    request_text = "".join(request_parts)
    return await call_model(profile, persona, request_text, temperature=0.3)
async def generate_content_blueprint(
    profile: dict[str, Any],
    *,
    title: str,
    transcript_text: str,
    style_summary: str,
    agent_prompt: str = "",
    generation_goal: str = "",
) -> dict[str, Any]:
    """Produce an analysis / rewrite / storyboard blueprint for a piece of content.

    The model is asked for a JSON object. If its reply parses and contains a
    non-empty ``storyboards`` entry, the reply is returned as-is. Otherwise a
    deterministic fallback is built from the transcript: up to four non-empty
    lines become shots, or the first 1200 characters when there are none.
    """
    system_prompt = (
        "你是短视频内容策略师。"
        "必须输出 JSON 对象,不要输出 Markdown不要输出多余解释。"
    )
    # JSON skeleton the model is asked to fill in.
    schema_hint = (
        "{"
        '"analysis":{"hook":"","structure":[],"style_tags":[],"cta":""},'
        '"rewrite":{"title":"","script":"","summary":""},'
        '"storyboards":['
        '{"shot_index":1,"title":"","narration":"","visual":"","first_frame_prompt":"","last_frame_prompt":"","video_prompt":"","duration_sec":5}'
        "]"
        "}"
    )
    user_prompt = "".join(
        [
            f"标题:{title}\n\n",
            f"素材转写:\n{transcript_text}\n\n",
            f"风格拆解:\n{style_summary}\n\n",
            f"智能体补充约束:\n{agent_prompt or ''}\n\n",
            f"生成目标:\n{generation_goal or '围绕原素材做二创短视频'}\n\n",
            "请输出如下 JSON 结构:",
            schema_hint,
        ]
    )
    reply = await call_model(profile, system_prompt, user_prompt, temperature=0.5)
    blueprint = parse_json_object(reply)
    if blueprint.get("storyboards"):
        return blueprint

    def fallback_shot(index: int, segment: str) -> dict[str, Any]:
        snippet = segment[:180]
        return {
            "shot_index": index,
            "title": f"镜头{index}",
            "narration": snippet,
            "visual": f"围绕这段内容构建具象画面:{snippet}",
            "first_frame_prompt": f"短视频首帧,突出主题:{snippet}",
            "last_frame_prompt": f"短视频尾帧,强化结论和行动指令:{snippet}",
            "video_prompt": f"基于首尾帧生成连贯镜头,内容是:{snippet}",
            "duration_sec": 5,
        }

    non_empty_lines = [line.strip() for line in transcript_text.split("\n") if line.strip()]
    seeds = non_empty_lines[:4] or [transcript_text[:1200]]
    storyboards: list[dict[str, Any]] = [
        fallback_shot(position, seed) for position, seed in enumerate(seeds, start=1)
    ]
    analysis = {
        "hook": title,
        "structure": ["结论开场", "核心论点", "例证推进", "收尾行动"],
        "style_tags": ["短句", "结论先行", "强 CTA"],
        "cta": "引导用户采取下一步行动",
    }
    rewrite = {
        "title": title,
        "script": transcript_text[:3000],
        "summary": style_summary[:500],
    }
    return {"analysis": analysis, "rewrite": rewrite, "storyboards": storyboards}
def fallback_transcript_from_text(title: str, content: str) -> str:
    """Format a title and body as a pseudo-transcript; the body is stripped."""
    body = content.strip()
    return "标题:{}\n\n正文:\n{}".format(title, body)
def infer_platform_from_url(source_url: str) -> str:
    """Guess the platform name from substrings of a URL.

    Matching is case-insensitive and checks the platforms in a fixed order.
    Returns ``""`` when nothing matches.
    """
    lowered = source_url.strip().lower()
    platform_markers = (
        ("bilibili", ("bilibili.com", "b23.tv")),
        ("douyin", ("douyin.com", "iesdouyin.com")),
        ("xiaohongshu", ("xiaohongshu.com", "xhslink.com")),
        ("youtube", ("youtube.com", "youtu.be")),
    )
    for platform, markers in platform_markers:
        if any(marker in lowered for marker in markers):
            return platform
    return ""
def command_exists(name: str) -> bool:
    """Return True when ``shutil.which`` can locate ``name``."""
    located = shutil.which(name)
    return located is not None
def run_command(command: list[str], cwd: Path | None = None, timeout: float | None = None) -> tuple[int, str, str]:
    """Run a command and capture its output as text.

    Returns:
        ``(exit_code, stdout, stderr)``. On timeout the exit code is 124 and
        stderr holds whatever was captured, or a timeout message when nothing
        was captured.
    """

    def as_text(stream: Any) -> str:
        # On timeout the captured output can arrive as bytes even in text mode.
        if isinstance(stream, str):
            return stream
        return (stream or b"").decode("utf-8", errors="ignore")

    working_dir = str(cwd) if cwd else None
    try:
        completed = subprocess.run(
            command,
            cwd=working_dir,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as expired:
        partial_out = as_text(expired.stdout)
        partial_err = as_text(expired.stderr)
        return 124, partial_out, partial_err or f"Command timed out after {timeout} seconds"
    return completed.returncode, completed.stdout, completed.stderr
def discover_account_video_links(source_url: str, max_items: int) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """List the videos behind an account or playlist URL using yt-dlp.

    Runs yt-dlp in flat-playlist mode with a 180-second timeout and parses one
    tab-separated ``url, title, id`` line per entry.

    Args:
        source_url: Account or playlist URL. It is passed after ``--`` so a
            value that starts with ``-`` cannot be read as a yt-dlp option.
        max_items: Upper bound on playlist entries requested from yt-dlp.

    Returns:
        ``(items, debug_payload)``. Each item has ``video_url``, ``title`` and
        ``external_id``; duplicate or missing URLs are dropped. The debug
        payload holds the command, a stdout preview, truncated stderr and the
        exit code.

    Raises:
        HTTPException: 503 when yt-dlp is not installed; 502 when yt-dlp
            exits with a non-zero status.
    """
    if not command_exists(YTDLP_BIN):
        raise HTTPException(status_code=503, detail="yt-dlp is not configured")
    discovery_cmd = [
        YTDLP_BIN,
        "--flat-playlist",
        "--playlist-end",
        str(max_items),
        "--print",
        "%(webpage_url)s\t%(title)s\t%(id)s",
        # End of options: everything after this is treated as a URL.
        "--",
        source_url,
    ]
    code, stdout, stderr = run_command(discovery_cmd, timeout=180)
    if code != 0:
        raise HTTPException(status_code=502, detail=f"Failed to inspect content source: {stderr.strip()[:200] or 'yt-dlp error'}")

    raw_lines = [line.strip() for line in stdout.splitlines() if line.strip()]
    items: list[dict[str, Any]] = []
    seen_urls: set[str] = set()
    for line in raw_lines:
        parts = line.split("\t")
        video_url = parts[0].strip() if parts else ""
        raw_title = parts[1].strip() if len(parts) > 1 else ""
        raw_external_id = parts[2].strip() if len(parts) > 2 else ""
        # yt-dlp prints "NA" for fields it could not resolve.
        if not video_url or video_url == "NA" or video_url in seen_urls:
            continue
        seen_urls.add(video_url)
        items.append(
            {
                "video_url": video_url,
                "title": raw_title if raw_title and raw_title != "NA" else "短视频素材",
                "external_id": raw_external_id if raw_external_id != "NA" else "",
            }
        )
    debug_payload = {
        "discovery_command": discovery_cmd,
        "discovery_stdout_preview": raw_lines[:max_items],
        "discovery_stderr": stderr.strip()[:1000],
        "discovery_exit_code": code,
    }
    return items, debug_payload
def validate_real_cut_source_job(source_job: dict[str, Any]) -> None:
    """Reject jobs that cannot serve as the source of a real-cut run.

    Raises:
        HTTPException: 400 when the job's ``source_type`` is neither
            ``upload_video`` nor ``video_link``; 409 when a ``video_link``
            job has not reached the ``completed`` status.
    """
    kind = source_job.get("source_type", "")
    if kind == "upload_video":
        return
    if kind != "video_link":
        raise HTTPException(status_code=400, detail="Real-cut source job must come from upload_video or video_link")
    if source_job.get("status") != "completed":
        raise HTTPException(status_code=409, detail="Video link source job must be completed before real-cut staging")
def resolve_real_cut_source_file(source_job: dict[str, Any]) -> tuple[Path, dict[str, Any] | None]:
    """Locate the local media file that belongs to a real-cut source job.

    Candidate paths are tried in this order: the job artifacts'
    ``uploaded_path`` and ``source_path``, the linked content source's
    ``local_path``, and, for ``video_link`` jobs, ``source.mp4`` inside the
    job directory.

    Returns:
        The first candidate that is an existing regular file, together with
        the parsed job artifacts.

    Raises:
        HTTPException: whatever ``validate_real_cut_source_job`` raises, or
            409 when none of the candidates exists.
    """
    validate_real_cut_source_job(source_job)
    artifacts = parse_job_artifacts(source_job)

    locations: list[Path] = [
        Path(str(artifacts[key]))
        for key in ("uploaded_path", "source_path")
        if artifacts.get(key)
    ]
    content_source_id = source_job.get("content_source_id")
    if content_source_id:
        linked = db.fetch_one("SELECT * FROM content_sources WHERE id = ?", (content_source_id,))
        if linked and linked.get("local_path"):
            locations.append(Path(str(linked["local_path"])))
    if source_job.get("source_type") == "video_link":
        locations.append(JOBS_DIR / source_job["id"] / "source.mp4")

    checked: set[str] = set()
    for location in locations:
        key = str(location)
        if key in checked:
            continue
        checked.add(key)
        if location.is_file():
            return location, artifacts
    raise HTTPException(status_code=409, detail="Source job media file is not available for real-cut staging")
async def stage_real_cut_source_to_cutvideo(source_job: dict[str, Any]) -> dict[str, Any]:
    """Upload a source job's media file to CutVideo and describe the result.

    Returns:
        A dict with the ``input_dir`` reported by CutVideo, the local
        ``source_path``, the raw ``upload`` payload and the job's
        ``source_artifacts``.

    Raises:
        HTTPException: 503 when the CutVideo client is disabled, 502 when
            the upload response carries no ``input_dir``, plus anything
            raised while resolving the source file.
    """
    if not cutvideo_client.enabled:
        raise HTTPException(status_code=503, detail="CutVideo is not configured")

    local_file, job_artifacts = resolve_real_cut_source_file(source_job)
    response = await cutvideo_client.upload_source_file(
        local_file,
        folder_name=f"storyforge-{source_job['id']}",
    )
    staged_dir = str(response.get("input_dir") or "").strip()
    if staged_dir:
        return {
            "input_dir": staged_dir,
            "source_path": str(local_file),
            "upload": response,
            "source_artifacts": job_artifacts,
        }
    raise HTTPException(status_code=502, detail="CutVideo upload did not return input_dir")
def create_job_record(
    *,
    account_id: str,
    project_id: str,
    knowledge_base_id: str,
    parent_job_id: str | None = None,
    source_type: str,
    line_type: str,
    workflow_key: str,
    title: str,
    language: str = "auto",
    source_url: str = "",
    assistant_id: str | None = None,
    content_source_id: str | None = None,
    artifacts: dict[str, Any] | None = None,
    analysis_model_profile_id: str = "",
) -> dict[str, Any]:
    """Insert a new ``jobs`` row in the ``pending`` state and return it.

    The row is created with orchestrator ``n8n``, empty provider fields,
    empty transcript/summary/error, ``upload_status`` ``pending`` and an
    empty JSON result. An empty ``source_url`` is stored as NULL and
    ``artifacts`` is serialised to JSON (``{}`` when omitted).
    """
    new_id = make_id("job")
    created = utc_now()
    artifacts_blob = json.dumps(artifacts or {}, ensure_ascii=False)
    values = (
        new_id,
        account_id,
        project_id,
        parent_job_id,
        assistant_id,
        knowledge_base_id,
        content_source_id,
        source_type,
        line_type,
        workflow_key,
        source_url or None,
        title,
        language,
        artifacts_blob,
        analysis_model_profile_id,
        created,
        created,
    )
    db.execute(
        """
        INSERT INTO jobs (
            id, user_id, project_id, parent_job_id, assistant_id, knowledge_base_id, content_source_id,
            source_type, line_type, workflow_key, orchestrator, provider_name, provider_task_id,
            source_url, title, language, status, transcript_text, style_summary, upload_status,
            error, artifacts_json, result_json, analysis_model_profile_id, created_at, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'n8n', '', '', ?, ?, ?, 'pending', '', '', 'pending', '', ?, '{}', ?, ?, ?)
        """,
        values,
    )
    return db.fetch_one("SELECT * FROM jobs WHERE id = ?", (new_id,))
async def wait_for_huobao_image(image_id: str | int) -> dict[str, Any]:
    """Poll a Huobao image task until it reports ``completed`` or ``failed``.

    Returns:
        The last payload fetched, whose status is one of the two terminal
        values (compared case-insensitively).

    Raises:
        RuntimeError: when ``HUOBAO_MAX_WAIT_SEC`` elapses first.
    """
    finished_states = ("completed", "failed")
    give_up_at = now_ts() + HUOBAO_MAX_WAIT_SEC
    payload: dict[str, Any] = await huobao_client.get_image(str(image_id))
    while str(payload.get("status") or "").lower() not in finished_states:
        if now_ts() >= give_up_at:
            raise RuntimeError(f"Huobao image task timed out: {image_id}")
        await asyncio.sleep(HUOBAO_POLL_INTERVAL_SEC)
        payload = await huobao_client.get_image(str(image_id))
    return payload
async def wait_for_huobao_video(video_id: str | int) -> dict[str, Any]:
    """Poll a Huobao video task until it reports ``completed`` or ``failed``.

    Returns:
        The last payload fetched, whose status is one of the two terminal
        values (compared case-insensitively).

    Raises:
        RuntimeError: when ``HUOBAO_MAX_WAIT_SEC`` elapses first.
    """
    finished_states = ("completed", "failed")
    give_up_at = now_ts() + HUOBAO_MAX_WAIT_SEC
    payload: dict[str, Any] = await huobao_client.get_video(str(video_id))
    while str(payload.get("status") or "").lower() not in finished_states:
        if now_ts() >= give_up_at:
            raise RuntimeError(f"Huobao video task timed out: {video_id}")
        await asyncio.sleep(HUOBAO_POLL_INTERVAL_SEC)
        payload = await huobao_client.get_video(str(video_id))
    return payload
def coerce_storyboards(items: Any) -> list[dict[str, Any]]:
    """Return the dict entries of ``items``; anything that is not a list yields ``[]``."""
    if isinstance(items, list):
        return list(filter(lambda entry: isinstance(entry, dict), items))
    return []
def huobao_image_size_for_aspect_ratio(aspect_ratio: str) -> str:
    """Map an aspect ratio label to a pixel size string.

    ``9:16``, ``16:9`` and ``1:1`` are recognised after trimming whitespace;
    any other value (including empty/None) falls back to ``1024x1536``.
    """
    sizes = {
        "9:16": "1024x1536",
        "16:9": "1536x1024",
        "1:1": "1024x1024",
    }
    return sizes.get(str(aspect_ratio or "").strip(), "1024x1536")
async def transcribe_media(job_dir: Path, source_path: Path, title: str, source_url: str = "") -> tuple[str, dict[str, Any]]:
    """Produce a transcript for a media file, degrading gracefully.

    Steps, each optional depending on configuration:
      1. Extract 16 kHz mono audio with ffmpeg into ``job_dir/audio.wav``.
      2. Send the audio (or the original file) to the HTTP ASR client.
      3. Run the whisper CLI; a non-empty CLI transcript replaces the HTTP one.
    When no transcript is obtained, a placeholder text built from the title
    and source is returned instead.

    ffmpeg and whisper are executed in a worker thread so the event loop
    keeps serving other requests while they run.

    Args:
        job_dir: Directory that receives intermediate files.
        source_path: Local media file; it may be missing.
        title: Title used in the placeholder transcript.
        source_url: Source shown in the placeholder transcript; falls back
            to the file name when empty.

    Returns:
        ``(transcript, artifacts)`` where ``artifacts`` records paths,
        the backend used and truncated error details.
    """
    artifacts: dict[str, Any] = {"source_path": str(source_path)}
    if not source_path.exists():
        transcript = (
            f"素材标题:{title}\n"
            f"素材来源:{source_url or source_path.name}\n\n"
            "当前环境未找到可直接处理的本地视频文件,已记录来源信息并进入降级学习流程。"
        )
        return transcript, artifacts

    transcript = ""
    media_path = source_path
    audio_path = job_dir / "audio.wav"
    if command_exists(FFMPEG_BIN):
        # subprocess.run blocks; keep it off the event loop.
        code, _, err = await asyncio.to_thread(
            run_command,
            [FFMPEG_BIN, "-y", "-i", str(source_path), "-ar", "16000", "-ac", "1", str(audio_path)],
        )
        if code == 0 and audio_path.exists():
            artifacts["audio_path"] = str(audio_path)
            media_path = audio_path
        elif err:
            artifacts["ffmpeg_error"] = err.strip()[:500]

    if asr_http_client.enabled and media_path.exists():
        try:
            asr_payload = await asr_http_client.transcribe_audio(media_path)
            artifacts["asr_http_payload"] = {
                "success": bool(asr_payload.get("success", True)),
                "duration_ms": asr_payload.get("duration_ms"),
                "error_message": str(asr_payload.get("error_message") or "")[:500],
            }
            transcript = str(asr_payload.get("text") or "").strip()
            if transcript:
                artifacts["asr_backend"] = "http"
        except Exception as exc:
            # Best effort: an ASR outage must not fail the whole job.
            error_detail = str(exc).strip() or exc.__class__.__name__
            artifacts["asr_http_error"] = error_detail[:500]

    if WHISPER_BIN and Path(WHISPER_BIN).exists() and Path(WHISPER_MODEL).exists():
        out_prefix = job_dir / "whisper"
        code, stdout, stderr = await asyncio.to_thread(
            run_command,
            [
                WHISPER_BIN,
                "-m",
                WHISPER_MODEL,
                "-f",
                str(media_path),
                "-otxt",
                "-of",
                str(out_prefix),
            ],
        )
        txt_path = Path(str(out_prefix) + ".txt")
        if code == 0 and txt_path.exists():
            cli_transcript = txt_path.read_text(encoding="utf-8", errors="ignore").strip()
            if cli_transcript:
                # NOTE(review): the CLI text replaces an HTTP transcript while
                # "asr_backend" keeps the value "http" — confirm this is intended.
                transcript = cli_transcript
                artifacts["transcript_path"] = str(txt_path)
                artifacts["asr_backend"] = artifacts.get("asr_backend") or "whisper_cli"
        else:
            artifacts["whisper_stdout"] = stdout.strip()[:500]
            artifacts["whisper_error"] = stderr.strip()[:500]

    if not transcript:
        transcript = (
            f"素材标题:{title}\n"
            f"素材来源:{source_url or source_path.name}\n\n"
            "当前环境未完成真实 ASR已保留原始素材供后续转写。请结合标题、来源和上下文进行初步风格学习。"
        )
    return transcript, artifacts
def ensure_user_kb(account_id: str, project_id: str = "", username: str = "默认用户") -> dict[str, Any]:
    """Return the oldest knowledge base of the account's project, creating one if needed.

    The project is resolved through ``resolve_target_project``; when the
    project has no knowledge base yet, a ``ready`` default one is inserted.
    """
    project = resolve_target_project(account_id, project_id or None, username=username)
    existing = db.fetch_one(
        "SELECT * FROM knowledge_bases WHERE user_id = ? AND project_id = ? ORDER BY created_at ASC LIMIT 1",
        (account_id, project["id"]),
    )
    if not existing:
        new_id = make_id("kb")
        stamp = utc_now()
        db.execute(
            """
            INSERT INTO knowledge_bases (id, user_id, project_id, name, description, sync_status, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (new_id, account_id, project["id"], "默认知识库", "系统为新用户自动创建", "ready", stamp, stamp),
        )
        return db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (new_id,))
    return existing
async def process_job(job_id: str) -> None:
    """Run the learning pipeline for one job and persist its outcome.

    The job is marked ``processing``, a transcript is obtained according to
    its ``source_type`` (``text``, ``video_link`` or ``upload_video``), the
    style summary and content blueprint are generated, a knowledge document
    is stored, and the job is marked ``completed``. Any exception marks the
    job ``failed`` with the error text. Unknown job ids are ignored.

    The yt-dlp download runs in a worker thread so the event loop is not
    blocked while the video is fetched.
    """
    row = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
    if not row:
        return
    now = utc_now()
    db.execute("UPDATE jobs SET status = ?, updated_at = ? WHERE id = ?", ("processing", now, job_id))
    append_job_event(job_id, "job.processing", {})
    try:
        artifacts = json.loads(row.get("artifacts_json") or "{}")
        transcript_text = row.get("transcript_text", "")
        job_dir = JOBS_DIR / job_id
        job_dir.mkdir(parents=True, exist_ok=True)
        if row["source_type"] == "text":
            transcript_text = fallback_transcript_from_text(row["title"], artifacts.get("input_text", ""))
        elif row["source_type"] == "video_link":
            downloaded = job_dir / "source.mp4"
            if command_exists(YTDLP_BIN):
                code, stdout, stderr = await asyncio.to_thread(
                    run_command,
                    [
                        YTDLP_BIN,
                        "--no-playlist",
                        "-o",
                        str(downloaded),
                        # End of options: the URL must never be parsed as a flag.
                        "--",
                        row.get("source_url") or "",
                    ],
                    job_dir,
                )
                if code == 0 and downloaded.exists():
                    artifacts["download_stdout"] = stdout.strip()[:500]
                else:
                    artifacts["download_error"] = stderr.strip()[:500]
            # A missing download is passed on as a non-existent placeholder
            # path so transcribe_media takes its degraded branch.
            transcript_text, extra = await transcribe_media(
                job_dir,
                downloaded if downloaded.exists() else job_dir / "placeholder.mp4",
                row["title"],
                row.get("source_url") or "",
            )
            artifacts.update(extra)
        elif row["source_type"] == "upload_video":
            source_path = Path(artifacts.get("uploaded_path", ""))
            transcript_text, extra = await transcribe_media(job_dir, source_path, row["title"], row.get("source_url") or "")
            artifacts.update(extra)

        profile = model_profile_for_account(row["user_id"], row.get("analysis_model_profile_id") or None)
        style_summary = await summarize_style(profile, transcript_text, row["title"])
        assistant = None
        if row.get("assistant_id"):
            assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],))
        content_blueprint = await generate_content_blueprint(
            profile,
            title=row["title"],
            transcript_text=transcript_text,
            style_summary=style_summary,
            agent_prompt=(assistant or {}).get("system_prompt", ""),
            generation_goal=(assistant or {}).get("generation_goal", ""),
        )
        combined_text = (
            f"{transcript_text}\n\n"
            "------\n"
            f"风格学习结论:\n{style_summary}\n\n"
            "------\n"
            f"二创文案:\n{(content_blueprint.get('rewrite') or {}).get('script', '')}\n\n"
            "------\n"
            f"分镜:\n{json.dumps(content_blueprint.get('storyboards') or [], ensure_ascii=False, indent=2)}"
        )
        kb_row = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (row["knowledge_base_id"],))
        if not kb_row:
            raise RuntimeError("Knowledge base not found")
        document_id = make_id("doc")
        timestamp = utc_now()
        db.execute(
            """
            INSERT INTO knowledge_documents (
                id, knowledge_base_id, title, source_type, source_url, transcript_text,
                style_summary, combined_text, analysis_json, storyboard_json, source_artifact_json,
                analysis_model_profile_id, created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                document_id,
                row["knowledge_base_id"],
                row["title"],
                row["source_type"],
                row.get("source_url") or "",
                transcript_text,
                style_summary,
                combined_text,
                json.dumps(content_blueprint.get("analysis") or {}, ensure_ascii=False),
                json.dumps(content_blueprint.get("storyboards") or [], ensure_ascii=False),
                json.dumps(artifacts, ensure_ascii=False),
                profile["id"],
                timestamp,
                timestamp,
            ),
        )
        update_job_state(
            job_id,
            status="completed",
            artifacts={
                "document_id": document_id,
                "project_job_dir": str(job_dir),
                **artifacts,
            },
            result={
                "analysis": content_blueprint.get("analysis") or {},
                "rewrite": content_blueprint.get("rewrite") or {},
                "storyboards": content_blueprint.get("storyboards") or [],
                "document_id": document_id,
            },
        )
        db.execute(
            """
            UPDATE jobs
            SET transcript_text = ?, style_summary = ?, upload_status = ?, updated_at = ?
            WHERE id = ?
            """,
            (transcript_text, style_summary, "ready", timestamp, job_id),
        )
        db.execute(
            "UPDATE knowledge_bases SET sync_status = ?, updated_at = ? WHERE id = ?",
            ("ready", timestamp, kb_row["id"]),
        )
    except Exception as exc:
        # Top-level boundary of the background pipeline: record the failure
        # on the job; fall back to the exception type when the message is empty.
        update_job_state(job_id, status="failed", error=str(exc) or exc.__class__.__name__)
def probe_tcp(url: str, timeout: float = 3.0) -> dict[str, Any]:
    """Check whether the host/port of ``url`` accepts a TCP connection.

    The port defaults to 443 for ``https`` URLs and 80 otherwise.

    Args:
        url: Base URL of the service; empty means "not configured".
        timeout: Connection timeout in seconds.

    Returns:
        A dict with ``configured``, ``reachable``, ``status_code`` (always 0
        here), ``error`` and ``url``. ``error`` is ``not_configured`` for an
        empty URL, ``invalid_url`` when no host/port can be extracted, the
        exception text when the connection fails, and empty on success.
    """
    if not url:
        return {"configured": False, "reachable": False, "status_code": 0, "error": "not_configured", "url": ""}
    try:
        parsed = urlparse(url)
        host = parsed.hostname
        # .port raises ValueError for non-numeric or out-of-range ports.
        port = parsed.port or (443 if parsed.scheme == "https" else 80)
    except ValueError:
        host, port = None, 0
    if not host:
        return {"configured": True, "reachable": False, "status_code": 0, "error": "invalid_url", "url": url}
    try:
        # create_connection resolves the name and tries every returned
        # address (IPv4 and IPv6); the socket is closed on leaving the block.
        with socket.create_connection((host, port), timeout=timeout):
            pass
    except Exception as exc:  # pragma: no cover - operational probe
        return {"configured": True, "reachable": False, "status_code": 0, "error": str(exc), "url": url}
    return {"configured": True, "reachable": True, "status_code": 0, "error": "", "url": url}
def probe_http(url: str, path: str = "", timeout: float = 3.0) -> dict[str, Any]:
    """Probe ``url`` + ``path`` with an HTTP GET after a TCP reachability check.

    Returns the dict produced by ``probe_tcp`` with ``url`` replaced by the
    full target URL. When the TCP probe succeeds, ``status_code`` holds the
    HTTP status and the service counts as reachable for any status below
    500; request errors are reported through ``error``.
    """
    report = probe_tcp(url, timeout=timeout)

    full_url = ""
    if url:
        base = url if url.endswith("/") else f"{url}/"
        full_url = urljoin(base, path.lstrip("/"))

    if not (report["configured"] and report["reachable"]):
        if full_url:
            report["url"] = full_url
        return report

    try:
        response = httpx.get(full_url or url, timeout=timeout, follow_redirects=True)
        healthy = response.status_code < 500
        report["status_code"] = response.status_code
        report["reachable"] = healthy
        report["error"] = "" if healthy else f"http_{response.status_code}"
    except Exception as exc:  # pragma: no cover - operational probe
        report["reachable"] = False
        report["error"] = str(exc)
    report["url"] = full_url or url
    return report
@app.on_event("startup")
def on_startup() -> None:
    """Initialise the database schema, then seed the default records."""
    db.init_schema()
    seed_defaults()
@app.get("/healthz")
def healthz() -> dict[str, Any]:
    """Report liveness together with the configured paths and base URLs."""
    payload: dict[str, Any] = {"status": "ok"}
    payload.update(
        dbPath=DB_PATH,
        defaultExternalBaseUrl=DEFAULT_EXTERNAL_BASE_URL,
        localModelBaseUrl=LOCAL_OPENAI_BASE_URL,
        asrHttpBaseUrl=ASR_HTTP_BASE_URL,
        n8nBaseUrl=N8N_BASE_URL,
        cutvideoBaseUrl=CUTVIDEO_BASE_URL,
        cutvideoUploadTimeoutSec=CUTVIDEO_UPLOAD_TIMEOUT_SEC,
        huobaoBaseUrl=HUOBAO_BASE_URL,
    )
    return payload
@app.get("/v2/integrations/health")
def integrations_health(account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Probe every external integration and return one report per service.

    CutVideo, Huobao and n8n are probed over HTTP on their health paths;
    the ASR service is only checked for TCP reachability. ``account`` is
    accepted solely to enforce the ``require_approved`` dependency.
    """
    _ = account
    http_targets = (
        ("cutvideo", CUTVIDEO_BASE_URL, "/api/bootstrap"),
        ("huobao", HUOBAO_BASE_URL, "/health"),
        ("n8n", N8N_BASE_URL, "/healthz"),
    )
    report: dict[str, Any] = {}
    for service, base_url, health_path in http_targets:
        report[service] = {"base_url": base_url, **probe_http(base_url, health_path)}
    report["asr"] = {"base_url": ASR_HTTP_BASE_URL, **probe_tcp(ASR_HTTP_BASE_URL)}
    return report
def seed_defaults() -> None:
    """Create the default model profile and the bootstrap super admin if missing.

    * When no default model profile exists, a system profile pointing at the
      local OpenAI-compatible endpoint is inserted.
    * When the ``kris`` account does not exist, it is created as an approved
      ``super_admin`` together with a default project, knowledge base and
      assistant linked to that knowledge base.

    The bootstrap password can be overridden with the ``SEED_ADMIN_PASSWORD``
    environment variable; the historical built-in value is used only when the
    variable is unset or empty.
    """
    if not db.fetch_one("SELECT id FROM model_profiles WHERE is_default = 1 LIMIT 1"):
        profile_id = make_id("model")
        now = utc_now()
        db.execute(
            """
            INSERT INTO model_profiles (id, owner_account_id, name, provider, base_url, api_key, model_name, is_system, is_default, created_at, updated_at)
            VALUES (?, NULL, ?, ?, ?, ?, ?, 1, 1, ?, ?)
            """,
            (
                profile_id,
                "本机默认模型",
                "openai_compat",
                LOCAL_OPENAI_BASE_URL,
                LOCAL_OPENAI_API_KEY,
                LOCAL_OPENAI_MODEL,
                now,
                now,
            ),
        )
    if not db.fetch_one("SELECT id FROM accounts WHERE username = ?", ("kris",)):
        account_id = make_id("acct")
        # NOTE(review): the fallback is a publicly visible credential; set
        # SEED_ADMIN_PASSWORD in every real deployment.
        seed_password = os.getenv("SEED_ADMIN_PASSWORD") or "Asd123456."
        password_hash, password_salt = create_password_hash(seed_password)
        now = utc_now()
        model_row = db.fetch_one("SELECT id FROM model_profiles WHERE is_default = 1 LIMIT 1")
        db.execute(
            """
            INSERT INTO accounts (
                id, username, password_hash, password_salt, display_name, role,
                approval_status, approved_by, approved_at, preferred_analysis_model_id,
                created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                account_id,
                "kris",
                password_hash,
                password_salt,
                "Kris",
                "super_admin",
                "approved",
                account_id,  # the bootstrap admin is recorded as self-approved
                now,
                model_row["id"] if model_row else "",
                now,
                now,
            ),
        )
        project = ensure_default_project(account_id, username="kris")
        kb = ensure_user_kb(account_id, project["id"], username="kris")
        assistant_id = make_id("assistant")
        db.execute(
            """
            INSERT INTO assistants (id, user_id, project_id, name, description, system_prompt, generation_goal, config_json, model_profile_id, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, '{}', ?, ?, ?)
            """,
            (
                assistant_id,
                account_id,
                project["id"],
                "默认文案助手",
                "系统为超级管理员预置",
                "你是一个擅长学习短视频文案风格的 AI 助手。",
                "为用户生成稳定风格的短视频文案。",
                model_row["id"] if model_row else "",
                now,
                now,
            ),
        )
        db.execute(
            "INSERT INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)",
            (assistant_id, kb["id"]),
        )
@app.post("/v2/auth/register")
def register(request: RegisterAccountRequest) -> dict[str, Any]:
    """Create a pending ``user`` account and its default project.

    Username, password and display name are stripped of surrounding
    whitespace; the display name falls back to the username.

    Raises:
        HTTPException: 400 when username or password is empty after
            stripping, 409 when the username is already taken.
    """
    username = request.username.strip()
    password = request.password.strip()
    display_name = request.display_name.strip() or username
    if not (username and password):
        raise HTTPException(status_code=400, detail="username and password are required")
    taken = db.fetch_one("SELECT id FROM accounts WHERE username = ?", (username,))
    if taken:
        raise HTTPException(status_code=409, detail="username already exists")

    new_account_id = make_id("acct")
    password_hash, password_salt = create_password_hash(password)
    created = utc_now()
    default_model = db.fetch_one("SELECT id FROM model_profiles WHERE is_default = 1 LIMIT 1")
    preferred_model_id = default_model["id"] if default_model else ""
    db.execute(
        """
        INSERT INTO accounts (
            id, username, password_hash, password_salt, display_name, role,
            approval_status, approved_by, approved_at, preferred_analysis_model_id,
            created_at, updated_at
        ) VALUES (?, ?, ?, ?, ?, 'user', 'pending', NULL, NULL, ?, ?, ?)
        """,
        (
            new_account_id,
            username,
            password_hash,
            password_salt,
            display_name,
            preferred_model_id,
            created,
            created,
        ),
    )
    ensure_default_project(new_account_id, username=username)
    stored = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (new_account_id,))
    return normalize_account(stored)
@app.post("/v2/auth/login")
def login(request: LoginRequest) -> dict[str, Any]:
    """Verify credentials and issue a new bearer token.

    The password is checked exactly as submitted and, when it carries
    leading/trailing whitespace, also in stripped form, because registration
    hashes the stripped password.

    Returns:
        The token, the normalised account and the default external base URL.

    Raises:
        HTTPException: 401 when the account is unknown or no password
            variant matches.
    """
    account = db.fetch_one("SELECT * FROM accounts WHERE username = ?", (request.username.strip(),))
    authenticated = False
    if account:
        supplied = request.password
        variants = [supplied]
        if supplied.strip() != supplied:
            variants.append(supplied.strip())
        authenticated = any(
            verify_password(variant, account["password_hash"], account["password_salt"])
            for variant in variants
        )
    if not authenticated:
        raise HTTPException(status_code=401, detail="Invalid credentials")
    token = secrets.token_urlsafe(32)
    db.execute(
        "INSERT INTO auth_tokens (token, account_id, created_at) VALUES (?, ?, ?)",
        (token, account["id"], utc_now()),
    )
    return {
        "token": token,
        "account": normalize_account(account),
        "default_external_base_url": DEFAULT_EXTERNAL_BASE_URL,
    }
@app.post("/v2/auth/logout")
def logout(account: dict[str, Any] = Depends(require_auth), authorization: str | None = Header(default=None)) -> dict[str, bool]:
    """Delete the bearer token carried by the ``Authorization`` header.

    ``account`` is accepted only to enforce the ``require_auth`` dependency.

    Raises:
        HTTPException: 401 when the header is absent or has no credential
            after the scheme, instead of failing with an unhandled error.
    """
    # "<scheme> <token>": everything after the first space is the credential.
    token = (authorization or "").partition(" ")[2].strip()
    if not token:
        raise HTTPException(status_code=401, detail="Missing bearer token")
    db.execute("DELETE FROM auth_tokens WHERE token = ?", (token,))
    return {"saved": True}
@app.get("/v2/me")
def me(account: dict[str, Any] = Depends(require_auth)) -> dict[str, Any]:
    """Return the authenticated account in its normalised form."""
    current_account = normalize_account(account)
    return current_account
@app.get("/v2/me/dashboard")
def dashboard(account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Collect the account's projects, knowledge bases, assistants, 20 latest jobs and visible model profiles."""
    owner = (account["id"],)

    project_rows = db.fetch_all("SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC", owner)
    kb_rows = db.fetch_all("SELECT * FROM knowledge_bases WHERE user_id = ? ORDER BY created_at DESC", owner)
    assistant_rows = db.fetch_all("SELECT * FROM assistants WHERE user_id = ? ORDER BY created_at DESC", owner)
    job_rows = db.fetch_all("SELECT * FROM jobs WHERE user_id = ? ORDER BY created_at DESC LIMIT 20", owner)
    # System profiles (no owner) are visible to everyone.
    profile_rows = db.fetch_all(
        "SELECT * FROM model_profiles WHERE owner_account_id IS NULL OR owner_account_id = ? ORDER BY is_default DESC, created_at ASC",
        owner,
    )

    return {
        "account": normalize_account(account),
        "projects": list(map(project_payload, project_rows)),
        "knowledge_bases": list(map(knowledge_base_payload, kb_rows)),
        "assistants": list(map(assistant_payload, assistant_rows)),
        "recent_jobs": list(map(job_payload, job_rows)),
        "model_profiles": list(map(normalize_model_profile, profile_rows)),
    }
@app.get("/v2/projects")
def list_projects(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """List the account's projects, oldest first."""
    owned = db.fetch_all("SELECT * FROM projects WHERE user_id = ? ORDER BY created_at ASC", (account["id"],))
    return list(map(project_payload, owned))
@app.post("/v2/projects")
def create_project(request: ProjectCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a project for the account and make sure it has a knowledge base."""
    owner_id = account["id"]
    new_project_id = make_id("project")
    created = utc_now()
    values = (
        new_project_id,
        owner_id,
        request.name.strip(),
        request.description.strip(),
        created,
        created,
    )
    db.execute(
        """
        INSERT INTO projects (id, user_id, name, description, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        values,
    )
    ensure_user_kb(owner_id, new_project_id, username=account["username"])
    stored = db.fetch_one("SELECT * FROM projects WHERE id = ?", (new_project_id,))
    return project_payload(stored)
@app.get("/v2/content-sources")
def list_content_sources(
    project_id: str | None = Query(default=None),
    account: dict[str, Any] = Depends(require_approved),
) -> list[dict[str, Any]]:
    """List the account's content sources, newest first, optionally limited to one project.

    When ``project_id`` is given it is first passed through
    ``resolve_target_project`` for the account.
    """
    sql = "SELECT * FROM content_sources WHERE user_id = ? ORDER BY created_at DESC"
    params: tuple[Any, ...] = (account["id"],)
    if project_id:
        resolve_target_project(account["id"], project_id, username=account["username"])
        sql = "SELECT * FROM content_sources WHERE user_id = ? AND project_id = ? ORDER BY created_at DESC"
        params = (account["id"], project_id)
    return [content_source_payload(entry) for entry in db.fetch_all(sql, params)]
@app.post("/v2/content-sources")
def create_content_source_api(request: ContentSourceCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a content source in the resolved target project from the stripped request fields."""
    target_project = resolve_target_project(account["id"], request.project_id or None, username=account["username"])
    fields = {
        "source_kind": request.source_kind.strip(),
        "platform": request.platform.strip(),
        "handle": request.handle.strip(),
        "source_url": request.source_url.strip(),
        "title": request.title.strip(),
        "local_path": request.local_path.strip(),
    }
    created = create_content_source(
        account_id=account["id"],
        project_id=target_project["id"],
        metadata=request.metadata,
        **fields,
    )
    return content_source_payload(created)
@app.post("/v2/pipelines/content-source-sync")
async def create_content_source_sync_job(
    request: ContentSourceSyncRequest,
    account: dict[str, Any] = Depends(require_approved),
) -> dict[str, Any]:
    """Create and trigger a job that syncs recent uploads of a content source.

    The source is either an existing content source owned by the account
    (``content_source_id``) or is created on the fly from the request fields.
    Request values take precedence over the stored source's values.

    Raises:
        HTTPException: 400 when no source URL can be determined or when the
            stored source belongs to a different project than the target,
            plus anything raised by the resolver helpers.
    """
    source_row = None
    if request.content_source_id.strip():
        source_row = load_owned_content_source(request.content_source_id.strip(), account["id"])
    stored = source_row or {}
    requested_project_id = request.project_id or (source_row.get("project_id", "") if source_row else "")
    project = resolve_target_project(account["id"], requested_project_id or None, username=account["username"])
    kb = resolve_target_kb(account["id"], request.knowledge_base_id or None, project["id"], username=account["username"])
    assistant = resolve_target_assistant(account["id"], request.assistant_id or None, project["id"])
    profile = model_profile_for_account(account["id"], request.analysis_model_profile_id or None)
    source_url = (request.source_url or stored.get("source_url") or "").strip()
    if not source_url:
        raise HTTPException(status_code=400, detail="source_url or content_source_id is required")
    platform = (request.platform or stored.get("platform") or infer_platform_from_url(source_url)).strip()
    handle = (request.handle or stored.get("handle") or "").strip()
    source_title = (
        request.title.strip()
        # A stored NULL title must not break the fallback chain.
        or (stored.get("title") or "").strip()
        or handle
        or source_url
    )
    if source_row and source_row.get("project_id") and source_row.get("project_id") != project["id"]:
        raise HTTPException(status_code=400, detail="Content source does not belong to target project")
    if not source_row:
        source_row = create_content_source(
            account_id=account["id"],
            project_id=project["id"],
            source_kind="creator_account",
            platform=platform,
            handle=handle,
            source_url=source_url,
            title=source_title,
            metadata={
                "sync_mode": "recent_uploads",
                "max_items": request.max_items,
                "analysis_model_profile_id": profile["id"],
            },
        )
    job_row = create_job_record(
        account_id=account["id"],
        project_id=project["id"],
        knowledge_base_id=kb["id"],
        source_type="content_source_sync",
        line_type="content_source_sync",
        workflow_key="content_source_sync_pipeline",
        title=f"{source_title} 内容源同步",
        language=request.language,
        source_url=source_url,
        assistant_id=(assistant or {}).get("id"),
        content_source_id=source_row["id"],
        artifacts={
            "platform": platform,
            "handle": handle,
            "source_account_url": source_url,
            "source_title": source_title,
            "max_items": request.max_items,
            "skip_existing": request.skip_existing,
            "auto_trigger_analysis": request.auto_trigger_analysis,
        },
        analysis_model_profile_id=profile["id"],
    )
    # Remember the latest sync request on the source itself.
    update_content_source_metadata(
        source_row["id"],
        {
            "sync_mode": "recent_uploads",
            "max_items": request.max_items,
            "analysis_model_profile_id": profile["id"],
            "last_sync_job_id": job_row["id"],
            "last_sync_requested_at": utc_now(),
        },
    )
    return job_payload(await trigger_orchestrated_job(job_row))
@app.get("/v2/model-profiles")
def list_model_profiles(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """List ownerless (shared) profiles plus the account's own, defaults and system profiles first."""
    visible = db.fetch_all(
        "SELECT * FROM model_profiles WHERE owner_account_id IS NULL OR owner_account_id = ? ORDER BY is_default DESC, is_system DESC, created_at ASC",
        (account["id"],),
    )
    return list(map(normalize_model_profile, visible))
@app.post("/v2/model-profiles")
def create_model_profile(request: ModelProfileRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create an account-owned ``openai_compat`` model profile.

    When the new profile is flagged as default, the default flag is first
    cleared on all other profiles owned by the account.
    """
    owner_id = account["id"]
    new_id = make_id("model")
    created = utc_now()
    default_flag = 1 if request.is_default else 0
    if request.is_default:
        db.execute("UPDATE model_profiles SET is_default = 0 WHERE owner_account_id = ?", (owner_id,))
    values = (
        new_id,
        owner_id,
        request.name.strip(),
        request.base_url.strip(),
        request.api_key.strip(),
        request.model_name.strip(),
        default_flag,
        created,
        created,
    )
    db.execute(
        """
        INSERT INTO model_profiles (id, owner_account_id, name, provider, base_url, api_key, model_name, is_system, is_default, created_at, updated_at)
        VALUES (?, ?, ?, 'openai_compat', ?, ?, ?, 0, ?, ?, ?)
        """,
        values,
    )
    return normalize_model_profile(db.fetch_one("SELECT * FROM model_profiles WHERE id = ?", (new_id,)))
@app.post("/v2/me/preferences/analysis-model")
def set_preferred_analysis_model(request: PreferredModelRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Store the account's preferred analysis model and return the refreshed account.

    Raises:
        HTTPException: 404 when the profile does not exist or is owned by
            another account.
    """
    account_id = account["id"]
    chosen = db.fetch_one(
        "SELECT * FROM model_profiles WHERE id = ? AND (owner_account_id IS NULL OR owner_account_id = ?)",
        (request.model_profile_id, account_id),
    )
    if chosen is None or not chosen:
        raise HTTPException(status_code=404, detail="Model profile not found")
    db.execute(
        "UPDATE accounts SET preferred_analysis_model_id = ?, updated_at = ? WHERE id = ?",
        (request.model_profile_id, utc_now(), account_id),
    )
    refreshed = db.fetch_one("SELECT * FROM accounts WHERE id = ?", (account_id,))
    return normalize_account(refreshed)
@app.get("/v2/knowledge-bases")
def list_knowledge_bases(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """List the account's knowledge bases, newest first."""
    owned = db.fetch_all("SELECT * FROM knowledge_bases WHERE user_id = ? ORDER BY created_at DESC", (account["id"],))
    return list(map(knowledge_base_payload, owned))
@app.post("/v2/knowledge-bases")
def create_knowledge_base(request: KnowledgeBaseCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a ``ready`` knowledge base in the resolved target project."""
    target_project = resolve_target_project(account["id"], request.project_id or None, username=account["username"])
    new_id = make_id("kb")
    created = utc_now()
    values = (
        new_id,
        account["id"],
        target_project["id"],
        request.name.strip(),
        request.description.strip(),
        created,
        created,
    )
    db.execute(
        """
        INSERT INTO knowledge_bases (id, user_id, project_id, name, description, sync_status, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, 'ready', ?, ?)
        """,
        values,
    )
    return knowledge_base_payload(db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ?", (new_id,)))
@app.get("/v2/knowledge-bases/{knowledge_base_id}/documents")
def list_knowledge_documents(knowledge_base_id: str, account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """List a knowledge base's documents, newest first.

    Raises:
        HTTPException: 404 when the knowledge base does not exist or is not
            owned by the account.
    """
    owned_kb = db.fetch_one(
        "SELECT * FROM knowledge_bases WHERE id = ? AND user_id = ?",
        (knowledge_base_id, account["id"]),
    )
    if owned_kb:
        documents = db.fetch_all(
            "SELECT * FROM knowledge_documents WHERE knowledge_base_id = ? ORDER BY created_at DESC",
            (knowledge_base_id,),
        )
        return list(map(document_payload, documents))
    raise HTTPException(status_code=404, detail="Knowledge base not found")
@app.get("/v2/reviews")
def list_reviews(
    project_id: str | None = Query(default=None),
    limit: int = Query(default=50, ge=1, le=200),
    account: dict[str, Any] = Depends(require_approved),
) -> list[dict[str, Any]]:
    """List the account's publish reviews, most recently published first.

    ``project_id`` omitted: all projects. ``project_id`` blank: only reviews
    without a project. Otherwise: only reviews of that project.
    """
    where = "user_id = ?"
    bindings: list[Any] = [account["id"]]
    if project_id is not None:
        wanted_project = project_id.strip()
        if not wanted_project:
            where += " AND (project_id IS NULL OR project_id = '')"
        else:
            where += " AND project_id = ?"
            bindings.append(wanted_project)
    bindings.append(limit)
    sql = f"SELECT * FROM publish_reviews WHERE {where} ORDER BY COALESCE(NULLIF(published_at, ''), created_at) DESC, created_at DESC LIMIT ?"
    return list(map(review_payload, db.fetch_all(sql, tuple(bindings))))
@app.post("/v2/reviews")
def create_review(request: ReviewCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create a publish review, optionally linked to one of the account's jobs.

    Project and title default to those of the source job; when no title is
    available one is derived from the project name. Platform and content
    type default to ``douyin`` and ``video``.
    """
    owner_id = account["id"]
    linked_job_id = request.source_job_id.strip()
    source_job = load_owned_job(linked_job_id, owner_id) if linked_job_id else None

    requested_project_id = request.project_id.strip() or (source_job.get("project_id", "") if source_job else "")
    project = resolve_target_project(owner_id, requested_project_id or None, username=account["username"])
    assistant = resolve_target_assistant(owner_id, request.assistant_id or None, project["id"])

    new_review_id = make_id("review")
    review_title = request.title.strip() or (source_job.get("title", "") if source_job else "")
    if not review_title:
        review_title = f"{project['name']} 复盘"

    stamp = utc_now()
    values = (
        new_review_id,
        owner_id,
        project["id"],
        source_job["id"] if source_job else None,
        (assistant or {}).get("id") or None,
        review_title,
        request.platform or "douyin",
        request.content_type or "video",
        request.publish_url.strip(),
        request.published_at.strip(),
        json.dumps(request.metrics, ensure_ascii=False),
        request.verdict.strip(),
        request.highlights.strip(),
        request.next_actions.strip(),
        request.notes.strip(),
        stamp,
        stamp,
    )
    db.execute(
        """
        INSERT INTO publish_reviews (
            id, user_id, project_id, source_job_id, assistant_id, title, platform, content_type,
            publish_url, published_at, metrics_json, verdict, highlights, next_actions, notes, created_at, updated_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
    return review_payload(db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (new_review_id,)))
@app.patch("/v2/reviews/{review_id}")
def update_review(review_id: str, request: ReviewUpdateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Partially update a publish review owned by the account.

    Request fields left as ``None`` keep their stored value. Passing an
    ``assistant_id`` (even an empty one) re-resolves the assistant within
    the review's project.
    """
    current = load_owned_review(review_id, account["id"])

    def keep_or_replace(new_value: Any, column: str, fallback: str) -> Any:
        # None means "field not supplied": fall back to what is stored.
        if new_value is not None:
            return new_value
        return current.get(column, fallback)

    if request.assistant_id is None:
        assistant_id = current.get("assistant_id") or None
    else:
        assistant = resolve_target_assistant(account["id"], request.assistant_id or None, current.get("project_id", ""))
        assistant_id = (assistant or {}).get("id") or None

    if request.metrics is not None:
        metrics = request.metrics
    else:
        metrics = parse_json_object(current.get("metrics_json") or "{}")

    values = (
        keep_or_replace(request.title, "title", ""),
        keep_or_replace(request.platform, "platform", "douyin"),
        keep_or_replace(request.content_type, "content_type", "video"),
        keep_or_replace(request.publish_url, "publish_url", ""),
        keep_or_replace(request.published_at, "published_at", ""),
        json.dumps(metrics, ensure_ascii=False),
        keep_or_replace(request.verdict, "verdict", ""),
        keep_or_replace(request.highlights, "highlights", ""),
        keep_or_replace(request.next_actions, "next_actions", ""),
        keep_or_replace(request.notes, "notes", ""),
        assistant_id,
        utc_now(),
        review_id,
        account["id"],
    )
    db.execute(
        """
        UPDATE publish_reviews
        SET title = ?, platform = ?, content_type = ?, publish_url = ?, published_at = ?,
            metrics_json = ?, verdict = ?, highlights = ?, next_actions = ?, notes = ?,
            assistant_id = ?, updated_at = ?
        WHERE id = ? AND user_id = ?
        """,
        values,
    )
    return review_payload(db.fetch_one("SELECT * FROM publish_reviews WHERE id = ?", (review_id,)))
@app.get("/v2/explore/jobs")
def list_jobs(
    parent_job_id: str | None = Query(default=None),
    line_type: str | None = Query(default=None),
    account: dict[str, Any] = Depends(require_approved),
) -> list[dict[str, Any]]:
    """List the caller's jobs, newest first, with optional filters.

    ``parent_job_id`` given but blank selects jobs without a parent; a
    non-blank value selects children of that job. ``line_type`` filters on
    the stripped value when it is non-empty.
    """
    conditions = ["user_id = ?"]
    bindings: list[Any] = [account["id"]]

    if parent_job_id is not None:
        parent = parent_job_id.strip()
        if not parent:
            # Explicitly blank parent: only top-level jobs.
            conditions.append("(parent_job_id IS NULL OR parent_job_id = '')")
        else:
            conditions.append("parent_job_id = ?")
            bindings.append(parent)

    if line_type:
        conditions.append("line_type = ?")
        bindings.append(line_type.strip())

    where_clause = " AND ".join(conditions)
    rows = db.fetch_all(f"SELECT * FROM jobs WHERE {where_clause} ORDER BY created_at DESC", tuple(bindings))
    return [job_payload(job) for job in rows]
@app.get("/v2/explore/jobs/{job_id}")
def get_job(job_id: str, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Return one job owned by the caller, or respond 404 if there is none."""
    owned = db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"]))
    if owned:
        return job_payload(owned)
    raise HTTPException(status_code=404, detail="Job not found")
@app.get("/v2/explore/jobs/{job_id}/events")
def get_job_events(job_id: str, account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """Return the events of a caller-owned job in chronological order.

    Responds 404 when the job does not exist for this account.
    """
    # Ownership check first: events themselves are only keyed by job id.
    owned = db.fetch_one("SELECT id FROM jobs WHERE id = ? AND user_id = ?", (job_id, account["id"]))
    if not owned:
        raise HTTPException(status_code=404, detail="Job not found")
    events = db.fetch_all("SELECT * FROM job_events WHERE job_id = ? ORDER BY created_at ASC", (job_id,))
    return list(map(job_event_payload, events))
def resolve_target_kb(account_id: str, requested_kb_id: str | None, project_id: str = "", username: str = "默认用户") -> dict[str, Any]:
    """Pick the knowledge base a new job should write into.

    Without a requested id, falls back to ``ensure_user_kb``. With one, the
    knowledge base must belong to the account (404 otherwise) and, when both
    sides carry a project id, to the target project (400 otherwise).
    """
    if not requested_kb_id:
        return ensure_user_kb(account_id, project_id, username=username)

    kb = db.fetch_one("SELECT * FROM knowledge_bases WHERE id = ? AND user_id = ?", (requested_kb_id, account_id))
    if not kb:
        raise HTTPException(status_code=404, detail="Knowledge base not found")

    kb_project = kb.get("project_id")
    if project_id and kb_project and kb_project != project_id:
        raise HTTPException(status_code=400, detail="Knowledge base does not belong to target project")
    return kb
@app.post("/v2/explore/text")
async def create_text_job(request: ExploreTextRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create an analysis job from inline text and hand it to the orchestrator.

    Resolves project, knowledge base, assistant and model profile for the
    caller, records a content source holding a 280-character preview, then
    creates and triggers the job.
    """
    owner_id = account["id"]
    owner_name = account["username"]

    project = resolve_target_project(owner_id, request.project_id or None, username=owner_name)
    kb = resolve_target_kb(owner_id, request.knowledge_base_id, project["id"], username=owner_name)
    assistant = resolve_target_assistant(owner_id, request.assistant_id, project["id"])
    profile = model_profile_for_account(owner_id, request.analysis_model_profile_id)

    title = request.title.strip()
    source = create_content_source(
        account_id=owner_id,
        project_id=project["id"],
        source_kind="inline_text",
        title=title,
        metadata={"content_preview": request.content[:280]},
    )
    job_row = create_job_record(
        account_id=owner_id,
        project_id=project["id"],
        knowledge_base_id=kb["id"],
        source_type="text",
        line_type="analysis",
        workflow_key="analysis_pipeline",
        title=title,
        language="zh-CN",
        assistant_id=(assistant or {}).get("id"),
        content_source_id=source["id"],
        artifacts={"input_text": request.content},
        analysis_model_profile_id=profile["id"],
    )
    triggered = await trigger_orchestrated_job(job_row)
    return job_payload(triggered)
@app.post("/v2/explore/video-link")
async def create_video_link_job(request: ExploreVideoLinkRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create an analysis job for a remote video URL and trigger it.

    The title falls back to a default label when the request has none; the
    URL is stripped before being stored on both the content source and job.
    """
    owner_id = account["id"]
    owner_name = account["username"]

    project = resolve_target_project(owner_id, request.project_id or None, username=owner_name)
    kb = resolve_target_kb(owner_id, request.knowledge_base_id, project["id"], username=owner_name)
    assistant = resolve_target_assistant(owner_id, request.assistant_id, project["id"])
    profile = model_profile_for_account(owner_id, request.analysis_model_profile_id)

    video_url = request.video_url.strip()
    title = (request.title or "短视频素材").strip()

    source = create_content_source(
        account_id=owner_id,
        project_id=project["id"],
        source_kind="video_link",
        source_url=video_url,
        title=title,
        metadata={"platform": "video_link"},
    )
    job_row = create_job_record(
        account_id=owner_id,
        project_id=project["id"],
        knowledge_base_id=kb["id"],
        source_type="video_link",
        line_type="analysis",
        workflow_key="analysis_pipeline",
        title=title,
        language=request.language,
        source_url=video_url,
        assistant_id=(assistant or {}).get("id"),
        content_source_id=source["id"],
        artifacts={},
        analysis_model_profile_id=profile["id"],
    )
    triggered = await trigger_orchestrated_job(job_row)
    return job_payload(triggered)
@app.post("/v2/explore/upload-video")
async def upload_video(
    file: UploadFile = File(...),
    title: str = Form(""),
    project_id: str = Form(""),
    knowledge_base_id: str = Form(""),
    assistant_id: str = Form(""),
    analysis_model_profile_id: str = Form(""),
    account: dict[str, Any] = Depends(require_approved),
) -> dict[str, Any]:
    """Store an uploaded video on disk and start an analysis job for it.

    The upload is written to ``JOBS_DIR/<generated id>/source<suffix>``, a
    content source and job record are created, and the job is handed to the
    orchestrator. Empty form fields are treated as "not provided".

    The file copy runs in a worker thread so a large upload does not block
    the event loop, and the job directory is removed again if the copy fails.
    """
    project = resolve_target_project(account["id"], project_id or None, username=account["username"])
    kb = resolve_target_kb(account["id"], knowledge_base_id or None, project["id"], username=account["username"])
    assistant = resolve_target_assistant(account["id"], assistant_id or None, project["id"])
    profile = model_profile_for_account(account["id"], analysis_model_profile_id or None)

    # NOTE(review): this id only names the storage directory; it is not passed
    # to create_job_record below — confirm whether the two are meant to match.
    job_id = make_id("job_upload")
    job_dir = JOBS_DIR / job_id
    job_dir.mkdir(parents=True, exist_ok=True)
    # Only the extension of the client-supplied name is reused on disk.
    suffix = Path(file.filename or "upload.mp4").suffix or ".mp4"
    target_path = job_dir / f"source{suffix}"

    def _persist_upload() -> None:
        with target_path.open("wb") as handle:
            shutil.copyfileobj(file.file, handle)

    try:
        # Blocking disk I/O must not run on the event loop thread.
        await asyncio.to_thread(_persist_upload)
    except Exception:
        # Do not leave a half-written upload behind.
        shutil.rmtree(job_dir, ignore_errors=True)
        raise

    display_title = (title or file.filename or "上传视频素材").strip()
    source = create_content_source(
        account_id=account["id"],
        project_id=project["id"],
        source_kind="upload_video",
        source_url=file.filename or "",
        title=display_title,
        local_path=str(target_path),
        metadata={"filename": file.filename or "", "size_bytes": target_path.stat().st_size},
    )
    job_row = create_job_record(
        account_id=account["id"],
        project_id=project["id"],
        knowledge_base_id=kb["id"],
        source_type="upload_video",
        line_type="analysis",
        workflow_key="analysis_pipeline",
        title=display_title,
        source_url=file.filename or "",
        assistant_id=(assistant or {}).get("id"),
        content_source_id=source["id"],
        artifacts={"uploaded_path": str(target_path)},
        analysis_model_profile_id=profile["id"],
    )
    return job_payload(await trigger_orchestrated_job(job_row))
@app.post("/v2/pipelines/real-cut")
async def create_real_cut_job(request: RealCutJobRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create and trigger a real-cut pipeline job.

    The input directory comes from the request or, when it is blank, from
    staging the media of ``source_job_id`` via
    ``stage_real_cut_source_to_cutvideo``. Responds 400 when neither is given
    or when the source job belongs to a different project than the target.
    """
    owner_id = account["id"]
    owner_name = account["username"]

    source_job_id = request.source_job_id.strip()
    source_job = load_owned_job(source_job_id, owner_id) if source_job_id else None

    # Default the project to the one of the source job when not specified.
    inherited_project_id = source_job.get("project_id", "") if source_job else ""
    requested_project_id = request.project_id or inherited_project_id
    project = resolve_target_project(owner_id, requested_project_id or None, username=owner_name)
    if source_job and source_job.get("project_id") and source_job.get("project_id") != project["id"]:
        raise HTTPException(status_code=400, detail="Source job does not belong to target project")
    kb = ensure_user_kb(owner_id, project["id"], username=owner_name)

    input_dir = request.input_dir.strip()
    staged: dict[str, Any] = {}
    if not input_dir:
        if not source_job:
            raise HTTPException(status_code=400, detail="input_dir or source_job_id is required")
        staged = await stage_real_cut_source_to_cutvideo(source_job)
        input_dir = staged["input_dir"]

    content_source_url = input_dir
    source_metadata: dict[str, Any] = {"line_type": "real_cut"}
    if source_job:
        content_source_url = source_job.get("source_url") or input_dir
        source_metadata["source_job_id"] = source_job["id"]
        source_metadata["source_job_type"] = source_job.get("source_type", "")
    if staged:
        source_metadata["cutvideo_upload"] = staged.get("upload", {})
        source_metadata["source_media_path"] = staged.get("source_path", "")

    title = request.title.strip()
    source = create_content_source(
        account_id=owner_id,
        project_id=project["id"],
        source_kind="real_cut_input",
        title=title,
        source_url=content_source_url,
        local_path=input_dir,
        metadata=source_metadata,
    )

    cutvideo_request = {
        "base_config": request.base_config.strip() or CUTVIDEO_BASE_CONFIG,
        "name": title,
        "input_dir": input_dir,
        "objective": request.objective,
        "target_duration_sec": request.target_duration_sec,
        "target_aspect_ratio": request.target_aspect_ratio,
        "ideal_segment_duration_sec": request.ideal_segment_duration_sec,
        "max_segment_duration_sec": request.max_segment_duration_sec,
        "transcript_backend": request.transcript_backend,
        "transcript_device": request.transcript_device,
        "review_enabled": request.review_enabled,
        "dry_run": request.dry_run,
    }
    job_artifacts = {
        "source_job_id": source_job["id"] if source_job else "",
        "source_media_path": staged.get("source_path", ""),
        "cutvideo_upload": staged.get("upload", {}),
        "cutvideo_request": cutvideo_request,
    }
    job_row = create_job_record(
        account_id=owner_id,
        project_id=project["id"],
        knowledge_base_id=kb["id"],
        source_type="real_cut",
        line_type="real_cut",
        workflow_key="real_cut_pipeline",
        title=title,
        source_url=input_dir,
        content_source_id=source["id"],
        artifacts=job_artifacts,
    )
    triggered = await trigger_orchestrated_job(job_row)
    return job_payload(triggered)
@app.post("/v2/pipelines/ai-video")
async def create_ai_video_job(request: AiVideoJobRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create and trigger an AI-video pipeline job.

    When ``source_job_id`` is given, the job must belong to the caller (404)
    and be completed (409); its project and knowledge base are used as
    defaults when the request does not name its own.
    """
    owner_id = account["id"]
    owner_name = account["username"]
    source_job_id = request.source_job_id.strip()

    inherited_project_id = ""
    inherited_kb_id = ""
    if source_job_id:
        source_job = db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (source_job_id, owner_id))
        if not source_job:
            raise HTTPException(status_code=404, detail="Source job not found")
        if source_job["status"] != "completed":
            raise HTTPException(status_code=409, detail="Source job must be completed before AI video generation")
        inherited_project_id = source_job.get("project_id", "")
        inherited_kb_id = source_job.get("knowledge_base_id", "")

    requested_project_id = request.project_id or inherited_project_id
    project = resolve_target_project(owner_id, requested_project_id or None, username=owner_name)
    kb = resolve_target_kb(owner_id, request.knowledge_base_id or inherited_kb_id or None, project["id"], username=owner_name)
    assistant = resolve_target_assistant(owner_id, request.assistant_id or None, project["id"])

    title = request.title.strip()
    source = create_content_source(
        account_id=owner_id,
        project_id=project["id"],
        source_kind="ai_video_brief",
        title=title,
        metadata={"source_job_id": source_job_id},
    )
    render_settings = {
        "brief": request.brief,
        "style": request.style,
        "shots": request.shots,
        "image_provider": request.image_provider,
        "image_model": request.image_model,
        "video_provider": request.video_provider,
        "video_model": request.video_model,
        "aspect_ratio": request.aspect_ratio,
        "duration": request.duration,
        "source_job_id": source_job_id,
    }
    job_row = create_job_record(
        account_id=owner_id,
        project_id=project["id"],
        knowledge_base_id=kb["id"],
        source_type="ai_video",
        line_type="ai_video",
        workflow_key="ai_video_pipeline",
        title=title,
        assistant_id=(assistant or {}).get("id"),
        content_source_id=source["id"],
        artifacts=render_settings,
    )
    triggered = await trigger_orchestrated_job(job_row)
    return job_payload(triggered)
@app.get("/v2/assistants")
def list_assistants(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """Return every assistant owned by the caller, newest first."""
    owned = db.fetch_all("SELECT * FROM assistants WHERE user_id = ? ORDER BY created_at DESC", (account["id"],))
    return list(map(assistant_payload, owned))
@app.post("/v2/assistants")
def create_assistant(request: AssistantCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Create an assistant for the caller and link its knowledge bases.

    Knowledge-base ids that do not belong to the caller are skipped rather
    than rejected.
    """
    owner_id = account["id"]
    assistant_id = make_id("assistant")
    created_at = utc_now()
    project = resolve_target_project(owner_id, request.project_id or None, username=account["username"])
    model_profile = model_profile_for_account(owner_id, request.model_profile_id or None)

    new_row = (
        assistant_id,
        owner_id,
        project["id"],
        request.name.strip(),
        request.description.strip(),
        request.system_prompt.strip(),
        request.generation_goal.strip(),
        model_profile["id"],
        created_at,
        created_at,
    )
    db.execute(
        """
        INSERT INTO assistants (id, user_id, project_id, name, description, system_prompt, generation_goal, config_json, model_profile_id, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, '{}', ?, ?, ?)
        """,
        new_row,
    )

    for kb_id in request.knowledge_base_ids:
        owned_kb = db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? AND user_id = ?", (kb_id, owner_id))
        if not owned_kb:
            continue
        db.execute("INSERT OR IGNORE INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)", (assistant_id, kb_id))

    stored = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,))
    return assistant_payload(stored)
@app.patch("/v2/assistants/{assistant_id}")
def update_assistant(assistant_id: str, request: AssistantUpdateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Partially update one of the caller's assistants.

    Request fields left as ``None`` keep their stored value. When
    ``knowledge_base_ids`` is provided, the assistant's links are replaced by
    the subset of those ids that belong to the caller. Responds 404 when the
    assistant does not exist for this account.
    """
    current = db.fetch_one("SELECT * FROM assistants WHERE id = ? AND user_id = ?", (assistant_id, account["id"]))
    if not current:
        raise HTTPException(status_code=404, detail="Assistant not found")
    project_id = current.get("project_id", "")
    if request.project_id is not None:
        project_id = resolve_target_project(account["id"], request.project_id, username=account["username"])["id"]
    payload = {
        "name": request.name if request.name is not None else current["name"],
        "description": request.description if request.description is not None else current.get("description", ""),
        "system_prompt": request.system_prompt if request.system_prompt is not None else current.get("system_prompt", ""),
        "generation_goal": request.generation_goal if request.generation_goal is not None else current.get("generation_goal", ""),
        "project_id": project_id,
        "model_profile_id": current.get("model_profile_id", ""),
    }
    if request.model_profile_id is not None:
        payload["model_profile_id"] = model_profile_for_account(account["id"], request.model_profile_id)["id"]
    # Scope the write by owner as well as id, matching update_review, so the
    # statement cannot touch another account's row even if the check above
    # is ever changed.
    db.execute(
        """
        UPDATE assistants
        SET project_id = ?, name = ?, description = ?, system_prompt = ?, generation_goal = ?, model_profile_id = ?, updated_at = ?
        WHERE id = ? AND user_id = ?
        """,
        (
            payload["project_id"],
            payload["name"],
            payload["description"],
            payload["system_prompt"],
            payload["generation_goal"],
            payload["model_profile_id"],
            utc_now(),
            assistant_id,
            account["id"],
        ),
    )
    if request.knowledge_base_ids is not None:
        # Replace the full link set; ids not owned by the caller are skipped.
        db.execute("DELETE FROM assistant_knowledge_bases WHERE assistant_id = ?", (assistant_id,))
        for kb_id in request.knowledge_base_ids:
            kb = db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? AND user_id = ?", (kb_id, account["id"]))
            if kb:
                db.execute("INSERT OR IGNORE INTO assistant_knowledge_bases (assistant_id, knowledge_base_id) VALUES (?, ?)", (assistant_id, kb_id))
    return assistant_payload(db.fetch_one("SELECT * FROM assistants WHERE id = ?", (assistant_id,)))
@app.get("/v2/agents")
def list_agents(account: dict[str, Any] = Depends(require_approved)) -> list[dict[str, Any]]:
    """Alias route: delegates to ``list_assistants``."""
    assistants = list_assistants(account)
    return assistants
@app.post("/v2/agents")
def create_agent(request: AssistantCreateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Alias route: delegates to ``create_assistant``."""
    created = create_assistant(request, account)
    return created
@app.patch("/v2/agents/{assistant_id}")
def update_agent(assistant_id: str, request: AssistantUpdateRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Alias route: delegates to ``update_assistant``."""
    updated = update_assistant(assistant_id, request, account)
    return updated
@app.post("/v2/assistants/{assistant_id}/generate")
async def generate_copy(assistant_id: str, request: GenerateCopyRequest, account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]:
    """Generate copy with an assistant, grounded on knowledge-base excerpts.

    Uses the knowledge bases named on the request, or the assistant's linked
    ones when the request names none. Only knowledge bases owned by the
    caller are read; other ids are dropped. Up to three most recent documents
    per knowledge base are excerpted into the prompt. Responds 404 when the
    assistant does not exist for this account.
    """
    assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ? AND user_id = ?", (assistant_id, account["id"]))
    if not assistant:
        raise HTTPException(status_code=404, detail="Assistant not found")
    requested_kb_ids = request.knowledge_base_ids or [
        row["knowledge_base_id"]
        for row in db.fetch_all("SELECT knowledge_base_id FROM assistant_knowledge_bases WHERE assistant_id = ?", (assistant_id,))
    ]
    # Request-supplied ids are untrusted: without this ownership filter a
    # caller could read (and get back) documents from another user's
    # knowledge base. Same check as create_assistant/update_assistant.
    kb_ids = [
        kb_id
        for kb_id in requested_kb_ids
        if db.fetch_one("SELECT id FROM knowledge_bases WHERE id = ? AND user_id = ?", (kb_id, account["id"]))
    ]
    used_documents: list[dict[str, Any]] = []
    excerpts: list[str] = []
    for kb_id in kb_ids:
        docs = db.fetch_all("SELECT * FROM knowledge_documents WHERE knowledge_base_id = ? ORDER BY created_at DESC LIMIT 3", (kb_id,))
        for doc in docs:
            payload = document_payload(doc)
            used_documents.append(payload)
            # Fall back to "" so a document with no text cannot break slicing.
            excerpt = payload["combined_text"] or payload["style_summary"] or payload["transcript_text"] or ""
            excerpts.append(f"[{payload['title']}]\n{excerpt[:1200]}")
    prompt_excerpt = "\n\n".join(excerpts)[:6000]
    system_prompt = assistant.get("system_prompt") or "你是文案助手。"
    generation_goal = assistant.get("generation_goal") or "生成短视频文案。"
    user_prompt = (
        f"任务目标:{generation_goal}\n"
        f"创作需求:{request.brief}\n"
        f"平台:{request.platform}\n"
        f"受众:{request.audience}\n"
        f"额外要求:{request.extra_requirements or ''}\n\n"
        f"参考知识库素材:\n{prompt_excerpt or '暂无参考素材,请按通用短视频结构输出。'}\n\n"
        "请输出完整文案,包含标题、开场钩子、正文结构和结尾行动指令。"
    )
    profile = model_profile_for_account(account["id"], assistant.get("model_profile_id") or None)
    content = await call_model(profile, system_prompt, user_prompt, temperature=0.7)
    return {
        "assistant_id": assistant_id,
        "knowledge_base_ids": kb_ids,
        "content": content,
        "prompt_excerpt": prompt_excerpt[:2000],
        "used_documents": used_documents,
    }
def load_owned_job(job_id: str, account_id: str) -> dict[str, Any]:
    """Fetch a job that belongs to ``account_id`` or raise a 404."""
    owned = db.fetch_one("SELECT * FROM jobs WHERE id = ? AND user_id = ?", (job_id, account_id))
    if owned:
        return owned
    raise HTTPException(status_code=404, detail="Job not found")
def load_owned_content_source(source_id: str, account_id: str) -> dict[str, Any]:
    """Fetch a content source that belongs to ``account_id`` or raise a 404."""
    owned = db.fetch_one("SELECT * FROM content_sources WHERE id = ? AND user_id = ?", (source_id, account_id))
    if owned:
        return owned
    raise HTTPException(status_code=404, detail="Content source not found")
def load_owned_review(review_id: str, account_id: str) -> dict[str, Any]:
    """Fetch a publish review that belongs to ``account_id`` or raise a 404."""
    owned = db.fetch_one("SELECT * FROM publish_reviews WHERE id = ? AND user_id = ?", (review_id, account_id))
    if owned:
        return owned
    raise HTTPException(status_code=404, detail="Review not found")
def load_internal_job(job_id: str) -> dict[str, Any]:
    """Fetch a job by id without an ownership filter, or raise a 404."""
    found = db.fetch_one("SELECT * FROM jobs WHERE id = ?", (job_id,))
    if found:
        return found
    raise HTTPException(status_code=404, detail="Job not found")
def parse_job_artifacts(row: dict[str, Any]) -> dict[str, Any]:
    """Decode a job row's ``artifacts_json`` column into a dict.

    Returns an empty dict when the column is missing, empty, not valid JSON,
    or decodes to something other than a JSON object. Callers use ``.get``
    on the result, so a list or scalar must never be returned.
    """
    raw = row.get("artifacts_json") or "{}"
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # Same guard as parse_job_result: only JSON objects are usable here.
    return data if isinstance(data, dict) else {}
def parse_job_result(row: dict[str, Any]) -> dict[str, Any]:
    """Decode a job row's ``result_json`` column into a dict.

    Anything that is missing, empty, invalid JSON, or not a JSON object
    yields an empty dict.
    """
    serialized = row.get("result_json") or "{}"
    try:
        decoded = json.loads(serialized)
    except json.JSONDecodeError:
        return {}
    if isinstance(decoded, dict):
        return decoded
    return {}
def extract_source_storyboards(source_job: dict[str, Any] | None) -> list[dict[str, Any]]:
if not source_job:
return []
return coerce_storyboards(parse_job_result(source_job).get("storyboards"))
def resolve_internal_job_id(request: InternalStepRequest | None, query_job_id: str = "") -> str:
    """Work out which job id an internal step call refers to.

    The query-string id wins when non-blank. Otherwise the request body is
    consulted in order: ``job_id``, ``jobId``, then ``payload["job_id"]`` /
    ``payload["jobId"]``. Returns the stripped id, or ``""`` when none found.
    """
    from_query = (query_job_id or "").strip()
    if from_query or request is None:
        return from_query

    candidate = request.job_id or request.jobId
    if not candidate:
        # Payload values may not be strings, hence the str() conversion.
        body = request.payload
        candidate = str(body.get("job_id") or body.get("jobId") or "")
    return candidate.strip()
def load_step_job(request: InternalStepRequest | None, query_job_id: str, workflow_key: str) -> dict[str, Any]:
    """Load the job an internal step should operate on.

    With an explicit job id (query string or request body) that job is
    loaded. Without one, the oldest pending or queued job of ``workflow_key``
    is picked; if there is none, a 400 "job_id is required" is raised.
    """
    explicit_id = resolve_internal_job_id(request, query_job_id)
    if explicit_id:
        return load_internal_job(explicit_id)

    # No id supplied: fall back to the oldest waiting job for this workflow.
    oldest_waiting = db.fetch_one(
        """
        SELECT * FROM jobs
        WHERE workflow_key = ? AND status IN ('pending', 'queued')
        ORDER BY created_at ASC
        LIMIT 1
        """,
        (workflow_key,),
    )
    if oldest_waiting:
        return oldest_waiting
    raise HTTPException(status_code=400, detail="job_id is required")
@app.get("/internal/jobs/{job_id}/context")
def internal_job_context(job_id: str, _: bool = Depends(require_orchestrator)) -> dict[str, Any]:
    """Return the context payload of a job for the orchestrator."""
    job = load_internal_job(job_id)
    return job_context_payload(job)
@app.post("/internal/jobs/steps/analyze")
async def internal_run_analysis(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Run the analysis step for a job and return its refreshed context."""
    target_id = load_step_job(request, job_id, "analysis_pipeline")["id"]
    await process_job(target_id)
    # Reload so the response reflects whatever process_job persisted.
    refreshed = load_internal_job(target_id)
    return job_context_payload(refreshed)
@app.post("/internal/jobs/steps/content-source-sync")
async def internal_content_source_sync(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Discover videos for a content-source sync job and spawn analysis jobs.

    Reads the account URL and options from the job's artifacts, calls
    ``discover_account_video_links``, and for each discovered video creates a
    content source plus a child ``analysis_pipeline`` job (optionally
    triggering it). Videos that already have a ``video_link`` job for the same
    user and project are skipped when ``skip_existing`` is set.

    On success the sync job is marked ``completed``; any exception raised
    during discovery or child creation marks it ``failed`` instead, and the
    job context is returned in both cases. A missing source URL raises a 400
    before any state is changed.
    """
    row = load_step_job(request, job_id, "content_source_sync_pipeline")
    artifacts = parse_job_artifacts(row)
    source_url = str(artifacts.get("source_account_url") or row.get("source_url") or "").strip()
    if not source_url:
        raise HTTPException(status_code=400, detail="Content source sync job is missing source URL")
    # Clamp the requested batch size to 1..20; defaults to 5 when unset.
    max_items = max(1, min(int(artifacts.get("max_items") or 5), 20))
    skip_existing = bool(artifacts.get("skip_existing", True))
    auto_trigger_analysis = bool(artifacts.get("auto_trigger_analysis", True))
    update_job_state(
        row["id"],
        status="processing",
        provider_name="collector",
        provider_task_id=row["id"],
        result={"sync_started": True},
    )
    try:
        discovered_items, debug_payload = discover_account_video_links(source_url, max_items)
        child_jobs: list[dict[str, Any]] = []
        queued_jobs: list[dict[str, Any]] = []
        skipped_items: list[dict[str, Any]] = []
        for index, item in enumerate(discovered_items, start=1):
            video_url = str(item.get("video_url") or "").strip()
            if not video_url:
                continue
            # Most recent job for the same video within this user and project.
            existing_row = db.fetch_one(
                """
                SELECT * FROM jobs
                WHERE user_id = ? AND project_id = ? AND source_type = 'video_link' AND source_url = ?
                ORDER BY created_at DESC
                LIMIT 1
                """,
                (row["user_id"], row.get("project_id", ""), video_url),
            )
            if existing_row and skip_existing:
                skipped_items.append(
                    {
                        "video_url": video_url,
                        "title": item.get("title") or existing_row.get("title") or "短视频素材",
                        "existing_job_id": existing_row["id"],
                        "existing_status": existing_row.get("status", ""),
                    }
                )
                continue
            content_source = create_content_source(
                account_id=row["user_id"],
                project_id=row.get("project_id", ""),
                source_kind="video_link",
                platform=str(artifacts.get("platform") or infer_platform_from_url(video_url)),
                handle=str(artifacts.get("handle") or ""),
                source_url=video_url,
                title=str(item.get("title") or f"内容源视频 {index}"),
                metadata={
                    "origin_content_source_id": row.get("content_source_id", ""),
                    "origin_sync_job_id": row["id"],
                    "external_id": str(item.get("external_id") or ""),
                    "source_account_url": source_url,
                },
            )
            # The child inherits knowledge base, language, assistant and model
            # profile from the sync job.
            child_row = create_job_record(
                account_id=row["user_id"],
                project_id=row.get("project_id", ""),
                parent_job_id=row["id"],
                knowledge_base_id=row["knowledge_base_id"],
                source_type="video_link",
                line_type="analysis",
                workflow_key="analysis_pipeline",
                title=str(item.get("title") or f"内容源视频 {index}"),
                language=row.get("language", "auto"),
                source_url=video_url,
                assistant_id=row.get("assistant_id"),
                content_source_id=content_source["id"],
                artifacts={
                    "origin_content_source_id": row.get("content_source_id", ""),
                    "origin_sync_job_id": row["id"],
                    "source_account_url": source_url,
                },
                analysis_model_profile_id=row.get("analysis_model_profile_id", ""),
            )
            child_jobs.append(job_payload(child_row))
            if auto_trigger_analysis:
                queued_child = await trigger_orchestrated_job(child_row)
                queued_jobs.append(job_payload(queued_child))
        if row.get("content_source_id"):
            # Record the outcome on the originating content source and clear
            # any error left by a previous sync.
            update_content_source_metadata(
                row["content_source_id"],
                {
                    "last_sync_job_id": row["id"],
                    "last_sync_completed_at": utc_now(),
                    "last_discovered_count": len(discovered_items),
                    "last_enqueued_job_ids": [item["id"] for item in queued_jobs] or [item["id"] for item in child_jobs],
                    "last_skipped_existing_count": len(skipped_items),
                    "last_source_account_url": source_url,
                    "last_sync_error": "",
                },
            )
        updated = update_job_state(
            row["id"],
            status="completed",
            provider_name="collector",
            provider_task_id=row["id"],
            artifacts={
                **debug_payload,
                "discovered_videos": discovered_items,
                "skipped_existing": skipped_items,
                "child_job_ids": [item["id"] for item in child_jobs],
                "queued_job_ids": [item["id"] for item in queued_jobs],
            },
            result={
                "discovered_count": len(discovered_items),
                "queued_count": len(queued_jobs) if auto_trigger_analysis else len(child_jobs),
                "skipped_count": len(skipped_items),
                "child_jobs": queued_jobs or child_jobs,
                "skipped_existing": skipped_items,
            },
        )
        return job_context_payload(updated)
    except HTTPException as exc:
        # Keep only the detail; the failure is persisted below, not re-raised.
        error = str(exc.detail)
    except Exception as exc:
        error = str(exc)
    # Only reached on failure: the success path returns inside the try block.
    if row.get("content_source_id"):
        update_content_source_metadata(
            row["content_source_id"],
            {
                "last_sync_job_id": row["id"],
                "last_sync_completed_at": utc_now(),
                "last_sync_error": error[:500],
                "last_source_account_url": source_url,
            },
        )
    updated = update_job_state(
        row["id"],
        status="failed",
        error=error[:500],
        provider_name="collector",
        provider_task_id=row["id"],
    )
    return job_context_payload(updated)
@app.post("/internal/jobs/steps/real-cut/submit")
async def internal_real_cut_submit(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Submit a real-cut job to CutVideo and mark it as processing.

    Responds 503 when CutVideo is not configured and 400 when the stored
    ``cutvideo_request`` artifact is not a dict.
    """
    if not cutvideo_client.enabled:
        raise HTTPException(status_code=503, detail="CutVideo is not configured")

    job = load_step_job(request, job_id, "real_cut_pipeline")
    submit_payload = parse_job_artifacts(job).get("cutvideo_request") or {}
    if not isinstance(submit_payload, dict):
        raise HTTPException(status_code=400, detail="Invalid cutvideo request payload")

    append_job_event(job["id"], "cutvideo.submit.requested", submit_payload)
    submit_result = await cutvideo_client.submit_job(submit_payload)
    provider_task_id = str(submit_result.get("task_id") or "")

    refreshed = update_job_state(
        job["id"],
        status="processing",
        provider_name="cutvideo",
        provider_task_id=provider_task_id,
        result={"cutvideo_submit": submit_result},
    )
    return job_context_payload(refreshed)
@app.post("/internal/jobs/steps/real-cut/poll")
async def internal_real_cut_poll(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Poll CutVideo once for a submitted real-cut job and persist the state.

    Maps the provider status to the job status: ``completed`` and ``failed``
    are terminal, anything else keeps the job ``processing``. On completion
    the run payload is fetched when the task reports a ``run_id``.

    Responds 503 when CutVideo is not configured (same guard as the submit
    and run steps) and 409 when the job has no provider task id yet.
    """
    if not cutvideo_client.enabled:
        raise HTTPException(status_code=503, detail="CutVideo is not configured")
    row = load_step_job(request, job_id, "real_cut_pipeline")
    if not row.get("provider_task_id"):
        raise HTTPException(status_code=409, detail="CutVideo task has not been submitted")
    task_payload = await cutvideo_client.get_task(row["provider_task_id"])
    status = str(task_payload.get("status") or "").lower()
    run_payload: dict[str, Any] = {}
    artifacts: dict[str, Any] = {"cutvideo_task": task_payload}
    # Carry the stored error forward unless the provider reports a failure.
    error = row.get("error", "")
    if status == "completed":
        next_status = "completed"
        run_id = str(task_payload.get("run_id") or "")
        if run_id:
            run_payload = await cutvideo_client.get_run(run_id)
            artifacts["cutvideo_run"] = run_payload
    elif status == "failed":
        next_status = "failed"
        error = str(task_payload.get("error") or "CutVideo task failed")
    else:
        next_status = "processing"
    updated = update_job_state(
        row["id"],
        status=next_status,
        error=error,
        provider_name="cutvideo",
        provider_task_id=row["provider_task_id"],
        artifacts=artifacts,
        # Prefer the run details in the result once they are available.
        result={"cutvideo_run": run_payload} if run_payload else {"cutvideo_task": task_payload},
    )
    return job_context_payload(updated)
@app.post("/internal/jobs/steps/real-cut/run")
async def internal_real_cut_run(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Submit a real-cut job if needed, then poll CutVideo until it finishes.

    The job is submitted only when it has no provider task id yet. The task
    is then polled every ``CUTVIDEO_POLL_INTERVAL_SEC`` seconds until it
    reports ``completed`` or ``failed``, or the deadline passes, in which case
    the job is marked failed with "CutVideo task timed out".

    Responds 503 when CutVideo is not configured and 400 when the stored
    ``cutvideo_request`` artifact is not a dict.
    """
    if not cutvideo_client.enabled:
        raise HTTPException(status_code=503, detail="CutVideo is not configured")
    row = load_step_job(request, job_id, "real_cut_pipeline")
    if not row.get("provider_task_id"):
        artifacts = parse_job_artifacts(row)
        cutvideo_request = artifacts.get("cutvideo_request") or {}
        if not isinstance(cutvideo_request, dict):
            raise HTTPException(status_code=400, detail="Invalid cutvideo request payload")
        submit_result = await cutvideo_client.submit_job(cutvideo_request)
        row = update_job_state(
            row["id"],
            status="processing",
            provider_name="cutvideo",
            provider_task_id=str(submit_result.get("task_id") or ""),
            result={"cutvideo_submit": submit_result},
        )
    # NOTE(review): the CutVideo deadline is derived from HUOBAO_MAX_WAIT_SEC;
    # confirm this is intentional and not a copy-paste from the Huobao step.
    deadline = now_ts() + HUOBAO_MAX_WAIT_SEC
    while True:
        task_payload = await cutvideo_client.get_task(row["provider_task_id"])
        status = str(task_payload.get("status") or "").lower()
        if status == "completed":
            run_payload: dict[str, Any] = {}
            run_id = str(task_payload.get("run_id") or "")
            if run_id:
                run_payload = await cutvideo_client.get_run(run_id)
            updated = update_job_state(
                row["id"],
                status="completed",
                provider_name="cutvideo",
                provider_task_id=row["provider_task_id"],
                artifacts={"cutvideo_task": task_payload, "cutvideo_run": run_payload},
                result={"cutvideo_task": task_payload, "cutvideo_run": run_payload},
            )
            return job_context_payload(updated)
        if status == "failed":
            updated = update_job_state(
                row["id"],
                status="failed",
                error=str(task_payload.get("error") or "CutVideo task failed"),
                provider_name="cutvideo",
                provider_task_id=row["provider_task_id"],
                artifacts={"cutvideo_task": task_payload},
                result={"cutvideo_task": task_payload},
            )
            return job_context_payload(updated)
        # The deadline is checked after a poll, so at least one poll always runs.
        if now_ts() >= deadline:
            updated = update_job_state(
                row["id"],
                status="failed",
                error="CutVideo task timed out",
                provider_name="cutvideo",
                provider_task_id=row["provider_task_id"],
                artifacts={"cutvideo_task": task_payload},
                result={"cutvideo_task": task_payload},
            )
            return job_context_payload(updated)
        await asyncio.sleep(CUTVIDEO_POLL_INTERVAL_SEC)
        # Re-read the row each round so the next poll uses the stored task id.
        row = load_internal_job(row["id"])
@app.post("/internal/jobs/steps/ai-video/render")
async def internal_ai_video_render(
    request: InternalStepRequest | None = Body(default=None),
    job_id: str = Query(default=""),
    _: bool = Depends(require_orchestrator),
) -> dict[str, Any]:
    """Render an ``ai_video_pipeline`` job through Huobao, one clip per storyboard.

    Storyboards are taken from the job named by the ``source_job_id`` artifact
    when that job yields any; otherwise a content blueprint is generated from
    the job's brief/title and its storyboards are used. The list is capped at
    the ``shots`` artifact (default 4, minimum 1). For every storyboard a
    first-frame image and a last-frame image are generated, then a video
    using that pair as ``first_last`` references.

    Returns:
        The job context payload of the job after it is marked ``completed``.

    Raises:
        HTTPException: 503 when the Huobao client is not enabled; 400 when no
            storyboards are available.
        RuntimeError: when Huobao returns no drama id, when an image or video
            does not end in the ``completed`` status, or when a finished image
            has neither ``image_url`` nor ``local_path``.

    NOTE(review): errors raised after the job is switched to ``processing``
    propagate without this handler writing a ``failed`` state; presumably the
    orchestrator records the failure -- confirm.
    """
    if not huobao_client.enabled:
        raise HTTPException(status_code=503, detail="Huobao is not configured")

    row = load_step_job(request, job_id, "ai_video_pipeline")
    artifacts = parse_job_artifacts(row)

    def shot_limit() -> int:
        # Evaluated lazily at the point of slicing; never less than one shot.
        return max(int(artifacts.get("shots") or 4), 1)

    def is_completed(payload: dict[str, Any]) -> bool:
        return str(payload.get("status") or "").lower() == "completed"

    def prompt_from(board: dict[str, Any], *keys: str) -> str:
        # First truthy storyboard field wins; the job title is the last resort.
        for key in keys:
            candidate = board.get(key)
            if candidate:
                return str(candidate)
        return str(row["title"])

    assistant = None
    if row.get("assistant_id"):
        assistant = db.fetch_one("SELECT * FROM assistants WHERE id = ?", (row["assistant_id"],))

    # Prefer storyboards already produced by a source job owned by the same user.
    source_storyboards: list[dict[str, Any]] = []
    source_job_id = str(artifacts.get("source_job_id") or "").strip()
    if source_job_id:
        source_job = db.fetch_one(
            "SELECT * FROM jobs WHERE id = ? AND user_id = ?",
            (source_job_id, row["user_id"]),
        )
        if source_job:
            source_storyboards = extract_source_storyboards(source_job)

    if source_storyboards:
        storyboard_items = source_storyboards[: shot_limit()]
    else:
        # No reusable storyboards: ask the analysis model for a fresh blueprint.
        assistant_config = assistant or {}
        profile = model_profile_for_account(row["user_id"], row.get("analysis_model_profile_id") or None)
        blueprint = await generate_content_blueprint(
            profile,
            title=row["title"],
            transcript_text=str(artifacts.get("brief") or row["title"]),
            style_summary=str(artifacts.get("style") or ""),
            agent_prompt=assistant_config.get("system_prompt", ""),
            generation_goal=assistant_config.get("generation_goal", "") or "生成适合视频模型的分镜与提示词",
        )
        generated = coerce_storyboards(blueprint.get("storyboards"))
        storyboard_items = generated[: shot_limit()]

    if not storyboard_items:
        raise HTTPException(status_code=400, detail="No storyboards available for AI video rendering")

    drama_request = {
        "title": row["title"],
        "description": str(artifacts.get("brief") or row["title"]),
        "style": str(artifacts.get("style") or "realistic"),
        "genre": "short_video",
        "tags": "storyforge",
    }
    drama_payload = await huobao_client.create_drama(drama_request)
    drama_id = str(drama_payload.get("id") or "")
    if not drama_id:
        raise RuntimeError("Huobao did not return drama id")

    # Record the provider task as soon as the drama exists.
    update_job_state(
        row["id"],
        status="processing",
        provider_name="huobao-drama",
        provider_task_id=drama_id,
        result={"huobao_drama": drama_payload},
    )

    rendered_scenes: list[dict[str, Any]] = []
    image_provider = str(artifacts.get("image_provider") or "openai")
    image_model = str(artifacts.get("image_model") or "")
    video_provider = str(artifacts.get("video_provider") or "doubao")
    video_model = str(artifacts.get("video_model") or "")
    aspect_ratio = str(artifacts.get("aspect_ratio") or "9:16")
    image_size = huobao_image_size_for_aspect_ratio(aspect_ratio)
    duration = int(artifacts.get("duration") or 5)
    style = str(artifacts.get("style") or "realistic")

    def frame_request(frame_type: str, prompt: str) -> dict[str, Any]:
        # Both frames share every field except the frame type and its prompt.
        return {
            "drama_id": drama_id,
            "image_type": "storyboard",
            "frame_type": frame_type,
            "prompt": prompt,
            "provider": image_provider,
            "model": image_model,
            "size": image_size,
            "style": style,
        }

    for idx, storyboard in enumerate(storyboard_items, start=1):
        first_prompt = prompt_from(storyboard, "first_frame_prompt", "visual", "title")
        last_prompt = prompt_from(storyboard, "last_frame_prompt", "visual", "title")
        video_prompt = prompt_from(storyboard, "video_prompt", "narration", "title")

        # Submit both frames before waiting on either of them.
        first_image = await huobao_client.generate_image(frame_request("first", first_prompt))
        last_image = await huobao_client.generate_image(frame_request("last", last_prompt))
        first_ready = await wait_for_huobao_image(str(first_image.get("id") or ""))
        last_ready = await wait_for_huobao_image(str(last_image.get("id") or ""))

        if not is_completed(first_ready):
            raise RuntimeError(f"First frame generation failed for scene {idx}")
        if not is_completed(last_ready):
            raise RuntimeError(f"Last frame generation failed for scene {idx}")

        first_frame_url = first_ready.get("image_url") or first_ready.get("local_path")
        last_frame_url = last_ready.get("image_url") or last_ready.get("local_path")
        if not (first_frame_url and last_frame_url):
            raise RuntimeError(f"Huobao image output missing for scene {idx}")

        video_request = {
            "drama_id": drama_id,
            "prompt": video_prompt,
            "provider": video_provider,
            "model": video_model,
            "reference_mode": "first_last",
            "first_frame_url": first_frame_url,
            "last_frame_url": last_frame_url,
            "aspect_ratio": aspect_ratio,
            "duration": duration,
            "style": style,
        }
        video_payload = await huobao_client.generate_video(video_request)
        video_ready = await wait_for_huobao_video(str(video_payload.get("id") or ""))
        if not is_completed(video_ready):
            raise RuntimeError(f"Video generation failed for scene {idx}")

        scene = {
            "shot_index": storyboard.get("shot_index", idx),
            "title": storyboard.get("title", f"镜头{idx}"),
            "narration": storyboard.get("narration", ""),
            "first_frame": first_ready,
            "last_frame": last_ready,
            "video": video_ready,
        }
        rendered_scenes.append(scene)

    final_artifacts = {
        "huobao_drama_id": drama_id,
        "source_job_id": source_job_id,
    }
    final_result = {
        "huobao_drama": drama_payload,
        "rendered_scenes": rendered_scenes,
        "storyboards": storyboard_items,
    }
    updated = update_job_state(
        row["id"],
        status="completed",
        provider_name="huobao-drama",
        provider_task_id=drama_id,
        artifacts=final_artifacts,
        result=final_result,
    )
    return job_context_payload(updated)
@app.post("/internal/jobs/{job_id}/status")
def internal_update_job_status(job_id: str, request: JobStatusUpdateRequest, _: bool = Depends(require_orchestrator)) -> dict[str, Any]:
    """Apply a status update to a job and return its context payload.

    Empty ``provider_name`` / ``provider_task_id`` values in the request are
    forwarded to ``update_job_state`` as ``None``; all other fields are passed
    through unchanged.
    """
    changes: dict[str, Any] = {
        "status": request.status,
        "error": request.error,
        # Falsy provider fields are normalised to None before the update.
        "provider_name": request.provider_name or None,
        "provider_task_id": request.provider_task_id or None,
        "artifacts": request.artifacts,
        "result": request.result,
    }
    refreshed = update_job_state(job_id, **changes)
    return job_context_payload(refreshed)
@app.get("/v2/admin/accounts/pending")
def pending_accounts(admin: dict[str, Any] = Depends(require_super_admin)) -> list[dict[str, Any]]:
    """List accounts whose approval status is ``pending``, oldest first.

    Each row is passed through ``normalize_account`` before being returned.
    """
    pending_rows = db.fetch_all("SELECT * FROM accounts WHERE approval_status = 'pending' ORDER BY created_at ASC")
    normalized: list[dict[str, Any]] = []
    for account_row in pending_rows:
        normalized.append(normalize_account(account_row))
    return normalized
@app.post("/v2/admin/accounts/{account_id}/approve")
def approve_account(account_id: str, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]:
    """Mark an account as approved and make sure its default workspace exists.

    Records the acting admin's id and the approval time on the account row,
    then calls ``ensure_default_project`` and ``ensure_user_kb`` for the
    account using the username from the re-read row.

    Raises:
        HTTPException: 404 when no account has the given id.
    """
    lookup_sql = "SELECT * FROM accounts WHERE id = ?"
    if not db.fetch_one(lookup_sql, (account_id,)):
        raise HTTPException(status_code=404, detail="Account not found")

    reviewer_id = admin["id"]
    approved_at = utc_now()
    updated_at = utc_now()
    db.execute(
        "UPDATE accounts SET approval_status = 'approved', approved_by = ?, approved_at = ?, updated_at = ? WHERE id = ?",
        (reviewer_id, approved_at, updated_at, account_id),
    )

    # Re-read the row so the response reflects the values just written.
    approved = db.fetch_one(lookup_sql, (account_id,))
    default_project = ensure_default_project(account_id, username=approved["username"])
    ensure_user_kb(account_id, default_project["id"], username=approved["username"])
    return {"saved": True, "account": normalize_account(approved)}
@app.post("/v2/admin/accounts/{account_id}/reject")
def reject_account(account_id: str, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]:
    """Mark an account as rejected and return the updated, normalized row.

    The acting admin's id and the decision time are written to the
    ``approved_by`` / ``approved_at`` columns, the same columns the approval
    statement uses.

    Raises:
        HTTPException: 404 when no account has the given id.
    """
    lookup_sql = "SELECT * FROM accounts WHERE id = ?"
    if not db.fetch_one(lookup_sql, (account_id,)):
        raise HTTPException(status_code=404, detail="Account not found")

    reviewer_id = admin["id"]
    decided_at = utc_now()
    updated_at = utc_now()
    db.execute(
        "UPDATE accounts SET approval_status = 'rejected', approved_by = ?, approved_at = ?, updated_at = ? WHERE id = ?",
        (reviewer_id, decided_at, updated_at, account_id),
    )

    # Re-read the row so the response reflects the values just written.
    rejected = db.fetch_one(lookup_sql, (account_id,))
    return {"saved": True, "account": normalize_account(rejected)}
@app.get("/api/v1/app/update/latest")
def latest_update(
    platform: str = Query(default="android"),
    channel: str = Query(default="stable"),
    currentVersionCode: int | None = Query(default=None),
) -> dict[str, Any]:
    """Report the newest active app release for a platform/channel pair.

    Args:
        platform: Platform to look up (default ``android``).
        channel: Release channel to look up (default ``stable``).
        currentVersionCode: Version code the client is running, if it sent one.

    Returns:
        A dict describing the active release with the highest version code
        (ties broken by latest ``published_at``). ``hasUpdate`` is true when
        the client sent no version code or the release is newer than it. When
        no active release exists, an empty description with ``hasUpdate``
        false is returned.
    """
    row = db.fetch_one(
        "SELECT * FROM app_updates WHERE platform = ? AND channel = ? AND is_active = 1 ORDER BY version_code DESC, published_at DESC LIMIT 1",
        (platform, channel),
    )
    if not row:
        return {
            "platform": platform,
            "channel": channel,
            "hasUpdate": False,
            "latestVersionCode": currentVersionCode or 0,
            "latestVersionName": "",
            "minSupportedCode": 0,
            "downloadUrl": "",
            "apkSha256": "",
            "releaseNotes": "",
            "forceUpdate": False,
            "publishedAt": 0,
        }

    latest_version_code = int(row["version_code"])
    return {
        "platform": row["platform"],
        "channel": row["channel"],
        "hasUpdate": currentVersionCode is None or latest_version_code > currentVersionCode,
        "latestVersionCode": latest_version_code,
        "latestVersionName": row["version_name"],
        # `.get(key, default)` only covers a missing key; a stored NULL comes
        # back as None, so fall back with `or` to keep int() from raising and
        # to keep the string fields as strings.
        "minSupportedCode": int(row.get("min_supported_code") or 0),
        "downloadUrl": row["apk_url"],
        "apkSha256": row.get("apk_sha256") or "",
        "releaseNotes": row.get("notes") or "",
        "forceUpdate": bool(row.get("force_update")),
        "publishedAt": int(row.get("published_at") or 0),
    }
@app.post("/v2/admin/app/update/publish")
def publish_app_update(request: PublishAppUpdateRequest, admin: dict[str, Any] = Depends(require_super_admin)) -> dict[str, Any]:
    """Insert a new app release row and report its id.

    Every existing release for the same platform and channel is first marked
    inactive, then the new release is inserted with the current timestamp and
    the acting admin's id. The id of the newest row matching the platform,
    channel and version code is returned as ``updateId`` (0 if none is found).

    NOTE(review): the deactivating UPDATE runs even when ``request.isActive``
    is false, which leaves the channel without an active release; confirm
    that this is intended.
    """
    channel_key = (request.platform, request.channel)
    db.execute(
        "UPDATE app_updates SET is_active = 0 WHERE platform = ? AND channel = ?",
        channel_key,
    )

    insert_values = (
        request.platform,
        request.channel,
        request.versionCode,
        request.versionName,
        request.minSupportedCode,
        request.apkUrl,
        request.apkSha256,
        request.notes,
        # Booleans are stored as 0/1 integers.
        1 if request.forceUpdate else 0,
        1 if request.isActive else 0,
        now_ts(),
        admin["id"],
    )
    db.execute(
        """
INSERT INTO app_updates (
platform, channel, version_code, version_name, min_supported_code,
apk_url, apk_sha256, notes, force_update, is_active, published_at, created_by
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
        insert_values,
    )

    # Look the row up again to learn the id assigned by the database.
    inserted = db.fetch_one(
        """
SELECT id
FROM app_updates
WHERE platform = ? AND channel = ? AND version_code = ?
ORDER BY id DESC
LIMIT 1
""",
        (request.platform, request.channel, request.versionCode),
    )
    update_id = inserted["id"] if inserted else 0
    return {"saved": True, "action": "published", "updateId": update_id}
# Pass the FastAPI app and this module object to register_douyin_routes
# (imported from .douyin_features).
# NOTE(review): presumably the callee looks up helpers defined in this module
# by attribute, which would be why the module object is passed -- confirm.
register_douyin_routes(app, sys.modules[__name__])