diff --git a/collector-service/app/core_main.py b/collector-service/app/core_main.py index 57c089c..17de693 100644 --- a/collector-service/app/core_main.py +++ b/collector-service/app/core_main.py @@ -478,6 +478,112 @@ def legacy_job_storage_dir(job_id: str) -> Path: return JOBS_DIR / storage_token(job_id, "job") +def job_account_root(account_id: str) -> Path: + return JOBS_DIR / storage_token(account_id, "anonymous") + + +def job_project_root(account_id: str, project_id: str | None) -> Path: + return job_account_root(account_id) / storage_token(project_id, "default-project") + + +def tenant_download_root(account_id: str, project_id: str | None) -> Path: + return DOWNLOADS_DIR / storage_token(account_id, "anonymous") / storage_token(project_id, "default-project") + + +def storage_mode(path: Path) -> str: + text = str(path) + if text.startswith("/Users/kris/mnt/fnos-share"): + return "nas" + return "local" + + +def disk_usage_payload(path: Path) -> dict[str, Any]: + probe = path if path.exists() else path.parent + try: + total, used, free = shutil.disk_usage(probe) + return { + "path": str(path), + "mode": storage_mode(path), + "total_bytes": total, + "used_bytes": used, + "free_bytes": free, + } + except Exception as exc: + return { + "path": str(path), + "mode": storage_mode(path), + "error": str(exc), + } + + +def directory_usage_payload(path: Path) -> dict[str, Any]: + if not path.exists(): + return { + "path": str(path), + "mode": storage_mode(path), + "exists": False, + "file_count": 0, + "dir_count": 0, + "bytes": 0, + } + total_bytes = 0 + file_count = 0 + dir_count = 0 + for root, dirnames, filenames in os.walk(path): + dir_count += len(dirnames) + for filename in filenames: + file_count += 1 + try: + total_bytes += (Path(root) / filename).stat().st_size + except OSError: + continue + return { + "path": str(path), + "mode": storage_mode(path), + "exists": True, + "file_count": file_count, + "dir_count": dir_count, + "bytes": total_bytes, + } + + +def recent_job_storage_examples(account_id: str, project_id: str | None, limit: int = 5) -> list[dict[str, Any]]: + clauses = ["user_id = ?"] + params: list[Any] = [account_id] + normalized_project_id = str(project_id or "").strip() + if normalized_project_id: + clauses.append("project_id = ?") + params.append(normalized_project_id) + rows = db.fetch_all( + f""" + SELECT id, title, status, updated_at, artifacts_json + FROM jobs + WHERE {" AND ".join(clauses)} + ORDER BY updated_at DESC + LIMIT ? + """, + tuple(params + [limit]), + ) + items: list[dict[str, Any]] = [] + for row in rows: + artifacts = parse_json_object(row.get("artifacts_json") or "{}") + paths = [] + for key in ("uploaded_path", "source_path", "audio_path", "transcript_path", "project_job_dir"): + value = str(artifacts.get(key) or "").strip() + if value: + paths.append({"key": key, "path": value}) + items.append( + { + "job_id": row["id"], + "title": row.get("title", "") or row["id"], + "status": row.get("status", ""), + "updated_at": row.get("updated_at", ""), + "paths": paths, + } + ) + return items + + def live_recorder_remote_name(source_id: str) -> str: return f"{LIVE_RECORDER_MANAGED_PREFIX}{source_id.replace('-', '')[:12]}" @@ -2313,6 +2419,41 @@ def integrations_health(account: dict[str, Any] = Depends(require_approved)) -> } +@app.get("/v2/storage/status") +def storage_status( + project_id: str | None = Query(default=None), + account: dict[str, Any] = Depends(require_approved), +) -> dict[str, Any]: + normalized_project_id = (project_id or "").strip() + if normalized_project_id: + resolve_target_project(account["id"], normalized_project_id, username=account["username"]) + jobs_account_root = job_account_root(account["id"]) + jobs_project_root = job_project_root(account["id"], normalized_project_id or None) + downloads_project_root = tenant_download_root(account["id"], normalized_project_id or None) + return { + "strategy": { + "database": {"mode": storage_mode(Path(DB_PATH)), "path": DB_PATH}, + "jobs": {"mode": storage_mode(JOBS_DIR), "path": str(JOBS_DIR)}, + "downloads": {"mode": storage_mode(DOWNLOADS_DIR), "path": str(DOWNLOADS_DIR)}, + "models": {"mode": storage_mode(MODELS_DIR), "path": str(MODELS_DIR)}, + "live_recorder": {"mode": "nas_service", "base_url": LIVE_RECORDER_BASE_URL}, + }, + "disk": { + "database": disk_usage_payload(Path(DB_PATH)), + "jobs": disk_usage_payload(JOBS_DIR), + "downloads": disk_usage_payload(DOWNLOADS_DIR), + "models": disk_usage_payload(MODELS_DIR), + }, + "tenant_usage": { + "account_jobs": directory_usage_payload(jobs_account_root), + "project_jobs": directory_usage_payload(jobs_project_root), + "project_downloads": directory_usage_payload(downloads_project_root), + "project_id": normalized_project_id, + "recent_jobs": recent_job_storage_examples(account["id"], normalized_project_id or None, limit=6), + }, + } + + @app.get("/v2/integrations/local-models") def integrations_local_models(account: dict[str, Any] = Depends(require_approved)) -> dict[str, Any]: _ = account