diff --git a/CHANGELOG.md b/CHANGELOG.md
index f288fb8..4ad8ee4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
 - Windows `ASR HTTP` 现在在 `auto` 模式下仍会优先尝试 `cuda + int8_float16`,但如果在真正推理阶段命中 `cublas/cudnn/cuda` 运行库缺失,会自动切回 `cpu + int8` 重试,不再把整次转写卡死在 GPU 路径。
 - 这让“默认优先用 GPU、但当前机器 CUDA 运行库不完整”的场景也能稳定返回结果,同时保留混合中英文自动识别。
 - `smoke_public_storyforge.sh` 与 `smoke_fnos_storyforge_lan.sh` 现在会覆盖 ASR 转写链路,公网 smoke 在遇到 `127.0.0.1` 这类服务器内网地址时会自动跳过真实转写,避免在开发机上误判。
+- Windows ASR 运行时现在会自动发现 venv 里的 `nvidia-cublas-cu12 / nvidia-cuda-runtime-cu12 / nvidia-cudnn-cu12` DLL 目录并注入搜索路径;实机验证后 `active_device` 已经恢复为 `cuda`,不再长期回退到 CPU。
 
 ### Windows ASR 默认改成 GPU 优先与自动语言识别
 
diff --git a/deploy/storyforge-windows-asr-http/app.py b/deploy/storyforge-windows-asr-http/app.py
index a3b4b28..7e32348 100644
--- a/deploy/storyforge-windows-asr-http/app.py
+++ b/deploy/storyforge-windows-asr-http/app.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import os
+import sys
+import sysconfig
 import tempfile
 import time
 from functools import lru_cache
@@ -15,6 +17,7 @@ VAD_FILTER = os.getenv("WHISPER_VAD_FILTER", "1").strip().lower() not in {"0", "
 DOWNLOAD_ROOT = Path(os.getenv("WHISPER_DOWNLOAD_ROOT", str(Path(__file__).resolve().parent / "models-cache")))
 
 app = FastAPI(title="storyforge-windows-asr", version="1.0.0")
+_dll_handles: list[object] = []
 
 
 def describe_language_mode() -> str:
@@ -66,8 +69,60 @@ def activate_cpu_fallback() -> None:
     get_model.cache_clear()
 
 
+def find_windows_cuda_runtime_dirs(site_packages_root: Path | None = None) -> list[Path]:
+    """Return the NVIDIA wheel ``bin`` directories that exist under site-packages.
+
+    Args:
+        site_packages_root: Directory to search. Defaults to the interpreter's
+            ``purelib`` path as reported by ``sysconfig``.
+
+    Returns:
+        The directories that exist, in the fixed order cuBLAS, CUDA runtime,
+        NVRTC, cuDNN. Empty when none are present.
+    """
+    root = site_packages_root or Path(sysconfig.get_paths()["purelib"])
+    relative_dirs = (
+        "nvidia/cublas/bin",
+        "nvidia/cuda_runtime/bin",
+        "nvidia/cuda_nvrtc/bin",
+        "nvidia/cudnn/bin",
+    )
+    # is_dir() rather than exists(): a plain file at one of these paths is not
+    # a usable DLL search directory.
+    return [root / rel for rel in relative_dirs if (root / rel).is_dir()]
+
+
+def configure_windows_cuda_runtime() -> None:
+    """Make the discovered NVIDIA DLL directories visible to the DLL loader.
+
+    Returns immediately on non-Windows platforms. On Windows the discovery runs
+    once: the result (possibly an empty list) is stored as strings in
+    ``app.state.windows_cuda_runtime_dirs`` and later calls return early. Each
+    directory is prepended to ``PATH`` and, when ``os.add_dll_directory`` is
+    available, registered through it as well.
+    """
+    if sys.platform != "win32":
+        return
+    if getattr(app.state, "windows_cuda_runtime_dirs", None) is not None:
+        return
+    runtime_dirs = [str(path) for path in find_windows_cuda_runtime_dirs()]
+    app.state.windows_cuda_runtime_dirs = runtime_dirs
+    if not runtime_dirs:
+        return
+    # An unset or empty PATH must not turn into a single empty entry.
+    current_path = os.environ.get("PATH", "")
+    path_parts = current_path.split(os.pathsep) if current_path else []
+    missing = [entry for entry in runtime_dirs if entry not in path_parts]
+    # Prepend as one slice; repeated insert(0, ...) would reverse the order.
+    os.environ["PATH"] = os.pathsep.join(missing + path_parts)
+    if hasattr(os, "add_dll_directory"):
+        # Keep the handles referenced; closing one would unregister its directory.
+        _dll_handles.extend(os.add_dll_directory(entry) for entry in runtime_dirs)
+
+
 @lru_cache(maxsize=1)
 def get_model():
+    configure_windows_cuda_runtime()
     from faster_whisper import WhisperModel
 
     DOWNLOAD_ROOT.mkdir(parents=True, exist_ok=True)
@@ -91,6 +146,7 @@ def get_model():
 
 @app.get("/health")
 def health() -> dict[str, object]:
+    configure_windows_cuda_runtime()
     return {
         "status": "ok",
         "service": "storyforge-windows-asr",
@@ -102,6 +158,7 @@ def health() -> dict[str, object]:
         "active_compute_type": getattr(app.state, "runtime_compute_type", ""),
         "download_root": str(DOWNLOAD_ROOT),
         "model_loaded": get_model.cache_info().currsize > 0,
+        "windows_cuda_runtime_dirs": getattr(app.state, "windows_cuda_runtime_dirs", []),
     }
 
 
diff --git a/deploy/storyforge-windows-asr-http/requirements.txt b/deploy/storyforge-windows-asr-http/requirements.txt
index 9ea4c4c..341b5ad 100644
--- a/deploy/storyforge-windows-asr-http/requirements.txt
+++ b/deploy/storyforge-windows-asr-http/requirements.txt
@@ -2,3 +2,6 @@ fastapi==0.116.1
 uvicorn[standard]==0.35.0
 python-multipart==0.0.20
 faster-whisper>=1.1,<2
+nvidia-cublas-cu12; platform_system == "Windows"
+nvidia-cuda-runtime-cu12; platform_system == "Windows"
+nvidia-cudnn-cu12; platform_system == "Windows"
diff --git a/tests/test_windows_asr_http.py b/tests/test_windows_asr_http.py
index be09519..b5e1c2c 100644
--- a/tests/test_windows_asr_http.py
+++ b/tests/test_windows_asr_http.py
@@ -71,6 +71,30 @@ class WindowsAsrHttpTests(unittest.TestCase):
         self.assertEqual(module.describe_language_mode(), "zh")
         self.assertEqual(module.build_runtime_profiles(), [("cpu", "int8")])
 
+    def test_windows_cuda_runtime_discovery_finds_nvidia_wheel_bins(self) -> None:
+        module = load_windows_asr_app()
+        with tempfile.TemporaryDirectory() as tempdir:
+            root = Path(tempdir)
+            for rel in [
+                "nvidia/cublas/bin",
+                "nvidia/cuda_runtime/bin",
+                "nvidia/cuda_nvrtc/bin",
+                "nvidia/cudnn/bin",
+            ]:
+                (root / rel).mkdir(parents=True, exist_ok=True)
+
+            dirs = module.find_windows_cuda_runtime_dirs(root)
+
+        self.assertEqual(
+            [path.as_posix() for path in dirs],
+            [
+                (root / "nvidia/cublas/bin").as_posix(),
+                (root / "nvidia/cuda_runtime/bin").as_posix(),
+                (root / "nvidia/cuda_nvrtc/bin").as_posix(),
+                (root / "nvidia/cudnn/bin").as_posix(),
+            ],
+        )
+
     def test_auto_runtime_falls_back_to_cpu_when_cuda_runtime_is_missing(self) -> None:
         os.environ.pop("WHISPER_LANGUAGE", None)
         os.environ.pop("WHISPER_DEVICE", None)