fix: restore windows asr gpu runtime
This commit is contained in:
@@ -9,6 +9,7 @@
|
|||||||
- Windows `ASR HTTP` 现在在 `auto` 模式下仍会优先尝试 `cuda + int8_float16`,但如果在真正推理阶段命中 `cublas/cudnn/cuda` 运行库缺失,会自动切回 `cpu + int8` 重试,不再把整次转写卡死在 GPU 路径。
|
- Windows `ASR HTTP` 现在在 `auto` 模式下仍会优先尝试 `cuda + int8_float16`,但如果在真正推理阶段命中 `cublas/cudnn/cuda` 运行库缺失,会自动切回 `cpu + int8` 重试,不再把整次转写卡死在 GPU 路径。
|
||||||
- 这让“默认优先用 GPU、但当前机器 CUDA 运行库不完整”的场景也能稳定返回结果,同时保留混合中英文自动识别。
|
- 这让“默认优先用 GPU、但当前机器 CUDA 运行库不完整”的场景也能稳定返回结果,同时保留混合中英文自动识别。
|
||||||
- `smoke_public_storyforge.sh` 与 `smoke_fnos_storyforge_lan.sh` 现在会覆盖 ASR 转写链路,公网 smoke 在遇到 `127.0.0.1` 这类服务器内网地址时会自动跳过真实转写,避免在开发机上误判。
|
- `smoke_public_storyforge.sh` 与 `smoke_fnos_storyforge_lan.sh` 现在会覆盖 ASR 转写链路,公网 smoke 在遇到 `127.0.0.1` 这类服务器内网地址时会自动跳过真实转写,避免在开发机上误判。
|
||||||
|
- Windows ASR 运行时现在会自动发现 venv 里的 `nvidia-cublas-cu12 / nvidia-cuda-runtime-cu12 / nvidia-cudnn-cu12` DLL 目录并注入搜索路径;实机验证后 `active_device` 已经恢复为 `cuda`,不再长期回退到 CPU。
|
||||||
|
|
||||||
### Windows ASR 默认改成 GPU 优先与自动语言识别
|
### Windows ASR 默认改成 GPU 优先与自动语言识别
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
import sysconfig
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
@@ -15,6 +17,7 @@ VAD_FILTER = os.getenv("WHISPER_VAD_FILTER", "1").strip().lower() not in {"0", "
|
|||||||
DOWNLOAD_ROOT = Path(os.getenv("WHISPER_DOWNLOAD_ROOT", str(Path(__file__).resolve().parent / "models-cache")))
|
DOWNLOAD_ROOT = Path(os.getenv("WHISPER_DOWNLOAD_ROOT", str(Path(__file__).resolve().parent / "models-cache")))
|
||||||
|
|
||||||
app = FastAPI(title="storyforge-windows-asr", version="1.0.0")
|
app = FastAPI(title="storyforge-windows-asr", version="1.0.0")
|
||||||
|
# Handles returned by os.add_dll_directory() for the CUDA runtime directories
# registered in configure_windows_cuda_runtime().
_dll_handles: list[object] = []
|
||||||
|
|
||||||
|
|
||||||
def describe_language_mode() -> str:
|
def describe_language_mode() -> str:
|
||||||
@@ -66,8 +69,44 @@ def activate_cpu_fallback() -> None:
|
|||||||
get_model.cache_clear()
|
get_model.cache_clear()
|
||||||
|
|
||||||
|
|
||||||
|
def find_windows_cuda_runtime_dirs(site_packages_root: Path | None = None) -> list[Path]:
|
||||||
|
root = site_packages_root or Path(sysconfig.get_paths()["purelib"])
|
||||||
|
dirs = []
|
||||||
|
for rel in (
|
||||||
|
"nvidia/cublas/bin",
|
||||||
|
"nvidia/cuda_runtime/bin",
|
||||||
|
"nvidia/cuda_nvrtc/bin",
|
||||||
|
"nvidia/cudnn/bin",
|
||||||
|
):
|
||||||
|
path = root / rel
|
||||||
|
if path.exists():
|
||||||
|
dirs.append(path)
|
||||||
|
return dirs
|
||||||
|
|
||||||
|
|
||||||
|
def configure_windows_cuda_runtime() -> None:
    """Expose the CUDA runtime DLLs bundled in NVIDIA wheels to the DLL loader.

    Does nothing on non-Windows platforms. On Windows, the directories found
    by ``find_windows_cuda_runtime_dirs()`` are recorded on
    ``app.state.windows_cuda_runtime_dirs`` (an empty list when none are
    found), prepended to ``PATH`` and, where ``os.add_dll_directory`` exists,
    registered with it. Once the state attribute is set, later calls return
    immediately.
    """
    if sys.platform != "win32":
        return
    if getattr(app.state, "windows_cuda_runtime_dirs", None) is not None:
        return

    runtime_dirs = [str(path) for path in find_windows_cuda_runtime_dirs()]
    app.state.windows_cuda_runtime_dirs = runtime_dirs
    if not runtime_dirs:
        return

    current_path = os.environ.get("PATH", "")
    # "".split(os.pathsep) yields [""], which would leave a stray empty entry
    # in PATH when the variable is unset or empty.
    path_parts = current_path.split(os.pathsep) if current_path else []
    for runtime_dir in runtime_dirs:
        if runtime_dir not in path_parts:
            path_parts.insert(0, runtime_dir)
    # Write PATH before registering DLL directories: the function is already
    # marked as configured, so a failing registration must not skip the PATH
    # fallback for the rest of the process lifetime.
    os.environ["PATH"] = os.pathsep.join(path_parts)

    if hasattr(os, "add_dll_directory"):
        for runtime_dir in runtime_dirs:
            _dll_handles.append(os.add_dll_directory(runtime_dir))
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=1)
|
@lru_cache(maxsize=1)
|
||||||
def get_model():
|
def get_model():
|
||||||
|
configure_windows_cuda_runtime()
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
DOWNLOAD_ROOT.mkdir(parents=True, exist_ok=True)
|
DOWNLOAD_ROOT.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -91,6 +130,7 @@ def get_model():
|
|||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
def health() -> dict[str, object]:
|
def health() -> dict[str, object]:
|
||||||
|
configure_windows_cuda_runtime()
|
||||||
return {
|
return {
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"service": "storyforge-windows-asr",
|
"service": "storyforge-windows-asr",
|
||||||
@@ -102,6 +142,7 @@ def health() -> dict[str, object]:
|
|||||||
"active_compute_type": getattr(app.state, "runtime_compute_type", ""),
|
"active_compute_type": getattr(app.state, "runtime_compute_type", ""),
|
||||||
"download_root": str(DOWNLOAD_ROOT),
|
"download_root": str(DOWNLOAD_ROOT),
|
||||||
"model_loaded": get_model.cache_info().currsize > 0,
|
"model_loaded": get_model.cache_info().currsize > 0,
|
||||||
|
"windows_cuda_runtime_dirs": getattr(app.state, "windows_cuda_runtime_dirs", []),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,3 +2,6 @@ fastapi==0.116.1
|
|||||||
uvicorn[standard]==0.35.0
|
uvicorn[standard]==0.35.0
|
||||||
python-multipart==0.0.20
|
python-multipart==0.0.20
|
||||||
faster-whisper>=1.1,<2
|
faster-whisper>=1.1,<2
|
||||||
|
nvidia-cublas-cu12; platform_system == "Windows"
|
||||||
|
nvidia-cuda-runtime-cu12; platform_system == "Windows"
|
||||||
|
nvidia-cudnn-cu12; platform_system == "Windows"
|
||||||
|
|||||||
@@ -71,6 +71,30 @@ class WindowsAsrHttpTests(unittest.TestCase):
|
|||||||
self.assertEqual(module.describe_language_mode(), "zh")
|
self.assertEqual(module.describe_language_mode(), "zh")
|
||||||
self.assertEqual(module.build_runtime_profiles(), [("cpu", "int8")])
|
self.assertEqual(module.build_runtime_profiles(), [("cpu", "int8")])
|
||||||
|
|
||||||
|
def test_windows_cuda_runtime_discovery_finds_nvidia_wheel_bins(self) -> None:
    """Discovery returns every NVIDIA wheel ``bin`` directory in lookup order."""
    module = load_windows_asr_app()
    wheel_bins = (
        "nvidia/cublas/bin",
        "nvidia/cuda_runtime/bin",
        "nvidia/cuda_nvrtc/bin",
        "nvidia/cudnn/bin",
    )
    with tempfile.TemporaryDirectory() as tempdir:
        root = Path(tempdir)
        # Lay out a fake site-packages tree containing all four runtime dirs.
        for rel in wheel_bins:
            (root / rel).mkdir(parents=True, exist_ok=True)

        found = module.find_windows_cuda_runtime_dirs(root)

        expected = [(root / rel).as_posix() for rel in wheel_bins]
        self.assertEqual([path.as_posix() for path in found], expected)
|
||||||
|
|
||||||
def test_auto_runtime_falls_back_to_cpu_when_cuda_runtime_is_missing(self) -> None:
|
def test_auto_runtime_falls_back_to_cpu_when_cuda_runtime_is_missing(self) -> None:
|
||||||
os.environ.pop("WHISPER_LANGUAGE", None)
|
os.environ.pop("WHISPER_LANGUAGE", None)
|
||||||
os.environ.pop("WHISPER_DEVICE", None)
|
os.environ.pop("WHISPER_DEVICE", None)
|
||||||
|
|||||||
Reference in New Issue
Block a user