feat: surface asr runtime state in workbench
This commit is contained in:
@@ -4,6 +4,12 @@
|
||||
|
||||
## 2026-04-06
|
||||
|
||||
### 工作台依赖健康现在会显示 ASR 真实运行模式
|
||||
|
||||
- `collector` 的 `/v2/integrations/health` 现在会带出 ASR 的 `language_mode / runtime_device_mode / runtime_compute_type_mode / active_device / active_compute_type / model_name`。
|
||||
- 工作台里的依赖健康卡不再只是显示 `ASR 在线`,而是会直接展示 `在线 · GPU` 或 `在线 · CPU`,并补充当前模型、语言模式和 compute type。
|
||||
- 这样以后排查“Windows ASR 当前到底有没有吃到 GPU”时,不需要再手查命令行或单独打 `/health`。
|
||||
|
||||
### Windows ASR GPU 失败时自动回退 CPU
|
||||
|
||||
- Windows `ASR HTTP` 现在在 `auto` 模式下仍会优先尝试 `cuda + int8_float16`,但如果在真正推理阶段命中 `cublas/cudnn/cuda` 运行库缺失,会自动切回 `cpu + int8` 重试,不再把整次转写卡死在 GPU 路径。
|
||||
|
||||
@@ -3152,6 +3152,23 @@ def probe_http(url: str, path: str = "", timeout: float = 3.0) -> dict[str, Any]
|
||||
return tcp
|
||||
|
||||
|
||||
def probe_http_json(url: str, path: str = "", timeout: float = 3.0) -> dict[str, Any]:
    """Probe an HTTP endpoint and attach its JSON object body when available.

    The result of ``probe_http(url, path=path, timeout=timeout)`` is returned
    with an extra ``"json"`` key. That key is always present and defaults to
    ``{}``; it is only filled in when the probe reports the endpoint as both
    configured and reachable, the follow-up GET answers with a JSON content
    type, and the decoded body is a ``dict``.

    Args:
        url: Base URL of the service to probe.
        path: Optional path appended to ``url`` for the request.
        timeout: Timeout in seconds, used for both the probe and the GET.

    Returns:
        The probe detail dict with ``"json"`` added. Fetching the body is
        best-effort: any failure leaves ``"json"`` as ``{}`` and never raises.
    """
    import logging

    detail = probe_http(url, path=path, timeout=timeout)
    detail["json"] = {}
    if not detail.get("configured") or not detail.get("reachable"):
        return detail

    # Prefer the URL the probe reports; otherwise rebuild it from url + path.
    target_url = detail.get("url") or (
        urljoin(url if url.endswith("/") else f"{url}/", path.lstrip("/")) if url else ""
    )
    request_url = target_url or url
    try:
        response = httpx.get(request_url, timeout=timeout, follow_redirects=True)
        # Media types are case-insensitive, so normalise before matching.
        content_type = (response.headers.get("content-type") or "").lower()
        if "application/json" in content_type:
            payload = response.json()
            # Only a JSON object is useful to callers that read named fields.
            if isinstance(payload, dict):
                detail["json"] = payload
    except Exception as exc:
        # Deliberately best-effort: a health probe must not fail its caller.
        # Leave a debug trace instead of silently discarding the cause.
        logging.getLogger(__name__).debug(
            "probe_http_json: could not read JSON from %s: %s", request_url, exc
        )
    return detail
|
||||
|
||||
|
||||
def live_recorder_request(method: str, path: str, payload: dict[str, Any] | None = None, timeout: float = 20.0) -> Any:
|
||||
if not LIVE_RECORDER_BASE_URL:
|
||||
raise HTTPException(status_code=503, detail="LIVE_RECORDER_BASE_URL is not configured")
|
||||
@@ -3274,6 +3291,8 @@ def integrations_health(account: dict[str, Any] = Depends(require_approved)) ->
|
||||
_ = account
|
||||
cutvideo_bootstrap = probe_http(CUTVIDEO_BASE_URL, "/api/bootstrap", timeout=5.0)
|
||||
cutvideo_uploads = probe_http(CUTVIDEO_BASE_URL, "/api/uploads", timeout=5.0)
|
||||
asr_probe = probe_http_json(ASR_HTTP_BASE_URL, "/health", timeout=5.0)
|
||||
asr_runtime = asr_probe.get("json") if isinstance(asr_probe.get("json"), dict) else {}
|
||||
cutvideo_supports_uploads = bool(
|
||||
cutvideo_uploads.get("configured")
|
||||
and cutvideo_uploads.get("reachable")
|
||||
@@ -3303,7 +3322,17 @@ def integrations_health(account: dict[str, Any] = Depends(require_approved)) ->
|
||||
},
|
||||
"asr": {
|
||||
"base_url": ASR_HTTP_BASE_URL,
|
||||
**probe_tcp(ASR_HTTP_BASE_URL),
|
||||
"configured": asr_probe.get("configured", False),
|
||||
"reachable": asr_probe.get("reachable", False),
|
||||
"status_code": int(asr_probe.get("status_code") or 0),
|
||||
"error": str(asr_probe.get("error") or ""),
|
||||
"url": str(asr_probe.get("url") or ASR_HTTP_BASE_URL or ""),
|
||||
"language_mode": str(asr_runtime.get("language") or ""),
|
||||
"runtime_device_mode": str(asr_runtime.get("device") or ""),
|
||||
"runtime_compute_type_mode": str(asr_runtime.get("compute_type") or ""),
|
||||
"active_device": str(asr_runtime.get("active_device") or ""),
|
||||
"active_compute_type": str(asr_runtime.get("active_compute_type") or ""),
|
||||
"model_name": str(asr_runtime.get("model_name") or ""),
|
||||
},
|
||||
"live_recorder": {
|
||||
"base_url": LIVE_RECORDER_BASE_URL,
|
||||
|
||||
@@ -269,6 +269,63 @@ class ProductionBaselineTests(unittest.TestCase):
|
||||
self.assertIn("cutvideoRouteMode", payload["lanRouting"])
|
||||
self.assertIn("cutvideoBaseUrl", payload["lanRouting"])
|
||||
|
||||
def test_integrations_health_exposes_asr_runtime_summary(self) -> None:
    """The health endpoint must surface the ASR runtime fields from ``/health``.

    ``probe_http_json`` and ``ASR_HTTP_BASE_URL`` on ``self.core`` are
    replaced for the duration of the request so no real network call is
    made; both are restored automatically afterwards, even on failure.
    """
    from unittest import mock

    ctx = self._seed_context("asr_runtime", exhausted=False)
    headers = {"Authorization": f"Bearer {ctx['token']}"}
    asr_base_url = "http://asr.example:8088"

    def fake_probe_http_json(url: str, path: str = "", timeout: float = 3.0) -> dict[str, Any]:
        # Answer only the ASR health probe; anything else looks unconfigured.
        if url == asr_base_url and path == "/health":
            return {
                "configured": True,
                "reachable": True,
                "status_code": 200,
                "error": "",
                "url": "http://asr.example:8088/health",
                "json": {
                    "service": "storyforge-windows-asr",
                    "model_name": "base",
                    "language": "auto",
                    "device": "auto",
                    "compute_type": "auto",
                    "active_device": "cuda",
                    "active_compute_type": "int8_float16",
                },
            }
        return {
            "configured": False,
            "reachable": False,
            "status_code": 0,
            "error": "not_configured",
            "url": "",
            "json": {},
        }

    with mock.patch.object(self.core, "ASR_HTTP_BASE_URL", asr_base_url):
        # create=True mirrors the old behaviour of tolerating a core module
        # that has no probe_http_json yet; the attribute is removed again on exit.
        with mock.patch.object(self.core, "probe_http_json", fake_probe_http_json, create=True):
            response = self.client.get("/v2/integrations/health", headers=headers)

    self.assertEqual(response.status_code, 200, response.text)
    payload = response.json()
    self.assertIn("asr", payload)
    self.assertEqual(payload["asr"]["active_device"], "cuda")
    self.assertEqual(payload["asr"]["active_compute_type"], "int8_float16")
    self.assertEqual(payload["asr"]["runtime_device_mode"], "auto")
    self.assertEqual(payload["asr"]["runtime_compute_type_mode"], "auto")
    self.assertEqual(payload["asr"]["language_mode"], "auto")
    self.assertEqual(payload["asr"]["model_name"], "base")
|
||||
|
||||
def test_collector_deploy_script_exposes_health_retry_controls(self) -> None:
|
||||
script_path = ROOT / "scripts" / "deploy_fnos_storyforge_collector.sh"
|
||||
content = script_path.read_text(encoding="utf-8")
|
||||
|
||||
@@ -3932,7 +3932,13 @@ function getIntegrationDetail(key) {
|
||||
supportsUploads: raw?.supports_uploads !== undefined ? Boolean(raw?.supports_uploads) : true,
|
||||
uploadStatusCode: Number(raw?.upload_status_code || 0),
|
||||
uploadError: String(raw?.upload_error || ""),
|
||||
uploadUrl: String(raw?.upload_url || "")
|
||||
uploadUrl: String(raw?.upload_url || ""),
|
||||
runtimeDeviceMode: String(raw?.runtime_device_mode || ""),
|
||||
runtimeComputeTypeMode: String(raw?.runtime_compute_type_mode || ""),
|
||||
activeDevice: String(raw?.active_device || ""),
|
||||
activeComputeType: String(raw?.active_compute_type || ""),
|
||||
languageMode: String(raw?.language_mode || ""),
|
||||
modelName: String(raw?.model_name || "")
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3954,6 +3960,16 @@ function getCutvideoIntegrationUrlLabel(detail) {
|
||||
return isFnosTunnelCutvideo(detail) ? "fnOS NAS 隧道入口" : "Windows 直连";
|
||||
}
|
||||
|
||||
/**
 * Build the short device badge shown next to the ASR integration status.
 *
 * The device the service reports as active wins: "cuda" (optionally with a
 * device index, e.g. "cuda:0") maps to "GPU" and "cpu" maps to "CPU". When no
 * recognised active device is present, the configured device mode is used
 * instead: "auto" becomes "自动", any other non-empty value is upper-cased.
 *
 * @param {object|null|undefined} detail Integration detail object; only
 *   `key`, `activeDevice` and `runtimeDeviceMode` are read.
 * @returns {string} The badge text, or "" for non-ASR details or when
 *   nothing is known about the device.
 */
function getAsrRuntimeBadge(detail) {
  if (!detail || detail.key !== "asr") return "";

  const activeDevice = String(detail.activeDevice || "").trim().toLowerCase();
  // Accept an indexed CUDA device ("cuda:0") as well as plain "cuda" so the
  // badge does not fall back to the configured mode while a GPU is in use.
  if (/^cuda(:\d+)?$/.test(activeDevice)) return "GPU";
  if (activeDevice === "cpu") return "CPU";

  const runtimeMode = String(detail.runtimeDeviceMode || "").trim().toLowerCase();
  if (runtimeMode === "auto") return "自动";
  return runtimeMode ? runtimeMode.toUpperCase() : "";
}
|
||||
|
||||
function getIntegrationStatus(detail) {
|
||||
if (!detail.available) {
|
||||
return { tone: "blue", summary: "未拉取" };
|
||||
@@ -3962,6 +3978,10 @@ function getIntegrationStatus(detail) {
|
||||
return { tone: "orange", summary: "缺上传能力" };
|
||||
}
|
||||
if (detail.reachable) {
|
||||
if (detail.key === "asr") {
|
||||
const runtimeBadge = getAsrRuntimeBadge(detail);
|
||||
return { tone: "green", summary: runtimeBadge ? `在线 · ${runtimeBadge}` : "在线" };
|
||||
}
|
||||
return { tone: "green", summary: "在线" };
|
||||
}
|
||||
if (detail.configured) {
|
||||
@@ -4069,6 +4089,15 @@ function getIntegrationCards() {
|
||||
? `当前通过 fnOS NAS 隧道访问 ${detail.baseUrl || detail.url || "cutvideo"}`
|
||||
: `当前直连 ${detail.baseUrl || detail.url || "cutvideo"}`;
|
||||
}
|
||||
if (key === "asr") {
|
||||
const runtimeBadge = getAsrRuntimeBadge(detail) || "待热身";
|
||||
const computeLabel = detail.activeComputeType || detail.runtimeComputeTypeMode || "auto";
|
||||
const languageLabel = detail.languageMode || "auto";
|
||||
extra = `当前转写:${runtimeBadge} · ${computeLabel} · 语言 ${languageLabel}`;
|
||||
if (detail.modelName) {
|
||||
extra += ` · 当前模型:${detail.modelName}`;
|
||||
}
|
||||
}
|
||||
if (detail.available && !detail.configured && isSuperAdmin()) {
|
||||
actions = [
|
||||
actions,
|
||||
|
||||
@@ -1477,6 +1477,24 @@ test("live-first workbench flows no longer advertise stale missing-capability pl
|
||||
assert.match(APP, /暂未识别当前动作/);
|
||||
});
|
||||
|
||||
// Source-level regression check: the workbench bundle must keep the ASR
// runtime fields in the integration detail mapper, the ASR-specific branch in
// the status summary, and the runtime/model text in the integration cards.
test("integration cards surface ASR runtime mode and model details", () => {
  const sliceApp = (startMarker, endMarker) => extractBetween(APP, startMarker, endMarker);

  const detailSource = sliceApp("function getIntegrationDetail(key) {", "function isFnosTunnelCutvideo");
  const statusSource = sliceApp("function getIntegrationStatus(detail) {", "function describeIntegrationFailure");
  const cardsSource = sliceApp("function getIntegrationCards()", "function renderLiveRecorderSummaryHtml()");

  // [haystack, pattern] pairs, checked in order so the first failure is stable.
  const expectations = [
    [APP, /function getAsrRuntimeBadge\(/],
    [detailSource, /runtimeDeviceMode:/],
    [detailSource, /runtimeComputeTypeMode:/],
    [detailSource, /activeDevice:/],
    [detailSource, /activeComputeType:/],
    [detailSource, /languageMode:/],
    [detailSource, /modelName:/],
    [statusSource, /detail\.key === "asr"/],
    [statusSource, /在线 ·/],
    [cardsSource, /当前转写:/],
    [cardsSource, /当前模型:/],
  ];

  for (const [haystack, pattern] of expectations) {
    assert.match(haystack, pattern);
  }
});
|
||||
|
||||
test("smart discovery entrypoints prefer direct execute before falling back to forms", () => {
|
||||
const clicks = extractBetween(APP, "document.addEventListener(\"click\", async (event) => {", "document.addEventListener(\"submit\", async (event) => {");
|
||||
assert.match(clicks, /name === "open-similar-search"[\s\S]*const account = getSelectedAccount\(\);/);
|
||||
|
||||
Reference in New Issue
Block a user