diff --git a/docs/architecture/current_runtime_and_deploy_status_cn.md b/docs/architecture/current_runtime_and_deploy_status_cn.md index 490c0ff..ad57600 100644 --- a/docs/architecture/current_runtime_and_deploy_status_cn.md +++ b/docs/architecture/current_runtime_and_deploy_status_cn.md @@ -240,6 +240,7 @@ cd /Users/kris/code/boss - 当前 `local-agent` 对 `dispatch_execution` 任务会按 `orchestrationBackendId` 分流:默认走 `codex exec resume`;当任务显式选择 `omx-team` 且本机 `omxEnabled + omxCommand/omxArgs` 可用时,会改走 `OMX Team Runtime` JSON 协议执行并回写 `rawThreadReply / replyBody` - 当前 `local-agent` 会在 Codex 任务完成时回传 `executionProgress`:服务端把同一任务的进度卡从 queued / running 更新到 completed / failed,Android 原生聊天页会显示“进度 / 分支详情 / 生成结果 / 后台智能体”,其中 Git diff、GitHub CLI 可用性和产物名由本地 agent 补齐 - 当前 `local-agent` 对 `browser_control / desktop_control` 已从占位骨架升级成外部 runtime 桥:当本机配置了 `browserControlEnabled + browserControlCommand` 或 `computerUseEnabled + computerUseCommand` 时,会把标准化 JSON 请求透传给外部进程,并解析单行 JSON 结果;未启用时会 fail closed,返回明确的 runtime disabled 错误,不再假装执行成功 +- 远程电脑控制链路当前已有可复用压测基线:`npm run stress:remote-control` 可按参数压测 `local-agent -> MasterAgentTask -> browser_control / desktop_control runtime -> complete 回写` 全链路;`npm run stress:remote-control:ci` 固定 120 条链路任务和 360 条 runtime 并发任务,并用 p95 延迟预算判断是否退化。压测报告可通过 `--report-json=PATH` 落盘,便于后续接入真实 macOS AX / Windows UIA helper 后复用同一套稳定性判断。 - 当前历史脏群如果不再包含真实线程成员,群聊消息不会再表现成“无响应”;服务端会在群内追加明确 `system_notice`,提示先重新添加线程成员 - 当前设备导入决议已经升级成真正通过 `local-agent -> codex exec -> /complete` 回写的主 Agent 决议链;Web 和 Android 前台都会在 `pending_resolution` 阶段显示审核任务状态,并在任务完成后自动刷新出正式导入建议 - 当前 `local-agent` 已改成先启动本地 `4317` 健康监听,再异步跑首次 heartbeat 和 task poll,避免控制面短时阻塞时本地健康探针不可用 @@ -367,6 +368,8 @@ curl -I http://127.0.0.1:3000/api/v1/user/ota/package curl -sS http://127.0.0.1:4317/health curl -sS http://127.0.0.1:4317/api/v1/skills curl -sS -X POST http://127.0.0.1:4317/api/v1/heartbeat +npm run stress:remote-control:ci +npm run stress:remote-control -- --chain-tasks=120 --runtime-tasks=360 --runtime-concurrency=36 --timeout-ms=60000 --report-json=/tmp/boss-remote-control-stress.json ``` 服务器: diff --git a/package.json b/package.json index 7f6e6fc..55775f4 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "admin:web:build": "cd apps/boss-admin-web && npm run build", "admin:web:publish": "npm --prefix apps/boss-admin-web run build", "stress:remote-control": "node scripts/stress-remote-control.mjs", + "stress:remote-control:ci": "node scripts/stress-remote-control.mjs --chain-tasks=120 --runtime-tasks=360 --runtime-concurrency=36 --timeout-ms=60000 --max-chain-p95-ms=500 --max-runtime-p95-ms=2000", "test:master-agent-controls": "tsx --test tests/master-agent-chat-controls.test.ts", "apk:debug": "cd android && ./gradlew assembleDebug && cd .. && zsh ./scripts/publish-apk-to-public.sh", "apk:release": "zsh ./scripts/build-release-apk.sh", diff --git a/scripts/stress-remote-control.mjs b/scripts/stress-remote-control.mjs index c757ece..f26e989 100755 --- a/scripts/stress-remote-control.mjs +++ b/scripts/stress-remote-control.mjs @@ -20,6 +20,9 @@ function parseArgs(argv) { timeoutMs: 45_000, skipChain: false, skipRuntime: false, + reportJson: null, + maxChainP95Ms: null, + maxRuntimeP95Ms: null, }; for (const arg of argv) { @@ -31,7 +34,12 @@ function parseArgs(argv) { options.runtimeConcurrency = positiveInt(arg.split("=")[1], options.runtimeConcurrency); } else if (arg.startsWith("--poll-ms=")) options.pollMs = positiveInt(arg.split("=")[1], options.pollMs); else if (arg.startsWith("--timeout-ms=")) options.timeoutMs = positiveInt(arg.split("=")[1], options.timeoutMs); - else if (arg === "--help" || arg === "-h") { + else if (arg.startsWith("--report-json=")) options.reportJson = arg.slice("--report-json=".length); + else if (arg.startsWith("--max-chain-p95-ms=")) { + options.maxChainP95Ms = positiveInt(arg.split("=")[1], options.maxChainP95Ms); + } else if (arg.startsWith("--max-runtime-p95-ms=")) { + options.maxRuntimeP95Ms = positiveInt(arg.split("=")[1], options.maxRuntimeP95Ms); + } else if (arg === "--help" || arg === "-h") { options.help = true; } } @@ -453,6 +461,29 @@ function hasFailure(summary) { return summary.failed > 0; } +function findThresholdFailures(summaries, options) { + const failures = []; + const chain = summaries.find((summary) => summary.name === "chain"); + if (chain && options.maxChainP95Ms && chain.latencyMs.p95 > options.maxChainP95Ms) { + failures.push({ + name: "chain_p95_latency", + actualMs: chain.latencyMs.p95, + maxMs: options.maxChainP95Ms, + }); + } + + const runtime = summaries.find((summary) => summary.name === "runtime"); + if (runtime && options.maxRuntimeP95Ms && runtime.latencyMs.p95 > options.maxRuntimeP95Ms) { + failures.push({ + name: "runtime_p95_latency", + actualMs: runtime.latencyMs.p95, + maxMs: options.maxRuntimeP95Ms, + }); + } + + return failures; +} + function printHelp() { console.log(`Usage: node scripts/stress-remote-control.mjs [options] @@ -462,6 +493,9 @@ Options: --runtime-concurrency=N direct runtime concurrency, default 24 --poll-ms=N local-agent task poll interval, default 5 --timeout-ms=N chain stress timeout, default 45000 + --report-json=PATH write the full stress report to PATH + --max-chain-p95-ms=N fail when local-agent chain p95 latency is above N + --max-runtime-p95-ms=N fail when direct runtime p95 latency is above N --skip-chain skip local-agent chain stress --skip-runtime skip direct runtime stress `); @@ -473,6 +507,8 @@ if (options.help) { process.exit(0); } +const startedAt = new Date(); +const startedMs = Date.now(); const summaries = []; if (!options.skipChain) { summaries.push(await runChainStress(options)); @@ -481,7 +517,23 @@ if (!options.skipRuntime) { summaries.push(await runRuntimeStress(options)); } -console.log(JSON.stringify({ ok: summaries.every((summary) => !hasFailure(summary)), summaries }, null, 2)); -if (summaries.some(hasFailure)) { +const thresholdFailures = findThresholdFailures(summaries, options); +const report = { + ok: summaries.every((summary) => !hasFailure(summary)) && thresholdFailures.length === 0, + startedAt: startedAt.toISOString(), + finishedAt: new Date().toISOString(), + durationMs: Date.now() - startedMs, + options, + summaries, + thresholdFailures, +}; + +if (options.reportJson) { + await mkdir(path.dirname(path.resolve(options.reportJson)), { recursive: true }); + await writeFile(path.resolve(options.reportJson), `${JSON.stringify(report, null, 2)}\n`, "utf8"); +} + +console.log(JSON.stringify(report, null, 2)); +if (!report.ok) { process.exitCode = 1; } diff --git a/tests/stress-remote-control-script.test.mjs b/tests/stress-remote-control-script.test.mjs new file mode 100644 index 0000000..dc76023 --- /dev/null +++ b/tests/stress-remote-control-script.test.mjs @@ -0,0 +1,74 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { spawn } from "node:child_process"; +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; + +const repoRoot = path.resolve(import.meta.dirname, ".."); + +function runStress(args) { + return new Promise((resolve) => { + const child = spawn(process.execPath, ["scripts/stress-remote-control.mjs", ...args], { + cwd: repoRoot, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }); + let stdout = ""; + let stderr = ""; + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (chunk) => { + stdout += chunk; + }); + child.stderr.on("data", (chunk) => { + stderr += chunk; + }); + child.on("close", (status) => { + resolve({ status, stdout, stderr }); + }); + }); +} + +test("remote control stress script writes a reusable JSON report", async () => { + const tempDir = await mkdtemp(path.join(os.tmpdir(), "boss-stress-report-test-")); + const reportPath = path.join(tempDir, "remote-control-stress.json"); + try { + const result = await runStress([ + "--chain-tasks=4", + "--runtime-tasks=6", + "--runtime-concurrency=3", + "--timeout-ms=20000", + `--report-json=${reportPath}`, + ]); + + assert.equal(result.status, 0, result.stderr || result.stdout); + const report = JSON.parse(await readFile(reportPath, "utf8")); + assert.equal(report.ok, true); + assert.equal(report.options.chainTasks, 4); + assert.equal(report.options.runtimeTasks, 6); + assert.match(report.startedAt, /^\d{4}-\d{2}-\d{2}T/); + assert.match(report.finishedAt, /^\d{4}-\d{2}-\d{2}T/); + assert.ok(report.durationMs >= 0); + assert.equal(report.summaries.length, 2); + assert.equal(report.summaries[0].missing, 0); + assert.equal(report.summaries[1].failed, 0); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } +}); + +test("remote control stress script fails when latency budget is exceeded", async () => { + const result = await runStress([ + "--chain-tasks=4", + "--skip-runtime", + "--timeout-ms=20000", + "--max-chain-p95-ms=1", + ]); + + assert.notEqual(result.status, 0); + const report = JSON.parse(result.stdout); + assert.equal(report.ok, false); + assert.equal(report.thresholdFailures.length, 1); + assert.equal(report.thresholdFailures[0].name, "chain_p95_latency"); +});