test: add remote control stress budgets

This commit is contained in:
AI Bot
2026-05-11 23:25:52 +08:00
parent 9c8ffebb92
commit bc199dcf5c
4 changed files with 133 additions and 3 deletions

View File

@@ -240,6 +240,7 @@ cd /Users/kris/code/boss
- 当前 `local-agent` 的 `dispatch_execution` 任务会按 `orchestrationBackendId` 分流:默认走 `codex exec resume`;当任务显式选择 `omx-team` 且本机 `omxEnabled + omxCommand/omxArgs` 可用时,会改走 `OMX Team Runtime` JSON 协议执行并回写 `rawThreadReply / replyBody`
- 当前 `local-agent` 会在 Codex 任务完成时回传 `executionProgress`:服务端把同一任务的进度卡从 queued / running 更新到 completed / failed;Android 原生聊天页会显示“进度 / 分支详情 / 生成结果 / 后台智能体”,其中 Git diff、GitHub CLI 可用性和产物名由本地 agent 补齐
- 当前 `local-agent` 的 `browser_control / desktop_control` 已从占位骨架升级成外部 runtime 桥:当本机配置了 `browserControlEnabled + browserControlCommand` 或 `computerUseEnabled + computerUseCommand` 时,会把标准化 JSON 请求透传给外部进程,并解析单行 JSON 结果;未启用时会 fail closed,返回明确的 runtime disabled 错误,不再假装执行成功
- 远程电脑控制链路当前已有可复用压测基线:`npm run stress:remote-control` 可按参数压测 `local-agent -> MasterAgentTask -> browser_control / desktop_control runtime -> complete 回写` 全链路;`npm run stress:remote-control:ci` 固定 120 条链路任务和 360 条 runtime 并发任务,并用 p95 延迟预算判断是否退化。压测报告可通过 `--report-json=PATH` 落盘,便于后续接入真实 macOS AX / Windows UIA helper 后复用同一套稳定性判断。
- 当前历史脏群如果不再包含真实线程成员,群聊消息不会再表现成“无响应”;服务端会在群内追加明确 `system_notice`,提示先重新添加线程成员
- 当前设备导入决议已经升级成真正通过 `local-agent -> codex exec -> /complete` 回写的主 Agent 决议链;Web 和 Android 前台都会在 `pending_resolution` 阶段显示审核任务状态,并在任务完成后自动刷新出正式导入建议
- 当前 `local-agent` 已改成先启动本地 `4317` 健康监听,再异步跑首次 heartbeat 和 task poll,避免控制面短时阻塞时本地健康探针不可用
@@ -367,6 +368,8 @@ curl -I http://127.0.0.1:3000/api/v1/user/ota/package
curl -sS http://127.0.0.1:4317/health
curl -sS http://127.0.0.1:4317/api/v1/skills
curl -sS -X POST http://127.0.0.1:4317/api/v1/heartbeat
npm run stress:remote-control:ci
npm run stress:remote-control -- --chain-tasks=120 --runtime-tasks=360 --runtime-concurrency=36 --timeout-ms=60000 --report-json=/tmp/boss-remote-control-stress.json
```
服务器:

View File

@@ -13,6 +13,7 @@
"admin:web:build": "cd apps/boss-admin-web && npm run build",
"admin:web:publish": "npm --prefix apps/boss-admin-web run build",
"stress:remote-control": "node scripts/stress-remote-control.mjs",
"stress:remote-control:ci": "node scripts/stress-remote-control.mjs --chain-tasks=120 --runtime-tasks=360 --runtime-concurrency=36 --timeout-ms=60000 --max-chain-p95-ms=500 --max-runtime-p95-ms=2000",
"test:master-agent-controls": "tsx --test tests/master-agent-chat-controls.test.ts",
"apk:debug": "cd android && ./gradlew assembleDebug && cd .. && zsh ./scripts/publish-apk-to-public.sh",
"apk:release": "zsh ./scripts/build-release-apk.sh",

View File

@@ -20,6 +20,9 @@ function parseArgs(argv) {
timeoutMs: 45_000,
skipChain: false,
skipRuntime: false,
reportJson: null,
maxChainP95Ms: null,
maxRuntimeP95Ms: null,
};
for (const arg of argv) {
@@ -31,7 +34,12 @@ function parseArgs(argv) {
options.runtimeConcurrency = positiveInt(arg.split("=")[1], options.runtimeConcurrency);
} else if (arg.startsWith("--poll-ms=")) options.pollMs = positiveInt(arg.split("=")[1], options.pollMs);
else if (arg.startsWith("--timeout-ms=")) options.timeoutMs = positiveInt(arg.split("=")[1], options.timeoutMs);
else if (arg === "--help" || arg === "-h") {
else if (arg.startsWith("--report-json=")) options.reportJson = arg.slice("--report-json=".length);
else if (arg.startsWith("--max-chain-p95-ms=")) {
options.maxChainP95Ms = positiveInt(arg.split("=")[1], options.maxChainP95Ms);
} else if (arg.startsWith("--max-runtime-p95-ms=")) {
options.maxRuntimeP95Ms = positiveInt(arg.split("=")[1], options.maxRuntimeP95Ms);
} else if (arg === "--help" || arg === "-h") {
options.help = true;
}
}
@@ -453,6 +461,29 @@ function hasFailure(summary) {
return summary.failed > 0;
}
/**
 * Compare each stress summary's p95 latency against its configured budget.
 *
 * A budget is only checked when the matching summary is present and the
 * threshold option is truthy, so a `null` threshold disables that check.
 *
 * @param {Array<{name: string, latencyMs: {p95: number}}>} summaries - stress summaries to inspect.
 * @param {{maxChainP95Ms: ?number, maxRuntimeP95Ms: ?number}} options - parsed CLI options.
 * @returns {Array<{name: string, actualMs: number, maxMs: number}>} one entry per exceeded
 *   budget, chain before runtime; empty when every budget holds.
 */
function findThresholdFailures(summaries, options) {
  // One row per budget; row order fixes the order of reported failures.
  const budgets = [
    { summaryName: "chain", failureName: "chain_p95_latency", optionKey: "maxChainP95Ms" },
    { summaryName: "runtime", failureName: "runtime_p95_latency", optionKey: "maxRuntimeP95Ms" },
  ];
  const exceeded = [];
  for (const { summaryName, failureName, optionKey } of budgets) {
    const match = summaries.find((entry) => entry.name === summaryName);
    if (!match) {
      continue;
    }
    const limit = options[optionKey];
    if (!limit) {
      continue;
    }
    const observed = match.latencyMs.p95;
    // Strictly greater: landing exactly on the budget still passes.
    if (observed > limit) {
      exceeded.push({ name: failureName, actualMs: observed, maxMs: limit });
    }
  }
  return exceeded;
}
function printHelp() {
console.log(`Usage: node scripts/stress-remote-control.mjs [options]
@@ -462,6 +493,9 @@ Options:
--runtime-concurrency=N direct runtime concurrency, default 24
--poll-ms=N local-agent task poll interval, default 5
--timeout-ms=N chain stress timeout, default 45000
--report-json=PATH write the full stress report to PATH
--max-chain-p95-ms=N fail when local-agent chain p95 latency is above N
--max-runtime-p95-ms=N fail when direct runtime p95 latency is above N
--skip-chain skip local-agent chain stress
--skip-runtime skip direct runtime stress
`);
@@ -473,6 +507,8 @@ if (options.help) {
process.exit(0);
}
const startedAt = new Date();
const startedMs = Date.now();
const summaries = [];
if (!options.skipChain) {
summaries.push(await runChainStress(options));
@@ -481,7 +517,23 @@ if (!options.skipRuntime) {
summaries.push(await runRuntimeStress(options));
}
console.log(JSON.stringify({ ok: summaries.every((summary) => !hasFailure(summary)), summaries }, null, 2));
if (summaries.some(hasFailure)) {
const thresholdFailures = findThresholdFailures(summaries, options);
const report = {
ok: summaries.every((summary) => !hasFailure(summary)) && thresholdFailures.length === 0,
startedAt: startedAt.toISOString(),
finishedAt: new Date().toISOString(),
durationMs: Date.now() - startedMs,
options,
summaries,
thresholdFailures,
};
if (options.reportJson) {
await mkdir(path.dirname(path.resolve(options.reportJson)), { recursive: true });
await writeFile(path.resolve(options.reportJson), `${JSON.stringify(report, null, 2)}\n`, "utf8");
}
console.log(JSON.stringify(report, null, 2));
if (!report.ok) {
process.exitCode = 1;
}

View File

@@ -0,0 +1,74 @@
import test from "node:test";
import assert from "node:assert/strict";
import { spawn } from "node:child_process";
import { mkdtemp, readFile, rm } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
const repoRoot = path.resolve(import.meta.dirname, "..");
/**
 * Run `scripts/stress-remote-control.mjs` in a child Node process and capture its output.
 *
 * @param {string[]} args - CLI arguments appended after the script path.
 * @returns {Promise<{status: ?number, stdout: string, stderr: string}>} resolves once the
 *   child's stdio has closed; `status` is the value reported by the `close` event
 *   (per the Node.js docs, `null` when the child was terminated by a signal).
 *   Rejects if the child process emits `error` (for example, it could not be spawned).
 */
function runStress(args) {
  return new Promise((resolve, reject) => {
    const child = spawn(process.execPath, ["scripts/stress-remote-control.mjs", ...args], {
      cwd: repoRoot,
      env: process.env,
      stdio: ["ignore", "pipe", "pipe"],
    });
    let stdout = "";
    let stderr = "";
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    // Without an "error" listener a spawn failure is thrown as an unhandled
    // event; rejecting turns it into an ordinary failure of the awaiting test.
    child.on("error", reject);
    child.on("close", (status) => {
      resolve({ status, stdout, stderr });
    });
  });
}
// Runs a small chain + runtime stress pass and checks the report written via --report-json.
test("remote control stress script writes a reusable JSON report", async () => {
  const scratchDir = await mkdtemp(path.join(os.tmpdir(), "boss-stress-report-test-"));
  const reportFile = path.join(scratchDir, "remote-control-stress.json");
  const isoDatePrefix = /^\d{4}-\d{2}-\d{2}T/;
  const cliArgs = [
    "--chain-tasks=4",
    "--runtime-tasks=6",
    "--runtime-concurrency=3",
    "--timeout-ms=20000",
    `--report-json=${reportFile}`,
  ];
  try {
    const { status, stdout, stderr } = await runStress(cliArgs);
    // Surface the child's output as the failure message when it exits non-zero.
    assert.equal(status, 0, stderr || stdout);

    const rawReport = await readFile(reportFile, "utf8");
    const parsed = JSON.parse(rawReport);
    assert.equal(parsed.ok, true);

    // The report echoes back the options the run was started with.
    const echoedOptions = parsed.options;
    assert.equal(echoedOptions.chainTasks, 4);
    assert.equal(echoedOptions.runtimeTasks, 6);

    assert.match(parsed.startedAt, isoDatePrefix);
    assert.match(parsed.finishedAt, isoDatePrefix);
    assert.ok(parsed.durationMs >= 0);

    const summaryList = parsed.summaries;
    assert.equal(summaryList.length, 2);
    assert.equal(summaryList[0].missing, 0);
    assert.equal(summaryList[1].failed, 0);
  } finally {
    // Always remove the temp directory, even when an assertion above fails.
    await rm(scratchDir, { recursive: true, force: true });
  }
});
// Sets a 1 ms chain p95 budget and expects the script to exit non-zero with one threshold failure.
test("remote control stress script fails when latency budget is exceeded", async () => {
  const cliArgs = ["--chain-tasks=4", "--skip-runtime", "--timeout-ms=20000", "--max-chain-p95-ms=1"];
  const { status, stdout } = await runStress(cliArgs);
  assert.notEqual(status, 0);

  // No --report-json here, so the report is read from the script's stdout.
  const parsed = JSON.parse(stdout);
  assert.equal(parsed.ok, false);

  const breaches = parsed.thresholdFailures;
  assert.equal(breaches.length, 1);
  assert.equal(breaches[0].name, "chain_p95_latency");
});