test: add remote control stress budgets
This commit is contained in:
@@ -240,6 +240,7 @@ cd /Users/kris/code/boss
|
||||
- 当前 `local-agent` 对 `dispatch_execution` 任务会按 `orchestrationBackendId` 分流:默认走 `codex exec resume`;当任务显式选择 `omx-team` 且本机 `omxEnabled + omxCommand/omxArgs` 可用时,会改走 `OMX Team Runtime` JSON 协议执行并回写 `rawThreadReply / replyBody`
|
||||
- 当前 `local-agent` 会在 Codex 任务完成时回传 `executionProgress`:服务端把同一任务的进度卡从 queued / running 更新到 completed / failed,Android 原生聊天页会显示“进度 / 分支详情 / 生成结果 / 后台智能体”,其中 Git diff、GitHub CLI 可用性和产物名由本地 agent 补齐
|
||||
- 当前 `local-agent` 对 `browser_control / desktop_control` 已从占位骨架升级成外部 runtime 桥:当本机配置了 `browserControlEnabled + browserControlCommand` 或 `computerUseEnabled + computerUseCommand` 时,会把标准化 JSON 请求透传给外部进程,并解析单行 JSON 结果;未启用时会 fail closed,返回明确的 runtime disabled 错误,不再假装执行成功
|
||||
- 远程电脑控制链路当前已有可复用压测基线:`npm run stress:remote-control` 可按参数压测 `local-agent -> MasterAgentTask -> browser_control / desktop_control runtime -> complete 回写` 全链路;`npm run stress:remote-control:ci` 固定 120 条链路任务和 360 条 runtime 并发任务,并用 p95 延迟预算判断是否退化。压测报告可通过 `--report-json=PATH` 落盘,便于后续接入真实 macOS AX / Windows UIA helper 后复用同一套稳定性判断。
|
||||
- 当前历史脏群如果不再包含真实线程成员,群聊消息不会再表现成“无响应”;服务端会在群内追加明确 `system_notice`,提示先重新添加线程成员
|
||||
- 当前设备导入决议已经升级成真正通过 `local-agent -> codex exec -> /complete` 回写的主 Agent 决议链;Web 和 Android 前台都会在 `pending_resolution` 阶段显示审核任务状态,并在任务完成后自动刷新出正式导入建议
|
||||
- 当前 `local-agent` 已改成先启动本地 `4317` 健康监听,再异步跑首次 heartbeat 和 task poll,避免控制面短时阻塞时本地健康探针不可用
|
||||
@@ -367,6 +368,8 @@ curl -I http://127.0.0.1:3000/api/v1/user/ota/package
|
||||
curl -sS http://127.0.0.1:4317/health
|
||||
curl -sS http://127.0.0.1:4317/api/v1/skills
|
||||
curl -sS -X POST http://127.0.0.1:4317/api/v1/heartbeat
|
||||
npm run stress:remote-control:ci
|
||||
npm run stress:remote-control -- --chain-tasks=120 --runtime-tasks=360 --runtime-concurrency=36 --timeout-ms=60000 --report-json=/tmp/boss-remote-control-stress.json
|
||||
```
|
||||
|
||||
服务器:
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
"admin:web:build": "cd apps/boss-admin-web && npm run build",
|
||||
"admin:web:publish": "npm --prefix apps/boss-admin-web run build",
|
||||
"stress:remote-control": "node scripts/stress-remote-control.mjs",
|
||||
"stress:remote-control:ci": "node scripts/stress-remote-control.mjs --chain-tasks=120 --runtime-tasks=360 --runtime-concurrency=36 --timeout-ms=60000 --max-chain-p95-ms=500 --max-runtime-p95-ms=2000",
|
||||
"test:master-agent-controls": "tsx --test tests/master-agent-chat-controls.test.ts",
|
||||
"apk:debug": "cd android && ./gradlew assembleDebug && cd .. && zsh ./scripts/publish-apk-to-public.sh",
|
||||
"apk:release": "zsh ./scripts/build-release-apk.sh",
|
||||
|
||||
@@ -20,6 +20,9 @@ function parseArgs(argv) {
|
||||
timeoutMs: 45_000,
|
||||
skipChain: false,
|
||||
skipRuntime: false,
|
||||
reportJson: null,
|
||||
maxChainP95Ms: null,
|
||||
maxRuntimeP95Ms: null,
|
||||
};
|
||||
|
||||
for (const arg of argv) {
|
||||
@@ -31,7 +34,12 @@ function parseArgs(argv) {
|
||||
options.runtimeConcurrency = positiveInt(arg.split("=")[1], options.runtimeConcurrency);
|
||||
} else if (arg.startsWith("--poll-ms=")) options.pollMs = positiveInt(arg.split("=")[1], options.pollMs);
|
||||
else if (arg.startsWith("--timeout-ms=")) options.timeoutMs = positiveInt(arg.split("=")[1], options.timeoutMs);
|
||||
else if (arg === "--help" || arg === "-h") {
|
||||
else if (arg.startsWith("--report-json=")) options.reportJson = arg.slice("--report-json=".length);
|
||||
else if (arg.startsWith("--max-chain-p95-ms=")) {
|
||||
options.maxChainP95Ms = positiveInt(arg.split("=")[1], options.maxChainP95Ms);
|
||||
} else if (arg.startsWith("--max-runtime-p95-ms=")) {
|
||||
options.maxRuntimeP95Ms = positiveInt(arg.split("=")[1], options.maxRuntimeP95Ms);
|
||||
} else if (arg === "--help" || arg === "-h") {
|
||||
options.help = true;
|
||||
}
|
||||
}
|
||||
@@ -453,6 +461,29 @@ function hasFailure(summary) {
|
||||
return summary.failed > 0;
|
||||
}
|
||||
|
||||
/**
 * Compare the measured p95 latency of each stress stage with its optional budget.
 *
 * A budget is only enforced when the corresponding option is truthy and a
 * summary with the matching `name` is present. A summary that carries no
 * `latencyMs.p95` value is treated as "nothing to compare" instead of throwing.
 *
 * @param {Array<{name: string, latencyMs?: {p95?: number}}>} summaries
 *   Stage summaries; the stages looked up here are named "chain" and "runtime".
 * @param {{maxChainP95Ms?: number|null, maxRuntimeP95Ms?: number|null}} options
 *   Parsed CLI options holding the p95 budgets in milliseconds.
 * @returns {Array<{name: string, actualMs: number, maxMs: number}>}
 *   One entry per exceeded budget, chain first, then runtime.
 */
function findThresholdFailures(summaries, options) {
  // One row per budget keeps both checks identical by construction.
  const budgets = [
    { stage: "chain", failureName: "chain_p95_latency", maxMs: options.maxChainP95Ms },
    { stage: "runtime", failureName: "runtime_p95_latency", maxMs: options.maxRuntimeP95Ms },
  ];

  const failures = [];
  for (const { stage, failureName, maxMs } of budgets) {
    // A null/unset budget disables the check for that stage.
    if (!maxMs) continue;

    const summary = summaries.find((entry) => entry.name === stage);
    // Optional chaining: a skipped stage or a summary without latency data
    // yields undefined, and `undefined > maxMs` is false.
    const actualMs = summary?.latencyMs?.p95;
    if (actualMs > maxMs) {
      failures.push({ name: failureName, actualMs, maxMs });
    }
  }

  return failures;
}
|
||||
|
||||
function printHelp() {
|
||||
console.log(`Usage: node scripts/stress-remote-control.mjs [options]
|
||||
|
||||
@@ -462,6 +493,9 @@ Options:
|
||||
--runtime-concurrency=N direct runtime concurrency, default 24
|
||||
--poll-ms=N local-agent task poll interval, default 5
|
||||
--timeout-ms=N chain stress timeout, default 45000
|
||||
--report-json=PATH write the full stress report to PATH
|
||||
--max-chain-p95-ms=N fail when local-agent chain p95 latency is above N
|
||||
--max-runtime-p95-ms=N fail when direct runtime p95 latency is above N
|
||||
--skip-chain skip local-agent chain stress
|
||||
--skip-runtime skip direct runtime stress
|
||||
`);
|
||||
@@ -473,6 +507,8 @@ if (options.help) {
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const startedAt = new Date();
|
||||
const startedMs = Date.now();
|
||||
const summaries = [];
|
||||
if (!options.skipChain) {
|
||||
summaries.push(await runChainStress(options));
|
||||
@@ -481,7 +517,23 @@ if (!options.skipRuntime) {
|
||||
summaries.push(await runRuntimeStress(options));
|
||||
}
|
||||
|
||||
console.log(JSON.stringify({ ok: summaries.every((summary) => !hasFailure(summary)), summaries }, null, 2));
|
||||
if (summaries.some(hasFailure)) {
|
||||
const thresholdFailures = findThresholdFailures(summaries, options);
|
||||
const report = {
|
||||
ok: summaries.every((summary) => !hasFailure(summary)) && thresholdFailures.length === 0,
|
||||
startedAt: startedAt.toISOString(),
|
||||
finishedAt: new Date().toISOString(),
|
||||
durationMs: Date.now() - startedMs,
|
||||
options,
|
||||
summaries,
|
||||
thresholdFailures,
|
||||
};
|
||||
|
||||
if (options.reportJson) {
|
||||
await mkdir(path.dirname(path.resolve(options.reportJson)), { recursive: true });
|
||||
await writeFile(path.resolve(options.reportJson), `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
}
|
||||
|
||||
console.log(JSON.stringify(report, null, 2));
|
||||
if (!report.ok) {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
|
||||
74
tests/stress-remote-control-script.test.mjs
Normal file
74
tests/stress-remote-control-script.test.mjs
Normal file
@@ -0,0 +1,74 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
const repoRoot = path.resolve(import.meta.dirname, "..");
|
||||
|
||||
/**
 * Run `scripts/stress-remote-control.mjs` in a child Node process and capture its output.
 *
 * @param {string[]} args CLI arguments appended after the script path.
 * @returns {Promise<{status: number|null, stdout: string, stderr: string}>}
 *   Resolves once the child's stdio has closed. Rejects if the process could
 *   not be spawned at all.
 */
function runStress(args) {
  return new Promise((resolve, reject) => {
    const child = spawn(process.execPath, ["scripts/stress-remote-control.mjs", ...args], {
      cwd: repoRoot,
      env: process.env,
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });

    // Without an "error" listener a failed spawn is raised as an uncaught
    // exception; surface it as a normal rejection of this promise instead.
    child.on("error", reject);
    child.on("close", (status) => {
      resolve({ status, stdout, stderr });
    });
  });
}
|
||||
|
||||
// End-to-end check: a small stress run must exit cleanly and leave a complete
// JSON report at the path given via --report-json.
test("remote control stress script writes a reusable JSON report", async () => {
  const scratchDir = await mkdtemp(path.join(os.tmpdir(), "boss-stress-report-test-"));
  const reportFile = path.join(scratchDir, "remote-control-stress.json");
  const cliArgs = [
    "--chain-tasks=4",
    "--runtime-tasks=6",
    "--runtime-concurrency=3",
    "--timeout-ms=20000",
    `--report-json=${reportFile}`,
  ];
  const isoTimestampPrefix = /^\d{4}-\d{2}-\d{2}T/;

  try {
    const { status, stdout, stderr } = await runStress(cliArgs);
    // Show whatever the script printed when it exits non-zero.
    assert.equal(status, 0, stderr || stdout);

    const rawReport = await readFile(reportFile, "utf8");
    const report = JSON.parse(rawReport);

    assert.equal(report.ok, true);
    assert.equal(report.options.chainTasks, 4);
    assert.equal(report.options.runtimeTasks, 6);
    assert.match(report.startedAt, isoTimestampPrefix);
    assert.match(report.finishedAt, isoTimestampPrefix);
    assert.ok(report.durationMs >= 0);
    assert.equal(report.summaries.length, 2);

    const [firstSummary, secondSummary] = report.summaries;
    assert.equal(firstSummary.missing, 0);
    assert.equal(secondSummary.failed, 0);
  } finally {
    // Always remove the temporary report directory, even on assertion failure.
    await rm(scratchDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// A 1 ms chain p95 budget is expected to be exceeded, so the script must exit
// non-zero and report exactly one threshold failure on stdout.
test("remote control stress script fails when latency budget is exceeded", async () => {
  const cliArgs = ["--chain-tasks=4", "--skip-runtime", "--timeout-ms=20000", "--max-chain-p95-ms=1"];
  const { status, stdout } = await runStress(cliArgs);

  assert.notEqual(status, 0);

  // Without --report-json the report is only available on stdout.
  const { ok, thresholdFailures } = JSON.parse(stdout);
  assert.equal(ok, false);
  assert.equal(thresholdFailures.length, 1);

  const [onlyFailure] = thresholdFailures;
  assert.equal(onlyFailure.name, "chain_p95_latency");
});
|
||||
Reference in New Issue
Block a user