From 9c8ffebb9202596eb20e5955d40f66d77beea51d Mon Sep 17 00:00:00 2001 From: AI Bot Date: Mon, 11 May 2026 23:12:47 +0800 Subject: [PATCH] test: harden remote control stress flow --- local-agent/master-task-completion.mjs | 100 ++++ local-agent/server.mjs | 76 ++- package.json | 1 + scripts/stress-remote-control.mjs | 487 ++++++++++++++++++ .../fixtures/computer-use-dialog-runtime.mjs | 24 + ...ocal-agent-master-task-completion.test.mjs | 69 +++ ...t-server-computer-use-dialog-flow.test.mjs | 167 ++++++ 7 files changed, 884 insertions(+), 40 deletions(-) create mode 100644 local-agent/master-task-completion.mjs create mode 100755 scripts/stress-remote-control.mjs create mode 100644 tests/fixtures/computer-use-dialog-runtime.mjs create mode 100644 tests/local-agent-master-task-completion.test.mjs create mode 100644 tests/local-agent-server-computer-use-dialog-flow.test.mjs diff --git a/local-agent/master-task-completion.mjs b/local-agent/master-task-completion.mjs new file mode 100644 index 0000000..4598ca8 --- /dev/null +++ b/local-agent/master-task-completion.mjs @@ -0,0 +1,100 @@ +function trimToUndefined(value) { + return typeof value === "string" && value.trim() ? value.trim() : undefined; +} + +function normalizeCompletionStatus(status) { + if (status === "failed") return "failed"; + if (status === "needs_user_action") return "needs_user_action"; + return "completed"; +} + +function normalizeStringArray(value) { + return Array.isArray(value) + ? value.map((item) => String(item).trim()).filter(Boolean) + : undefined; +} + +export function buildRemoteExecutionCompletionPayload(task, payload) { + return { + taskId: task.taskId, + status: normalizeCompletionStatus(payload.status), + requestId: trimToUndefined(payload.requestId), + replyBody: trimToUndefined(payload.replyBody), + errorMessage: trimToUndefined(payload.errorMessage), + kind: trimToUndefined(payload.kind), + dialogId: trimToUndefined(payload.dialogId), + appName: trimToUndefined(payload.appName), + platform: trimToUndefined(payload.platform), + risk: trimToUndefined(payload.risk), + summary: trimToUndefined(payload.summary), + recommendedAction: trimToUndefined(payload.recommendedAction), + availableActions: normalizeStringArray(payload.availableActions), + dispatchExecutionId: trimToUndefined(payload.dispatchExecutionId), + targetProjectId: trimToUndefined(payload.targetProjectId), + targetThreadId: trimToUndefined(payload.targetThreadId), + targetUrl: trimToUndefined(payload.targetUrl), + targetApp: trimToUndefined(payload.targetApp), + rawThreadReply: trimToUndefined(payload.rawThreadReply), + executionProgress: + payload.executionProgress && typeof payload.executionProgress === "object" + ? payload.executionProgress + : undefined, + }; +} + +export function buildComputerUseCompletionPayload(task, result) { + if (result?.status === "needs_user_action") { + return buildRemoteExecutionCompletionPayload(task, { + status: "needs_user_action", + requestId: result.requestId, + kind: result.kind, + dialogId: result.dialogId, + appName: result.appName, + platform: result.platform, + risk: result.risk, + summary: result.summary, + recommendedAction: result.recommendedAction, + availableActions: result.availableActions, + dispatchExecutionId: task.dispatchExecutionId, + targetProjectId: task.targetProjectId, + targetThreadId: task.targetThreadId, + targetApp: result.targetApp ?? result.appName, + }); + } + + return buildRemoteExecutionCompletionPayload(task, { + status: result?.status === "failed" ? "failed" : "completed", + requestId: result?.requestId, + replyBody: result?.replyBody, + errorMessage: result?.errorMessage, + dispatchExecutionId: task.dispatchExecutionId, + targetProjectId: task.targetProjectId, + targetThreadId: task.targetThreadId, + targetApp: result?.targetApp, + }); +} + +export function buildMasterAgentTaskCompletionRequestBody(config, payload) { + return { + deviceId: config.deviceId, + status: payload.status, + replyBody: payload.replyBody, + errorMessage: payload.errorMessage, + requestId: payload.requestId, + kind: payload.kind, + dialogId: payload.dialogId, + appName: payload.appName, + platform: payload.platform, + risk: payload.risk, + summary: payload.summary, + recommendedAction: payload.recommendedAction, + availableActions: payload.availableActions, + dispatchExecutionId: payload.dispatchExecutionId, + targetProjectId: payload.targetProjectId, + targetThreadId: payload.targetThreadId, + targetUrl: payload.targetUrl, + targetApp: payload.targetApp, + rawThreadReply: payload.rawThreadReply, + executionProgress: payload.executionProgress, + }; +} diff --git a/local-agent/server.mjs b/local-agent/server.mjs index 07258b5..44ab3bd 100755 --- a/local-agent/server.mjs +++ b/local-agent/server.mjs @@ -38,6 +38,11 @@ import { resolveMasterAgentTaskTimeoutMs, runWithTaskTimeout, } from "./master-task-timeout.mjs"; +import { + buildComputerUseCompletionPayload, + buildMasterAgentTaskCompletionRequestBody, + buildRemoteExecutionCompletionPayload, +} from "./master-task-completion.mjs"; import { createSerializedRunner } from "./serialized-runner.mjs"; async function loadConfig(configPath) { @@ -346,20 +351,7 @@ async function completeMasterAgentTask(config, runtime, payload) { "Content-Type": "application/json", ...deviceTokenHeaders(config, runtime), }, - body: JSON.stringify({ - deviceId: config.deviceId, - status: payload.status, - replyBody: payload.replyBody, - errorMessage: payload.errorMessage, - requestId: payload.requestId, - dispatchExecutionId: payload.dispatchExecutionId, - targetProjectId: payload.targetProjectId, - targetThreadId: payload.targetThreadId, - targetUrl: payload.targetUrl, - targetApp: payload.targetApp, - rawThreadReply: payload.rawThreadReply, - executionProgress: payload.executionProgress, - }), + body: JSON.stringify(buildMasterAgentTaskCompletionRequestBody(config, payload)), }, ); @@ -450,32 +442,6 @@ function parseDispatchExecutionCompletion(rawOutput) { }; } -function buildRemoteExecutionCompletionPayload(task, payload) { - return { - taskId: task.taskId, - status: payload.status === "failed" ? "failed" : "completed", - requestId: payload.requestId, - replyBody: typeof payload.replyBody === "string" ? payload.replyBody.trim() || undefined : undefined, - errorMessage: typeof payload.errorMessage === "string" ? payload.errorMessage.trim() || undefined : undefined, - dispatchExecutionId: - typeof payload.dispatchExecutionId === "string" ? payload.dispatchExecutionId.trim() || undefined : undefined, - targetProjectId: - typeof payload.targetProjectId === "string" ? payload.targetProjectId.trim() || undefined : undefined, - targetThreadId: - typeof payload.targetThreadId === "string" ? payload.targetThreadId.trim() || undefined : undefined, - targetUrl: - typeof payload.targetUrl === "string" ? payload.targetUrl.trim() || undefined : undefined, - targetApp: - typeof payload.targetApp === "string" ? payload.targetApp.trim() || undefined : undefined, - rawThreadReply: - typeof payload.rawThreadReply === "string" ? payload.rawThreadReply.trim() || undefined : undefined, - executionProgress: - payload.executionProgress && typeof payload.executionProgress === "object" - ? payload.executionProgress - : undefined, - }; -} - function runShortCommand(command, args, options = {}) { return new Promise((resolve) => { const child = spawn(command, args, { @@ -592,6 +558,11 @@ async function runMasterAgentTask(config, runtime, task) { if (computerUseResult.status === "failed") { throw new Error(computerUseResult.errorMessage || "COMPUTER_USE_FAILED"); } + if (computerUseResult.status === "needs_user_action") { + return { + waitingUserActionCompletion: buildComputerUseCompletionPayload(task, computerUseResult), + }; + } return { replyBody: computerUseResult.replyBody, dispatchExecutionCompletion: { @@ -707,6 +678,31 @@ async function runMasterAgentTask(config, runtime, task) { : null, }; })(); + if (executionResult.waitingUserActionCompletion) { + const completion = await completeMasterAgentTask( + config, + runtime, + executionResult.waitingUserActionCompletion, + ); + if (!completion.ok) { + throw new Error(`DIALOG_GUARD_COMPLETION_FAILED:${completion.status}:${completion.body}`); + } + runtime.activeMasterTask = { + taskId: task.taskId, + status: "needs_user_action", + completedAt: new Date().toISOString(), + detail: completion.body, + }; + await postAppLog(config, runtime, { + projectId: "master-agent", + level: "info", + category: "local_agent.desktop_dialog_guard_waiting_user_action", + message: `Master Codex Node 等待用户处理桌面弹窗:${task.taskId}`, + detail: executionResult.waitingUserActionCompletion.summary, + mirrorToMaster: false, + }); + return; + } const { replyBody, dispatchExecutionCompletion, executionProgress } = executionResult; const completion = await completeMasterAgentTask( diff --git a/package.json b/package.json index 538631c..7f6e6fc 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "admin:web:dev": "cd apps/boss-admin-web && npm run dev", "admin:web:build": "cd apps/boss-admin-web && npm run build", "admin:web:publish": "npm --prefix apps/boss-admin-web run build", + "stress:remote-control": "node scripts/stress-remote-control.mjs", "test:master-agent-controls": "tsx --test tests/master-agent-chat-controls.test.ts", "apk:debug": "cd android && ./gradlew assembleDebug && cd .. && zsh ./scripts/publish-apk-to-public.sh", "apk:release": "zsh ./scripts/build-release-apk.sh", diff --git a/scripts/stress-remote-control.mjs b/scripts/stress-remote-control.mjs new file mode 100755 index 0000000..c757ece --- /dev/null +++ b/scripts/stress-remote-control.mjs @@ -0,0 +1,487 @@ +#!/usr/bin/env node + +import { spawn } from "node:child_process"; +import { createServer } from "node:http"; +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { executeBrowserControlTask } from "../local-agent/browser-control-task-runner.mjs"; +import { executeComputerUseTask } from "../local-agent/computer-use-task-runner.mjs"; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); + +function parseArgs(argv) { + const options = { + chainTasks: 80, + runtimeTasks: 240, + runtimeConcurrency: 24, + pollMs: 5, + timeoutMs: 45_000, + skipChain: false, + skipRuntime: false, + }; + + for (const arg of argv) { + if (arg === "--skip-chain") options.skipChain = true; + else if (arg === "--skip-runtime") options.skipRuntime = true; + else if (arg.startsWith("--chain-tasks=")) options.chainTasks = positiveInt(arg.split("=")[1], options.chainTasks); + else if (arg.startsWith("--runtime-tasks=")) options.runtimeTasks = positiveInt(arg.split("=")[1], options.runtimeTasks); + else if (arg.startsWith("--runtime-concurrency=")) { + options.runtimeConcurrency = positiveInt(arg.split("=")[1], options.runtimeConcurrency); + } else if (arg.startsWith("--poll-ms=")) options.pollMs = positiveInt(arg.split("=")[1], options.pollMs); + else if (arg.startsWith("--timeout-ms=")) options.timeoutMs = positiveInt(arg.split("=")[1], options.timeoutMs); + else if (arg === "--help" || arg === "-h") { + options.help = true; + } + } + return options; +} + +function positiveInt(value, fallback) { + const parsed = Number.parseInt(String(value || ""), 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback; +} + +function percentile(values, p) { + return values[Math.min(values.length - 1, Math.floor(values.length * p))] || 0; +} + +function listen(server, host = "127.0.0.1") { + return new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, host, () => { + server.off("error", reject); + resolve(server.address().port); + }); + }); +} + +function closeServer(server) { + return new Promise((resolve) => server.close(resolve)); +} + +function readJsonBody(request) { + return new Promise((resolve, reject) => { + let raw = ""; + request.setEncoding("utf8"); + request.on("data", (chunk) => { + raw += chunk; + }); + request.on("end", () => { + try { + resolve(raw ? JSON.parse(raw) : {}); + } catch (error) { + reject(error); + } + }); + request.on("error", reject); + }); +} + +async function waitFor(predicate, timeoutMs) { + const started = Date.now(); + while (Date.now() - started < timeoutMs) { + if (await predicate()) return; + await new Promise((resolve) => setTimeout(resolve, 25)); + } + throw new Error(`stress timeout after ${timeoutMs}ms`); +} + +async function writeChainRuntimeFixtures(root) { + const browserRuntime = path.join(root, "browser-runtime.mjs"); + await writeFile( + browserRuntime, + ` +let input = ""; +process.stdin.setEncoding("utf8"); +process.stdin.on("data", chunk => input += chunk); +process.stdin.on("end", () => { + const payload = JSON.parse(input || "{}"); + const url = (payload.objective.match(/https?:\\/\\/\\S+/) || [])[0] || "https://example.com"; + process.stdout.write(JSON.stringify({ + status: "completed", + requestId: payload.requestId, + replyBody: "browser ok " + payload.requestId, + targetUrl: url, + executionSummary: "stress-browser-ok" + }) + "\\n"); +}); +`, + "utf8", + ); + + const computerRuntime = path.join(root, "computer-runtime.mjs"); + await writeFile( + computerRuntime, + ` +let input = ""; +process.stdin.setEncoding("utf8"); +process.stdin.on("data", chunk => input += chunk); +process.stdin.on("end", () => { + const payload = JSON.parse(input || "{}"); + if (String(payload.objective || "").includes("dialog")) { + process.stdout.write(JSON.stringify({ + status: "needs_user_action", + requestId: payload.requestId, + kind: "dialog_intervention_required", + dialogId: "stress-dialog-" + payload.requestId, + appName: "System Settings", + platform: "darwin", + risk: "high", + summary: "stress dialog requires user action " + payload.requestId, + recommendedAction: "handled_on_device", + availableActions: ["handled_on_device", "cancel_task"] + }) + "\\n"); + return; + } + process.stdout.write(JSON.stringify({ + status: "completed", + requestId: payload.requestId, + replyBody: "desktop ok " + payload.requestId, + targetApp: "Finder", + executionSummary: "stress-desktop-ok" + }) + "\\n"); +}); +`, + "utf8", + ); + + return { browserRuntime, computerRuntime }; +} + +function buildChainTasks(totalTasks) { + return Array.from({ length: totalTasks }, (_, index) => { + const n = index + 1; + const isDialog = n % 10 === 0; + const isBrowser = !isDialog && n % 2 === 0; + return { + taskId: `stress-task-${String(n).padStart(3, "0")}`, + taskType: isBrowser ? "browser_control" : "desktop_control", + projectId: "master-agent", + requestText: isDialog + ? `open system settings dialog ${n}` + : isBrowser + ? `open https://example.com/stress/${n}` + : `open Finder action ${n}`, + executionPrompt: "", + requestedByAccount: "krisolo", + deviceId: "mac-studio", + dispatchExecutionId: `stress-dispatch-${n}`, + targetThreadId: `stress-thread-${n}`, + requestedAt: new Date().toISOString(), + riskLevel: isDialog ? "high" : "medium", + }; + }); +} + +async function runChainStress(options) { + const runtimeRoot = await mkdtemp(path.join(os.tmpdir(), "boss-remote-control-stress-")); + const skillsDir = path.join(runtimeRoot, "skills"); + await mkdir(skillsDir, { recursive: true }); + const { browserRuntime, computerRuntime } = await writeChainRuntimeFixtures(runtimeRoot); + const tasks = buildChainTasks(options.chainTasks); + const pending = [...tasks]; + const claimedAt = new Map(); + const completions = []; + const appLogs = []; + let claimRequests = 0; + let heartbeatRequests = 0; + let skillRequests = 0; + + const controlPlane = createServer(async (request, response) => { + const url = request.url || ""; + try { + if (request.method === "POST" && url === "/api/v1/master-agent/tasks/claim") { + claimRequests += 1; + const task = pending.shift() || null; + if (task) claimedAt.set(task.taskId, Date.now()); + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true, task })); + return; + } + const completeMatch = url.match(/^\/api\/v1\/master-agent\/tasks\/([^/]+)\/complete$/); + if (request.method === "POST" && completeMatch) { + const body = await readJsonBody(request); + completions.push({ taskId: completeMatch[1], body, receivedAt: Date.now() }); + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true })); + return; + } + if (request.method === "POST" && url === "/api/device-heartbeat") { + heartbeatRequests += 1; + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true, token: "stress-server-token" })); + return; + } + if (request.method === "POST" && url === "/api/v1/devices/mac-studio/skills") { + skillRequests += 1; + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true })); + return; + } + if (request.method === "POST" && url === "/api/v1/app-logs") { + appLogs.push(await readJsonBody(request)); + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true })); + return; + } + response.writeHead(404, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: false, url })); + } catch (error) { + response.writeHead(500, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: false, error: error.message })); + } + }); + + const controlPort = await listen(controlPlane); + const probe = createServer(); + const agentPort = await listen(probe); + await closeServer(probe); + + const configPath = path.join(runtimeRoot, "local-agent-config.json"); + await writeFile( + configPath, + JSON.stringify({ + port: agentPort, + bindHost: "127.0.0.1", + controlPlaneUrl: `http://127.0.0.1:${controlPort}`, + deviceId: "mac-studio", + token: "stress-local-token", + name: "Mac Studio Stress", + account: "krisolo", + status: "online", + codexSessionDiscoveryEnabled: false, + projects: ["master-agent"], + skillsDir, + masterAgentEnabled: true, + masterAgentPollIntervalMs: options.pollMs, + heartbeatIntervalMs: 60_000, + skillLifecycleEnabled: false, + browserControlEnabled: true, + browserControlCommand: process.execPath, + browserControlArgs: [browserRuntime], + browserControlWorkdir: repoRoot, + browserControlTimeoutMs: 5_000, + computerUseEnabled: true, + computerUseCommand: process.execPath, + computerUseArgs: [computerRuntime], + computerUseWorkdir: repoRoot, + computerUseTimeoutMs: 5_000, + }), + ); + + const child = spawn(process.execPath, ["local-agent/server.mjs", configPath], { + cwd: repoRoot, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }); + let stderr = ""; + child.stderr.setEncoding("utf8"); + child.stderr.on("data", (chunk) => { + stderr += chunk; + }); + + const started = Date.now(); + try { + await waitFor(() => completions.length === options.chainTasks, options.timeoutMs); + const durationMs = Date.now() - started; + return summarizeChainStress({ + totalTasks: options.chainTasks, + durationMs, + completions, + tasks, + claimedAt, + claimRequests, + heartbeatRequests, + skillRequests, + appLogs, + stderr, + }); + } finally { + child.kill("SIGTERM"); + await closeServer(controlPlane).catch(() => null); + await rm(runtimeRoot, { recursive: true, force: true }).catch(() => null); + } +} + +function summarizeChainStress(input) { + const completed = input.completions.filter((item) => item.body.status === "completed"); + const waiting = input.completions.filter((item) => item.body.status === "needs_user_action"); + const failed = input.completions.filter((item) => item.body.status === "failed"); + const missing = input.tasks.filter( + (task) => !input.completions.some((item) => item.taskId === task.taskId), + ); + const duplicateCount = + input.completions.length - new Set(input.completions.map((item) => item.taskId)).size; + const latencies = input.completions + .map((item) => item.receivedAt - (input.claimedAt.get(item.taskId) || item.receivedAt)) + .sort((a, b) => a - b); + const invalidDialog = waiting.filter( + (item) => + item.body.kind !== "dialog_intervention_required" || + !Array.isArray(item.body.availableActions), + ); + const invalidCompleted = completed.filter((item) => !item.body.replyBody); + + return { + name: "chain", + totalTasks: input.totalTasks, + durationMs: input.durationMs, + throughputPerSec: Number((input.totalTasks / (input.durationMs / 1000)).toFixed(2)), + completed: completed.length, + waitingUserAction: waiting.length, + failed: failed.length, + missing: missing.length, + duplicateCount, + claimRequests: input.claimRequests, + heartbeatRequests: input.heartbeatRequests, + skillRequests: input.skillRequests, + appLogs: input.appLogs.length, + latencyMs: { + min: latencies[0] || 0, + p50: percentile(latencies, 0.5), + p95: percentile(latencies, 0.95), + max: latencies.at(-1) || 0, + }, + invalidDialog: invalidDialog.length, + invalidCompleted: invalidCompleted.length, + stderrTail: input.stderr.trim().slice(-500), + }; +} + +async function runRuntimeStress(options) { + const total = options.runtimeTasks; + const concurrency = Math.min(options.runtimeConcurrency, total); + let next = 0; + let active = 0; + const results = []; + const started = Date.now(); + + async function runOne(index) { + const n = index + 1; + if (n % 3 === 0) { + return executeComputerUseTask( + { + taskId: `runtime-desktop-${n}`, + taskType: "desktop_control", + requestText: `open Finder ${n}`, + projectId: "master-agent", + targetThreadId: `thread-${n}`, + requestedByAccount: "krisolo", + }, + { + computerUseEnabled: true, + computerUseCommand: process.execPath, + computerUseArgs: ["tests/fixtures/computer-use-runtime.mjs"], + computerUseWorkdir: repoRoot, + computerUseTimeoutMs: 5000, + }, + ); + } + return executeBrowserControlTask( + { + taskId: `runtime-browser-${n}`, + taskType: "browser_control", + requestText: `open https://example.com/runtime/${n}`, + projectId: "master-agent", + targetThreadId: `thread-${n}`, + requestedByAccount: "krisolo", + }, + { + browserControlEnabled: true, + browserControlCommand: process.execPath, + browserControlArgs: ["tests/fixtures/browser-control-runtime.mjs"], + browserControlWorkdir: repoRoot, + browserControlTimeoutMs: 5000, + }, + ); + } + + await new Promise((resolve) => { + const pump = () => { + while (active < concurrency && next < total) { + const index = next; + next += 1; + active += 1; + const taskStarted = Date.now(); + runOne(index) + .then((result) => results.push({ ok: true, result, latencyMs: Date.now() - taskStarted })) + .catch((error) => results.push({ ok: false, error: error.message, latencyMs: Date.now() - taskStarted })) + .finally(() => { + active -= 1; + if (results.length === total) resolve(); + else pump(); + }); + } + }; + pump(); + }); + + const durationMs = Date.now() - started; + const failed = results.filter((item) => !item.ok || item.result?.status === "failed"); + const completed = results.filter((item) => item.ok && item.result?.status === "completed"); + const latencies = results.map((item) => item.latencyMs).sort((a, b) => a - b); + return { + name: "runtime", + total, + concurrency, + durationMs, + throughputPerSec: Number((total / (durationMs / 1000)).toFixed(2)), + completed: completed.length, + failed: failed.length, + latencyMs: { + min: latencies[0] || 0, + p50: percentile(latencies, 0.5), + p95: percentile(latencies, 0.95), + max: latencies.at(-1) || 0, + }, + firstFailure: failed[0] || null, + }; +} + +function hasFailure(summary) { + if (summary.name === "chain") { + return ( + summary.failed > 0 || + summary.missing > 0 || + summary.duplicateCount > 0 || + summary.invalidDialog > 0 || + summary.invalidCompleted > 0 + ); + } + return summary.failed > 0; +} + +function printHelp() { + console.log(`Usage: node scripts/stress-remote-control.mjs [options] + +Options: + --chain-tasks=N local-agent chain tasks, default 80 + --runtime-tasks=N direct runtime tasks, default 240 + --runtime-concurrency=N direct runtime concurrency, default 24 + --poll-ms=N local-agent task poll interval, default 5 + --timeout-ms=N chain stress timeout, default 45000 + --skip-chain skip local-agent chain stress + --skip-runtime skip direct runtime stress +`); +} + +const options = parseArgs(process.argv.slice(2)); +if (options.help) { + printHelp(); + process.exit(0); +} + +const summaries = []; +if (!options.skipChain) { + summaries.push(await runChainStress(options)); +} +if (!options.skipRuntime) { + summaries.push(await runRuntimeStress(options)); +} + +console.log(JSON.stringify({ ok: summaries.every((summary) => !hasFailure(summary)), summaries }, null, 2)); +if (summaries.some(hasFailure)) { + process.exitCode = 1; +} diff --git a/tests/fixtures/computer-use-dialog-runtime.mjs b/tests/fixtures/computer-use-dialog-runtime.mjs new file mode 100644 index 0000000..3b4523d --- /dev/null +++ b/tests/fixtures/computer-use-dialog-runtime.mjs @@ -0,0 +1,24 @@ +let input = ""; + +process.stdin.setEncoding("utf8"); +process.stdin.on("data", (chunk) => { + input += chunk; +}); + +process.stdin.on("end", () => { + const payload = JSON.parse(input || "{}"); + process.stdout.write( + `${JSON.stringify({ + status: "needs_user_action", + requestId: payload.requestId, + kind: "dialog_intervention_required", + dialogId: "dialog-system-permission", + appName: "System Settings", + platform: "darwin", + risk: "high", + summary: "System Settings 弹窗需要用户确认。", + recommendedAction: "handled_on_device", + availableActions: ["handled_on_device", "cancel_task"], + })}\n`, + ); +}); diff --git a/tests/local-agent-master-task-completion.test.mjs b/tests/local-agent-master-task-completion.test.mjs new file mode 100644 index 0000000..37bcb85 --- /dev/null +++ b/tests/local-agent-master-task-completion.test.mjs @@ -0,0 +1,69 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { + buildComputerUseCompletionPayload, + buildMasterAgentTaskCompletionRequestBody, + buildRemoteExecutionCompletionPayload, +} from "../local-agent/master-task-completion.mjs"; + +test("computer use needs_user_action is preserved for server dialog guard completion", () => { + const task = { + taskId: "desktop-task-dialog", + taskType: "desktop_control", + projectId: "master-agent", + targetThreadId: "thread-1", + dispatchExecutionId: "dispatch-1", + }; + const runtimeResult = { + status: "needs_user_action", + requestId: "runtime-request-1", + kind: "dialog_intervention_required", + dialogId: "dialog-permission-1", + appName: "System Settings", + platform: "darwin", + risk: "high", + summary: "System Settings 弹窗需要用户确认。", + recommendedAction: "handled_on_device", + availableActions: ["handled_on_device", "cancel_task"], + }; + + const completion = buildComputerUseCompletionPayload(task, runtimeResult); + + assert.equal(completion.status, "needs_user_action"); + assert.equal(completion.taskId, "desktop-task-dialog"); + assert.equal(completion.kind, "dialog_intervention_required"); + assert.equal(completion.dialogId, "dialog-permission-1"); + assert.equal(completion.summary, "System Settings 弹窗需要用户确认。"); + assert.deepEqual(completion.availableActions, ["handled_on_device", "cancel_task"]); + + const requestBody = buildMasterAgentTaskCompletionRequestBody( + { deviceId: "mac-studio" }, + completion, + ); + + assert.equal(requestBody.deviceId, "mac-studio"); + assert.equal(requestBody.status, "needs_user_action"); + assert.equal(requestBody.kind, "dialog_intervention_required"); + assert.equal(requestBody.dialogId, "dialog-permission-1"); + assert.equal(requestBody.appName, "System Settings"); + assert.equal(requestBody.platform, "darwin"); + assert.equal(requestBody.risk, "high"); + assert.deepEqual(requestBody.availableActions, ["handled_on_device", "cancel_task"]); +}); + +test("remote execution completion payload does not coerce waiting state into completed", () => { + const payload = buildRemoteExecutionCompletionPayload( + { taskId: "desktop-task-dialog" }, + { + status: "needs_user_action", + requestId: "runtime-request-2", + kind: "dialog_intervention_required", + dialogId: "dialog-2", + summary: "需要确认。", + }, + ); + + assert.equal(payload.status, "needs_user_action"); + assert.equal(payload.kind, "dialog_intervention_required"); + assert.equal(payload.dialogId, "dialog-2"); +}); diff --git a/tests/local-agent-server-computer-use-dialog-flow.test.mjs b/tests/local-agent-server-computer-use-dialog-flow.test.mjs new file mode 100644 index 0000000..483677e --- /dev/null +++ b/tests/local-agent-server-computer-use-dialog-flow.test.mjs @@ -0,0 +1,167 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { spawn } from "node:child_process"; +import { createServer } from "node:http"; +import os from "node:os"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); + +function listen(server, host = "127.0.0.1") { + return new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, host, () => { + server.off("error", reject); + resolve(server.address().port); + }); + }); +} + +function readJsonBody(request) { + return new Promise((resolve, reject) => { + let raw = ""; + request.setEncoding("utf8"); + request.on("data", (chunk) => { + raw += chunk; + }); + request.on("end", () => { + try { + resolve(raw ? JSON.parse(raw) : {}); + } catch (error) { + reject(error); + } + }); + request.on("error", reject); + }); +} + +async function waitFor(predicate, timeoutMs = 5000) { + const started = Date.now(); + while (Date.now() - started < timeoutMs) { + if (await predicate()) return; + await new Promise((resolve) => setTimeout(resolve, 50)); + } + throw new Error("waitFor timeout"); +} + +test("local-agent forwards computer-use dialog intervention to control plane instead of completing task", async () => { + const runtimeRoot = await mkdtemp(path.join(os.tmpdir(), "boss-local-agent-dialog-flow-")); + const skillsDir = path.join(runtimeRoot, "skills"); + await mkdir(skillsDir, { recursive: true }); + + const completeBodies = []; + let claimCount = 0; + const controlPlane = createServer(async (request, response) => { + const url = request.url || ""; + if (request.method === "POST" && url === "/api/v1/master-agent/tasks/claim") { + claimCount += 1; + response.writeHead(200, { "content-type": "application/json" }); + response.end( + JSON.stringify({ + ok: true, + task: + claimCount === 1 + ? { + taskId: "desktop-dialog-task", + taskType: "desktop_control", + projectId: "master-agent", + requestText: "打开系统设置并处理权限弹窗", + requestedByAccount: "krisolo", + deviceId: "mac-studio", + dispatchExecutionId: "dispatch-dialog-task", + targetThreadId: "thread-dialog", + requestedAt: "2026-05-11T10:00:00.000Z", + riskLevel: "high", + } + : null, + }), + ); + return; + } + if ( + request.method === "POST" && + url === "/api/v1/master-agent/tasks/desktop-dialog-task/complete" + ) { + completeBodies.push(await readJsonBody(request)); + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true })); + return; + } + if (request.method === "POST" && url === "/api/device-heartbeat") { + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true, token: "server-token" })); + return; + } + if (request.method === "POST" && url === "/api/v1/app-logs") { + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true })); + return; + } + if (request.method === "POST" && url === "/api/v1/devices/mac-studio/skills") { + response.writeHead(200, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: true })); + return; + } + response.writeHead(404, { "content-type": "application/json" }); + response.end(JSON.stringify({ ok: false, message: "not_found", url })); + }); + + const controlPort = await listen(controlPlane); + const agentServer = createServer(); + const agentPort = await listen(agentServer); + await new Promise((resolve) => agentServer.close(resolve)); + + const configPath = path.join(runtimeRoot, "local-agent-config.json"); + await writeFile( + configPath, + JSON.stringify({ + port: agentPort, + bindHost: "127.0.0.1", + controlPlaneUrl: `http://127.0.0.1:${controlPort}`, + deviceId: "mac-studio", + token: "local-token", + name: "Mac Studio", + account: "krisolo", + status: "online", + codexSessionDiscoveryEnabled: false, + skillsDir, + masterAgentEnabled: true, + masterAgentPollIntervalMs: 60_000, + heartbeatIntervalMs: 60_000, + skillLifecycleEnabled: false, + computerUseEnabled: true, + computerUseCommand: process.execPath, + computerUseArgs: ["tests/fixtures/computer-use-dialog-runtime.mjs"], + computerUseWorkdir: repoRoot, + computerUseTimeoutMs: 5000, + }), + ); + + const child = spawn(process.execPath, ["local-agent/server.mjs", configPath], { + cwd: repoRoot, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }); + + try { + await waitFor(() => completeBodies.length > 0); + const body = completeBodies.at(0); + assert.equal(body.status, "needs_user_action"); + assert.equal(body.kind, "dialog_intervention_required"); + assert.equal(body.dialogId, "dialog-system-permission"); + assert.equal(body.appName, "System Settings"); + assert.equal(body.platform, "darwin"); + assert.equal(body.risk, "high"); + assert.equal(body.summary, "System Settings 弹窗需要用户确认。"); + assert.deepEqual(body.availableActions, ["handled_on_device", "cancel_task"]); + assert.equal(body.dispatchExecutionId, "dispatch-dialog-task"); + assert.equal(body.targetThreadId, "thread-dialog"); + assert.equal(body.replyBody, undefined); + } finally { + child.kill("SIGTERM"); + controlPlane.close(); + await rm(runtimeRoot, { recursive: true, force: true }); + } +});