test: harden remote control stress flow
This commit is contained in:
100
local-agent/master-task-completion.mjs
Normal file
100
local-agent/master-task-completion.mjs
Normal file
@@ -0,0 +1,100 @@
|
||||
function trimToUndefined(value) {
|
||||
return typeof value === "string" && value.trim() ? value.trim() : undefined;
|
||||
}
|
||||
|
||||
function normalizeCompletionStatus(status) {
|
||||
if (status === "failed") return "failed";
|
||||
if (status === "needs_user_action") return "needs_user_action";
|
||||
return "completed";
|
||||
}
|
||||
|
||||
function normalizeStringArray(value) {
|
||||
return Array.isArray(value)
|
||||
? value.map((item) => String(item).trim()).filter(Boolean)
|
||||
: undefined;
|
||||
}
|
||||
|
||||
export function buildRemoteExecutionCompletionPayload(task, payload) {
|
||||
return {
|
||||
taskId: task.taskId,
|
||||
status: normalizeCompletionStatus(payload.status),
|
||||
requestId: trimToUndefined(payload.requestId),
|
||||
replyBody: trimToUndefined(payload.replyBody),
|
||||
errorMessage: trimToUndefined(payload.errorMessage),
|
||||
kind: trimToUndefined(payload.kind),
|
||||
dialogId: trimToUndefined(payload.dialogId),
|
||||
appName: trimToUndefined(payload.appName),
|
||||
platform: trimToUndefined(payload.platform),
|
||||
risk: trimToUndefined(payload.risk),
|
||||
summary: trimToUndefined(payload.summary),
|
||||
recommendedAction: trimToUndefined(payload.recommendedAction),
|
||||
availableActions: normalizeStringArray(payload.availableActions),
|
||||
dispatchExecutionId: trimToUndefined(payload.dispatchExecutionId),
|
||||
targetProjectId: trimToUndefined(payload.targetProjectId),
|
||||
targetThreadId: trimToUndefined(payload.targetThreadId),
|
||||
targetUrl: trimToUndefined(payload.targetUrl),
|
||||
targetApp: trimToUndefined(payload.targetApp),
|
||||
rawThreadReply: trimToUndefined(payload.rawThreadReply),
|
||||
executionProgress:
|
||||
payload.executionProgress && typeof payload.executionProgress === "object"
|
||||
? payload.executionProgress
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
export function buildComputerUseCompletionPayload(task, result) {
|
||||
if (result?.status === "needs_user_action") {
|
||||
return buildRemoteExecutionCompletionPayload(task, {
|
||||
status: "needs_user_action",
|
||||
requestId: result.requestId,
|
||||
kind: result.kind,
|
||||
dialogId: result.dialogId,
|
||||
appName: result.appName,
|
||||
platform: result.platform,
|
||||
risk: result.risk,
|
||||
summary: result.summary,
|
||||
recommendedAction: result.recommendedAction,
|
||||
availableActions: result.availableActions,
|
||||
dispatchExecutionId: task.dispatchExecutionId,
|
||||
targetProjectId: task.targetProjectId,
|
||||
targetThreadId: task.targetThreadId,
|
||||
targetApp: result.targetApp ?? result.appName,
|
||||
});
|
||||
}
|
||||
|
||||
return buildRemoteExecutionCompletionPayload(task, {
|
||||
status: result?.status === "failed" ? "failed" : "completed",
|
||||
requestId: result?.requestId,
|
||||
replyBody: result?.replyBody,
|
||||
errorMessage: result?.errorMessage,
|
||||
dispatchExecutionId: task.dispatchExecutionId,
|
||||
targetProjectId: task.targetProjectId,
|
||||
targetThreadId: task.targetThreadId,
|
||||
targetApp: result?.targetApp,
|
||||
});
|
||||
}
|
||||
|
||||
export function buildMasterAgentTaskCompletionRequestBody(config, payload) {
|
||||
return {
|
||||
deviceId: config.deviceId,
|
||||
status: payload.status,
|
||||
replyBody: payload.replyBody,
|
||||
errorMessage: payload.errorMessage,
|
||||
requestId: payload.requestId,
|
||||
kind: payload.kind,
|
||||
dialogId: payload.dialogId,
|
||||
appName: payload.appName,
|
||||
platform: payload.platform,
|
||||
risk: payload.risk,
|
||||
summary: payload.summary,
|
||||
recommendedAction: payload.recommendedAction,
|
||||
availableActions: payload.availableActions,
|
||||
dispatchExecutionId: payload.dispatchExecutionId,
|
||||
targetProjectId: payload.targetProjectId,
|
||||
targetThreadId: payload.targetThreadId,
|
||||
targetUrl: payload.targetUrl,
|
||||
targetApp: payload.targetApp,
|
||||
rawThreadReply: payload.rawThreadReply,
|
||||
executionProgress: payload.executionProgress,
|
||||
};
|
||||
}
|
||||
@@ -38,6 +38,11 @@ import {
|
||||
resolveMasterAgentTaskTimeoutMs,
|
||||
runWithTaskTimeout,
|
||||
} from "./master-task-timeout.mjs";
|
||||
import {
|
||||
buildComputerUseCompletionPayload,
|
||||
buildMasterAgentTaskCompletionRequestBody,
|
||||
buildRemoteExecutionCompletionPayload,
|
||||
} from "./master-task-completion.mjs";
|
||||
import { createSerializedRunner } from "./serialized-runner.mjs";
|
||||
|
||||
async function loadConfig(configPath) {
|
||||
@@ -346,20 +351,7 @@ async function completeMasterAgentTask(config, runtime, payload) {
|
||||
"Content-Type": "application/json",
|
||||
...deviceTokenHeaders(config, runtime),
|
||||
},
|
||||
body: JSON.stringify({
|
||||
deviceId: config.deviceId,
|
||||
status: payload.status,
|
||||
replyBody: payload.replyBody,
|
||||
errorMessage: payload.errorMessage,
|
||||
requestId: payload.requestId,
|
||||
dispatchExecutionId: payload.dispatchExecutionId,
|
||||
targetProjectId: payload.targetProjectId,
|
||||
targetThreadId: payload.targetThreadId,
|
||||
targetUrl: payload.targetUrl,
|
||||
targetApp: payload.targetApp,
|
||||
rawThreadReply: payload.rawThreadReply,
|
||||
executionProgress: payload.executionProgress,
|
||||
}),
|
||||
body: JSON.stringify(buildMasterAgentTaskCompletionRequestBody(config, payload)),
|
||||
},
|
||||
);
|
||||
|
||||
@@ -450,32 +442,6 @@ function parseDispatchExecutionCompletion(rawOutput) {
|
||||
};
|
||||
}
|
||||
|
||||
function buildRemoteExecutionCompletionPayload(task, payload) {
|
||||
return {
|
||||
taskId: task.taskId,
|
||||
status: payload.status === "failed" ? "failed" : "completed",
|
||||
requestId: payload.requestId,
|
||||
replyBody: typeof payload.replyBody === "string" ? payload.replyBody.trim() || undefined : undefined,
|
||||
errorMessage: typeof payload.errorMessage === "string" ? payload.errorMessage.trim() || undefined : undefined,
|
||||
dispatchExecutionId:
|
||||
typeof payload.dispatchExecutionId === "string" ? payload.dispatchExecutionId.trim() || undefined : undefined,
|
||||
targetProjectId:
|
||||
typeof payload.targetProjectId === "string" ? payload.targetProjectId.trim() || undefined : undefined,
|
||||
targetThreadId:
|
||||
typeof payload.targetThreadId === "string" ? payload.targetThreadId.trim() || undefined : undefined,
|
||||
targetUrl:
|
||||
typeof payload.targetUrl === "string" ? payload.targetUrl.trim() || undefined : undefined,
|
||||
targetApp:
|
||||
typeof payload.targetApp === "string" ? payload.targetApp.trim() || undefined : undefined,
|
||||
rawThreadReply:
|
||||
typeof payload.rawThreadReply === "string" ? payload.rawThreadReply.trim() || undefined : undefined,
|
||||
executionProgress:
|
||||
payload.executionProgress && typeof payload.executionProgress === "object"
|
||||
? payload.executionProgress
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
function runShortCommand(command, args, options = {}) {
|
||||
return new Promise((resolve) => {
|
||||
const child = spawn(command, args, {
|
||||
@@ -592,6 +558,11 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
if (computerUseResult.status === "failed") {
|
||||
throw new Error(computerUseResult.errorMessage || "COMPUTER_USE_FAILED");
|
||||
}
|
||||
if (computerUseResult.status === "needs_user_action") {
|
||||
return {
|
||||
waitingUserActionCompletion: buildComputerUseCompletionPayload(task, computerUseResult),
|
||||
};
|
||||
}
|
||||
return {
|
||||
replyBody: computerUseResult.replyBody,
|
||||
dispatchExecutionCompletion: {
|
||||
@@ -707,6 +678,31 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
: null,
|
||||
};
|
||||
})();
|
||||
if (executionResult.waitingUserActionCompletion) {
|
||||
const completion = await completeMasterAgentTask(
|
||||
config,
|
||||
runtime,
|
||||
executionResult.waitingUserActionCompletion,
|
||||
);
|
||||
if (!completion.ok) {
|
||||
throw new Error(`DIALOG_GUARD_COMPLETION_FAILED:${completion.status}:${completion.body}`);
|
||||
}
|
||||
runtime.activeMasterTask = {
|
||||
taskId: task.taskId,
|
||||
status: "needs_user_action",
|
||||
completedAt: new Date().toISOString(),
|
||||
detail: completion.body,
|
||||
};
|
||||
await postAppLog(config, runtime, {
|
||||
projectId: "master-agent",
|
||||
level: "info",
|
||||
category: "local_agent.desktop_dialog_guard_waiting_user_action",
|
||||
message: `Master Codex Node 等待用户处理桌面弹窗:${task.taskId}`,
|
||||
detail: executionResult.waitingUserActionCompletion.summary,
|
||||
mirrorToMaster: false,
|
||||
});
|
||||
return;
|
||||
}
|
||||
const { replyBody, dispatchExecutionCompletion, executionProgress } = executionResult;
|
||||
|
||||
const completion = await completeMasterAgentTask(
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
"admin:web:dev": "cd apps/boss-admin-web && npm run dev",
|
||||
"admin:web:build": "cd apps/boss-admin-web && npm run build",
|
||||
"admin:web:publish": "npm --prefix apps/boss-admin-web run build",
|
||||
"stress:remote-control": "node scripts/stress-remote-control.mjs",
|
||||
"test:master-agent-controls": "tsx --test tests/master-agent-chat-controls.test.ts",
|
||||
"apk:debug": "cd android && ./gradlew assembleDebug && cd .. && zsh ./scripts/publish-apk-to-public.sh",
|
||||
"apk:release": "zsh ./scripts/build-release-apk.sh",
|
||||
|
||||
487
scripts/stress-remote-control.mjs
Executable file
487
scripts/stress-remote-control.mjs
Executable file
@@ -0,0 +1,487 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { createServer } from "node:http";
|
||||
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { executeBrowserControlTask } from "../local-agent/browser-control-task-runner.mjs";
|
||||
import { executeComputerUseTask } from "../local-agent/computer-use-task-runner.mjs";
|
||||
|
||||
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
|
||||
|
||||
function parseArgs(argv) {
|
||||
const options = {
|
||||
chainTasks: 80,
|
||||
runtimeTasks: 240,
|
||||
runtimeConcurrency: 24,
|
||||
pollMs: 5,
|
||||
timeoutMs: 45_000,
|
||||
skipChain: false,
|
||||
skipRuntime: false,
|
||||
};
|
||||
|
||||
for (const arg of argv) {
|
||||
if (arg === "--skip-chain") options.skipChain = true;
|
||||
else if (arg === "--skip-runtime") options.skipRuntime = true;
|
||||
else if (arg.startsWith("--chain-tasks=")) options.chainTasks = positiveInt(arg.split("=")[1], options.chainTasks);
|
||||
else if (arg.startsWith("--runtime-tasks=")) options.runtimeTasks = positiveInt(arg.split("=")[1], options.runtimeTasks);
|
||||
else if (arg.startsWith("--runtime-concurrency=")) {
|
||||
options.runtimeConcurrency = positiveInt(arg.split("=")[1], options.runtimeConcurrency);
|
||||
} else if (arg.startsWith("--poll-ms=")) options.pollMs = positiveInt(arg.split("=")[1], options.pollMs);
|
||||
else if (arg.startsWith("--timeout-ms=")) options.timeoutMs = positiveInt(arg.split("=")[1], options.timeoutMs);
|
||||
else if (arg === "--help" || arg === "-h") {
|
||||
options.help = true;
|
||||
}
|
||||
}
|
||||
return options;
|
||||
}
|
||||
|
||||
function positiveInt(value, fallback) {
|
||||
const parsed = Number.parseInt(String(value || ""), 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
|
||||
}
|
||||
|
||||
function percentile(values, p) {
|
||||
return values[Math.min(values.length - 1, Math.floor(values.length * p))] || 0;
|
||||
}
|
||||
|
||||
function listen(server, host = "127.0.0.1") {
|
||||
return new Promise((resolve, reject) => {
|
||||
server.once("error", reject);
|
||||
server.listen(0, host, () => {
|
||||
server.off("error", reject);
|
||||
resolve(server.address().port);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function closeServer(server) {
|
||||
return new Promise((resolve) => server.close(resolve));
|
||||
}
|
||||
|
||||
function readJsonBody(request) {
|
||||
return new Promise((resolve, reject) => {
|
||||
let raw = "";
|
||||
request.setEncoding("utf8");
|
||||
request.on("data", (chunk) => {
|
||||
raw += chunk;
|
||||
});
|
||||
request.on("end", () => {
|
||||
try {
|
||||
resolve(raw ? JSON.parse(raw) : {});
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
request.on("error", reject);
|
||||
});
|
||||
}
|
||||
|
||||
async function waitFor(predicate, timeoutMs) {
|
||||
const started = Date.now();
|
||||
while (Date.now() - started < timeoutMs) {
|
||||
if (await predicate()) return;
|
||||
await new Promise((resolve) => setTimeout(resolve, 25));
|
||||
}
|
||||
throw new Error(`stress timeout after ${timeoutMs}ms`);
|
||||
}
|
||||
|
||||
async function writeChainRuntimeFixtures(root) {
|
||||
const browserRuntime = path.join(root, "browser-runtime.mjs");
|
||||
await writeFile(
|
||||
browserRuntime,
|
||||
`
|
||||
let input = "";
|
||||
process.stdin.setEncoding("utf8");
|
||||
process.stdin.on("data", chunk => input += chunk);
|
||||
process.stdin.on("end", () => {
|
||||
const payload = JSON.parse(input || "{}");
|
||||
const url = (payload.objective.match(/https?:\\/\\/\\S+/) || [])[0] || "https://example.com";
|
||||
process.stdout.write(JSON.stringify({
|
||||
status: "completed",
|
||||
requestId: payload.requestId,
|
||||
replyBody: "browser ok " + payload.requestId,
|
||||
targetUrl: url,
|
||||
executionSummary: "stress-browser-ok"
|
||||
}) + "\\n");
|
||||
});
|
||||
`,
|
||||
"utf8",
|
||||
);
|
||||
|
||||
const computerRuntime = path.join(root, "computer-runtime.mjs");
|
||||
await writeFile(
|
||||
computerRuntime,
|
||||
`
|
||||
let input = "";
|
||||
process.stdin.setEncoding("utf8");
|
||||
process.stdin.on("data", chunk => input += chunk);
|
||||
process.stdin.on("end", () => {
|
||||
const payload = JSON.parse(input || "{}");
|
||||
if (String(payload.objective || "").includes("dialog")) {
|
||||
process.stdout.write(JSON.stringify({
|
||||
status: "needs_user_action",
|
||||
requestId: payload.requestId,
|
||||
kind: "dialog_intervention_required",
|
||||
dialogId: "stress-dialog-" + payload.requestId,
|
||||
appName: "System Settings",
|
||||
platform: "darwin",
|
||||
risk: "high",
|
||||
summary: "stress dialog requires user action " + payload.requestId,
|
||||
recommendedAction: "handled_on_device",
|
||||
availableActions: ["handled_on_device", "cancel_task"]
|
||||
}) + "\\n");
|
||||
return;
|
||||
}
|
||||
process.stdout.write(JSON.stringify({
|
||||
status: "completed",
|
||||
requestId: payload.requestId,
|
||||
replyBody: "desktop ok " + payload.requestId,
|
||||
targetApp: "Finder",
|
||||
executionSummary: "stress-desktop-ok"
|
||||
}) + "\\n");
|
||||
});
|
||||
`,
|
||||
"utf8",
|
||||
);
|
||||
|
||||
return { browserRuntime, computerRuntime };
|
||||
}
|
||||
|
||||
function buildChainTasks(totalTasks) {
|
||||
return Array.from({ length: totalTasks }, (_, index) => {
|
||||
const n = index + 1;
|
||||
const isDialog = n % 10 === 0;
|
||||
const isBrowser = !isDialog && n % 2 === 0;
|
||||
return {
|
||||
taskId: `stress-task-${String(n).padStart(3, "0")}`,
|
||||
taskType: isBrowser ? "browser_control" : "desktop_control",
|
||||
projectId: "master-agent",
|
||||
requestText: isDialog
|
||||
? `open system settings dialog ${n}`
|
||||
: isBrowser
|
||||
? `open https://example.com/stress/${n}`
|
||||
: `open Finder action ${n}`,
|
||||
executionPrompt: "",
|
||||
requestedByAccount: "krisolo",
|
||||
deviceId: "mac-studio",
|
||||
dispatchExecutionId: `stress-dispatch-${n}`,
|
||||
targetThreadId: `stress-thread-${n}`,
|
||||
requestedAt: new Date().toISOString(),
|
||||
riskLevel: isDialog ? "high" : "medium",
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
async function runChainStress(options) {
|
||||
const runtimeRoot = await mkdtemp(path.join(os.tmpdir(), "boss-remote-control-stress-"));
|
||||
const skillsDir = path.join(runtimeRoot, "skills");
|
||||
await mkdir(skillsDir, { recursive: true });
|
||||
const { browserRuntime, computerRuntime } = await writeChainRuntimeFixtures(runtimeRoot);
|
||||
const tasks = buildChainTasks(options.chainTasks);
|
||||
const pending = [...tasks];
|
||||
const claimedAt = new Map();
|
||||
const completions = [];
|
||||
const appLogs = [];
|
||||
let claimRequests = 0;
|
||||
let heartbeatRequests = 0;
|
||||
let skillRequests = 0;
|
||||
|
||||
const controlPlane = createServer(async (request, response) => {
|
||||
const url = request.url || "";
|
||||
try {
|
||||
if (request.method === "POST" && url === "/api/v1/master-agent/tasks/claim") {
|
||||
claimRequests += 1;
|
||||
const task = pending.shift() || null;
|
||||
if (task) claimedAt.set(task.taskId, Date.now());
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true, task }));
|
||||
return;
|
||||
}
|
||||
const completeMatch = url.match(/^\/api\/v1\/master-agent\/tasks\/([^/]+)\/complete$/);
|
||||
if (request.method === "POST" && completeMatch) {
|
||||
const body = await readJsonBody(request);
|
||||
completions.push({ taskId: completeMatch[1], body, receivedAt: Date.now() });
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true }));
|
||||
return;
|
||||
}
|
||||
if (request.method === "POST" && url === "/api/device-heartbeat") {
|
||||
heartbeatRequests += 1;
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true, token: "stress-server-token" }));
|
||||
return;
|
||||
}
|
||||
if (request.method === "POST" && url === "/api/v1/devices/mac-studio/skills") {
|
||||
skillRequests += 1;
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true }));
|
||||
return;
|
||||
}
|
||||
if (request.method === "POST" && url === "/api/v1/app-logs") {
|
||||
appLogs.push(await readJsonBody(request));
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true }));
|
||||
return;
|
||||
}
|
||||
response.writeHead(404, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: false, url }));
|
||||
} catch (error) {
|
||||
response.writeHead(500, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: false, error: error.message }));
|
||||
}
|
||||
});
|
||||
|
||||
const controlPort = await listen(controlPlane);
|
||||
const probe = createServer();
|
||||
const agentPort = await listen(probe);
|
||||
await closeServer(probe);
|
||||
|
||||
const configPath = path.join(runtimeRoot, "local-agent-config.json");
|
||||
await writeFile(
|
||||
configPath,
|
||||
JSON.stringify({
|
||||
port: agentPort,
|
||||
bindHost: "127.0.0.1",
|
||||
controlPlaneUrl: `http://127.0.0.1:${controlPort}`,
|
||||
deviceId: "mac-studio",
|
||||
token: "stress-local-token",
|
||||
name: "Mac Studio Stress",
|
||||
account: "krisolo",
|
||||
status: "online",
|
||||
codexSessionDiscoveryEnabled: false,
|
||||
projects: ["master-agent"],
|
||||
skillsDir,
|
||||
masterAgentEnabled: true,
|
||||
masterAgentPollIntervalMs: options.pollMs,
|
||||
heartbeatIntervalMs: 60_000,
|
||||
skillLifecycleEnabled: false,
|
||||
browserControlEnabled: true,
|
||||
browserControlCommand: process.execPath,
|
||||
browserControlArgs: [browserRuntime],
|
||||
browserControlWorkdir: repoRoot,
|
||||
browserControlTimeoutMs: 5_000,
|
||||
computerUseEnabled: true,
|
||||
computerUseCommand: process.execPath,
|
||||
computerUseArgs: [computerRuntime],
|
||||
computerUseWorkdir: repoRoot,
|
||||
computerUseTimeoutMs: 5_000,
|
||||
}),
|
||||
);
|
||||
|
||||
const child = spawn(process.execPath, ["local-agent/server.mjs", configPath], {
|
||||
cwd: repoRoot,
|
||||
env: process.env,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
let stderr = "";
|
||||
child.stderr.setEncoding("utf8");
|
||||
child.stderr.on("data", (chunk) => {
|
||||
stderr += chunk;
|
||||
});
|
||||
|
||||
const started = Date.now();
|
||||
try {
|
||||
await waitFor(() => completions.length === options.chainTasks, options.timeoutMs);
|
||||
const durationMs = Date.now() - started;
|
||||
return summarizeChainStress({
|
||||
totalTasks: options.chainTasks,
|
||||
durationMs,
|
||||
completions,
|
||||
tasks,
|
||||
claimedAt,
|
||||
claimRequests,
|
||||
heartbeatRequests,
|
||||
skillRequests,
|
||||
appLogs,
|
||||
stderr,
|
||||
});
|
||||
} finally {
|
||||
child.kill("SIGTERM");
|
||||
await closeServer(controlPlane).catch(() => null);
|
||||
await rm(runtimeRoot, { recursive: true, force: true }).catch(() => null);
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeChainStress(input) {
|
||||
const completed = input.completions.filter((item) => item.body.status === "completed");
|
||||
const waiting = input.completions.filter((item) => item.body.status === "needs_user_action");
|
||||
const failed = input.completions.filter((item) => item.body.status === "failed");
|
||||
const missing = input.tasks.filter(
|
||||
(task) => !input.completions.some((item) => item.taskId === task.taskId),
|
||||
);
|
||||
const duplicateCount =
|
||||
input.completions.length - new Set(input.completions.map((item) => item.taskId)).size;
|
||||
const latencies = input.completions
|
||||
.map((item) => item.receivedAt - (input.claimedAt.get(item.taskId) || item.receivedAt))
|
||||
.sort((a, b) => a - b);
|
||||
const invalidDialog = waiting.filter(
|
||||
(item) =>
|
||||
item.body.kind !== "dialog_intervention_required" ||
|
||||
!Array.isArray(item.body.availableActions),
|
||||
);
|
||||
const invalidCompleted = completed.filter((item) => !item.body.replyBody);
|
||||
|
||||
return {
|
||||
name: "chain",
|
||||
totalTasks: input.totalTasks,
|
||||
durationMs: input.durationMs,
|
||||
throughputPerSec: Number((input.totalTasks / (input.durationMs / 1000)).toFixed(2)),
|
||||
completed: completed.length,
|
||||
waitingUserAction: waiting.length,
|
||||
failed: failed.length,
|
||||
missing: missing.length,
|
||||
duplicateCount,
|
||||
claimRequests: input.claimRequests,
|
||||
heartbeatRequests: input.heartbeatRequests,
|
||||
skillRequests: input.skillRequests,
|
||||
appLogs: input.appLogs.length,
|
||||
latencyMs: {
|
||||
min: latencies[0] || 0,
|
||||
p50: percentile(latencies, 0.5),
|
||||
p95: percentile(latencies, 0.95),
|
||||
max: latencies.at(-1) || 0,
|
||||
},
|
||||
invalidDialog: invalidDialog.length,
|
||||
invalidCompleted: invalidCompleted.length,
|
||||
stderrTail: input.stderr.trim().slice(-500),
|
||||
};
|
||||
}
|
||||
|
||||
async function runRuntimeStress(options) {
|
||||
const total = options.runtimeTasks;
|
||||
const concurrency = Math.min(options.runtimeConcurrency, total);
|
||||
let next = 0;
|
||||
let active = 0;
|
||||
const results = [];
|
||||
const started = Date.now();
|
||||
|
||||
async function runOne(index) {
|
||||
const n = index + 1;
|
||||
if (n % 3 === 0) {
|
||||
return executeComputerUseTask(
|
||||
{
|
||||
taskId: `runtime-desktop-${n}`,
|
||||
taskType: "desktop_control",
|
||||
requestText: `open Finder ${n}`,
|
||||
projectId: "master-agent",
|
||||
targetThreadId: `thread-${n}`,
|
||||
requestedByAccount: "krisolo",
|
||||
},
|
||||
{
|
||||
computerUseEnabled: true,
|
||||
computerUseCommand: process.execPath,
|
||||
computerUseArgs: ["tests/fixtures/computer-use-runtime.mjs"],
|
||||
computerUseWorkdir: repoRoot,
|
||||
computerUseTimeoutMs: 5000,
|
||||
},
|
||||
);
|
||||
}
|
||||
return executeBrowserControlTask(
|
||||
{
|
||||
taskId: `runtime-browser-${n}`,
|
||||
taskType: "browser_control",
|
||||
requestText: `open https://example.com/runtime/${n}`,
|
||||
projectId: "master-agent",
|
||||
targetThreadId: `thread-${n}`,
|
||||
requestedByAccount: "krisolo",
|
||||
},
|
||||
{
|
||||
browserControlEnabled: true,
|
||||
browserControlCommand: process.execPath,
|
||||
browserControlArgs: ["tests/fixtures/browser-control-runtime.mjs"],
|
||||
browserControlWorkdir: repoRoot,
|
||||
browserControlTimeoutMs: 5000,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
await new Promise((resolve) => {
|
||||
const pump = () => {
|
||||
while (active < concurrency && next < total) {
|
||||
const index = next;
|
||||
next += 1;
|
||||
active += 1;
|
||||
const taskStarted = Date.now();
|
||||
runOne(index)
|
||||
.then((result) => results.push({ ok: true, result, latencyMs: Date.now() - taskStarted }))
|
||||
.catch((error) => results.push({ ok: false, error: error.message, latencyMs: Date.now() - taskStarted }))
|
||||
.finally(() => {
|
||||
active -= 1;
|
||||
if (results.length === total) resolve();
|
||||
else pump();
|
||||
});
|
||||
}
|
||||
};
|
||||
pump();
|
||||
});
|
||||
|
||||
const durationMs = Date.now() - started;
|
||||
const failed = results.filter((item) => !item.ok || item.result?.status === "failed");
|
||||
const completed = results.filter((item) => item.ok && item.result?.status === "completed");
|
||||
const latencies = results.map((item) => item.latencyMs).sort((a, b) => a - b);
|
||||
return {
|
||||
name: "runtime",
|
||||
total,
|
||||
concurrency,
|
||||
durationMs,
|
||||
throughputPerSec: Number((total / (durationMs / 1000)).toFixed(2)),
|
||||
completed: completed.length,
|
||||
failed: failed.length,
|
||||
latencyMs: {
|
||||
min: latencies[0] || 0,
|
||||
p50: percentile(latencies, 0.5),
|
||||
p95: percentile(latencies, 0.95),
|
||||
max: latencies.at(-1) || 0,
|
||||
},
|
||||
firstFailure: failed[0] || null,
|
||||
};
|
||||
}
|
||||
|
||||
function hasFailure(summary) {
|
||||
if (summary.name === "chain") {
|
||||
return (
|
||||
summary.failed > 0 ||
|
||||
summary.missing > 0 ||
|
||||
summary.duplicateCount > 0 ||
|
||||
summary.invalidDialog > 0 ||
|
||||
summary.invalidCompleted > 0
|
||||
);
|
||||
}
|
||||
return summary.failed > 0;
|
||||
}
|
||||
|
||||
function printHelp() {
|
||||
console.log(`Usage: node scripts/stress-remote-control.mjs [options]
|
||||
|
||||
Options:
|
||||
--chain-tasks=N local-agent chain tasks, default 80
|
||||
--runtime-tasks=N direct runtime tasks, default 240
|
||||
--runtime-concurrency=N direct runtime concurrency, default 24
|
||||
--poll-ms=N local-agent task poll interval, default 5
|
||||
--timeout-ms=N chain stress timeout, default 45000
|
||||
--skip-chain skip local-agent chain stress
|
||||
--skip-runtime skip direct runtime stress
|
||||
`);
|
||||
}
|
||||
|
||||
const options = parseArgs(process.argv.slice(2));
|
||||
if (options.help) {
|
||||
printHelp();
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const summaries = [];
|
||||
if (!options.skipChain) {
|
||||
summaries.push(await runChainStress(options));
|
||||
}
|
||||
if (!options.skipRuntime) {
|
||||
summaries.push(await runRuntimeStress(options));
|
||||
}
|
||||
|
||||
console.log(JSON.stringify({ ok: summaries.every((summary) => !hasFailure(summary)), summaries }, null, 2));
|
||||
if (summaries.some(hasFailure)) {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
24
tests/fixtures/computer-use-dialog-runtime.mjs
vendored
Normal file
24
tests/fixtures/computer-use-dialog-runtime.mjs
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
let input = "";
|
||||
|
||||
process.stdin.setEncoding("utf8");
|
||||
process.stdin.on("data", (chunk) => {
|
||||
input += chunk;
|
||||
});
|
||||
|
||||
process.stdin.on("end", () => {
|
||||
const payload = JSON.parse(input || "{}");
|
||||
process.stdout.write(
|
||||
`${JSON.stringify({
|
||||
status: "needs_user_action",
|
||||
requestId: payload.requestId,
|
||||
kind: "dialog_intervention_required",
|
||||
dialogId: "dialog-system-permission",
|
||||
appName: "System Settings",
|
||||
platform: "darwin",
|
||||
risk: "high",
|
||||
summary: "System Settings 弹窗需要用户确认。",
|
||||
recommendedAction: "handled_on_device",
|
||||
availableActions: ["handled_on_device", "cancel_task"],
|
||||
})}\n`,
|
||||
);
|
||||
});
|
||||
69
tests/local-agent-master-task-completion.test.mjs
Normal file
69
tests/local-agent-master-task-completion.test.mjs
Normal file
@@ -0,0 +1,69 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
buildComputerUseCompletionPayload,
|
||||
buildMasterAgentTaskCompletionRequestBody,
|
||||
buildRemoteExecutionCompletionPayload,
|
||||
} from "../local-agent/master-task-completion.mjs";
|
||||
|
||||
test("computer use needs_user_action is preserved for server dialog guard completion", () => {
|
||||
const task = {
|
||||
taskId: "desktop-task-dialog",
|
||||
taskType: "desktop_control",
|
||||
projectId: "master-agent",
|
||||
targetThreadId: "thread-1",
|
||||
dispatchExecutionId: "dispatch-1",
|
||||
};
|
||||
const runtimeResult = {
|
||||
status: "needs_user_action",
|
||||
requestId: "runtime-request-1",
|
||||
kind: "dialog_intervention_required",
|
||||
dialogId: "dialog-permission-1",
|
||||
appName: "System Settings",
|
||||
platform: "darwin",
|
||||
risk: "high",
|
||||
summary: "System Settings 弹窗需要用户确认。",
|
||||
recommendedAction: "handled_on_device",
|
||||
availableActions: ["handled_on_device", "cancel_task"],
|
||||
};
|
||||
|
||||
const completion = buildComputerUseCompletionPayload(task, runtimeResult);
|
||||
|
||||
assert.equal(completion.status, "needs_user_action");
|
||||
assert.equal(completion.taskId, "desktop-task-dialog");
|
||||
assert.equal(completion.kind, "dialog_intervention_required");
|
||||
assert.equal(completion.dialogId, "dialog-permission-1");
|
||||
assert.equal(completion.summary, "System Settings 弹窗需要用户确认。");
|
||||
assert.deepEqual(completion.availableActions, ["handled_on_device", "cancel_task"]);
|
||||
|
||||
const requestBody = buildMasterAgentTaskCompletionRequestBody(
|
||||
{ deviceId: "mac-studio" },
|
||||
completion,
|
||||
);
|
||||
|
||||
assert.equal(requestBody.deviceId, "mac-studio");
|
||||
assert.equal(requestBody.status, "needs_user_action");
|
||||
assert.equal(requestBody.kind, "dialog_intervention_required");
|
||||
assert.equal(requestBody.dialogId, "dialog-permission-1");
|
||||
assert.equal(requestBody.appName, "System Settings");
|
||||
assert.equal(requestBody.platform, "darwin");
|
||||
assert.equal(requestBody.risk, "high");
|
||||
assert.deepEqual(requestBody.availableActions, ["handled_on_device", "cancel_task"]);
|
||||
});
|
||||
|
||||
test("remote execution completion payload does not coerce waiting state into completed", () => {
|
||||
const payload = buildRemoteExecutionCompletionPayload(
|
||||
{ taskId: "desktop-task-dialog" },
|
||||
{
|
||||
status: "needs_user_action",
|
||||
requestId: "runtime-request-2",
|
||||
kind: "dialog_intervention_required",
|
||||
dialogId: "dialog-2",
|
||||
summary: "需要确认。",
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(payload.status, "needs_user_action");
|
||||
assert.equal(payload.kind, "dialog_intervention_required");
|
||||
assert.equal(payload.dialogId, "dialog-2");
|
||||
});
|
||||
167
tests/local-agent-server-computer-use-dialog-flow.test.mjs
Normal file
167
tests/local-agent-server-computer-use-dialog-flow.test.mjs
Normal file
@@ -0,0 +1,167 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { spawn } from "node:child_process";
|
||||
import { createServer } from "node:http";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
|
||||
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
|
||||
|
||||
function listen(server, host = "127.0.0.1") {
|
||||
return new Promise((resolve, reject) => {
|
||||
server.once("error", reject);
|
||||
server.listen(0, host, () => {
|
||||
server.off("error", reject);
|
||||
resolve(server.address().port);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function readJsonBody(request) {
|
||||
return new Promise((resolve, reject) => {
|
||||
let raw = "";
|
||||
request.setEncoding("utf8");
|
||||
request.on("data", (chunk) => {
|
||||
raw += chunk;
|
||||
});
|
||||
request.on("end", () => {
|
||||
try {
|
||||
resolve(raw ? JSON.parse(raw) : {});
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
request.on("error", reject);
|
||||
});
|
||||
}
|
||||
|
||||
async function waitFor(predicate, timeoutMs = 5000) {
|
||||
const started = Date.now();
|
||||
while (Date.now() - started < timeoutMs) {
|
||||
if (await predicate()) return;
|
||||
await new Promise((resolve) => setTimeout(resolve, 50));
|
||||
}
|
||||
throw new Error("waitFor timeout");
|
||||
}
|
||||
|
||||
test("local-agent forwards computer-use dialog intervention to control plane instead of completing task", async () => {
|
||||
const runtimeRoot = await mkdtemp(path.join(os.tmpdir(), "boss-local-agent-dialog-flow-"));
|
||||
const skillsDir = path.join(runtimeRoot, "skills");
|
||||
await mkdir(skillsDir, { recursive: true });
|
||||
|
||||
const completeBodies = [];
|
||||
let claimCount = 0;
|
||||
const controlPlane = createServer(async (request, response) => {
|
||||
const url = request.url || "";
|
||||
if (request.method === "POST" && url === "/api/v1/master-agent/tasks/claim") {
|
||||
claimCount += 1;
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(
|
||||
JSON.stringify({
|
||||
ok: true,
|
||||
task:
|
||||
claimCount === 1
|
||||
? {
|
||||
taskId: "desktop-dialog-task",
|
||||
taskType: "desktop_control",
|
||||
projectId: "master-agent",
|
||||
requestText: "打开系统设置并处理权限弹窗",
|
||||
requestedByAccount: "krisolo",
|
||||
deviceId: "mac-studio",
|
||||
dispatchExecutionId: "dispatch-dialog-task",
|
||||
targetThreadId: "thread-dialog",
|
||||
requestedAt: "2026-05-11T10:00:00.000Z",
|
||||
riskLevel: "high",
|
||||
}
|
||||
: null,
|
||||
}),
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (
|
||||
request.method === "POST" &&
|
||||
url === "/api/v1/master-agent/tasks/desktop-dialog-task/complete"
|
||||
) {
|
||||
completeBodies.push(await readJsonBody(request));
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true }));
|
||||
return;
|
||||
}
|
||||
if (request.method === "POST" && url === "/api/device-heartbeat") {
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true, token: "server-token" }));
|
||||
return;
|
||||
}
|
||||
if (request.method === "POST" && url === "/api/v1/app-logs") {
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true }));
|
||||
return;
|
||||
}
|
||||
if (request.method === "POST" && url === "/api/v1/devices/mac-studio/skills") {
|
||||
response.writeHead(200, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: true }));
|
||||
return;
|
||||
}
|
||||
response.writeHead(404, { "content-type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: false, message: "not_found", url }));
|
||||
});
|
||||
|
||||
const controlPort = await listen(controlPlane);
|
||||
const agentServer = createServer();
|
||||
const agentPort = await listen(agentServer);
|
||||
await new Promise((resolve) => agentServer.close(resolve));
|
||||
|
||||
const configPath = path.join(runtimeRoot, "local-agent-config.json");
|
||||
await writeFile(
|
||||
configPath,
|
||||
JSON.stringify({
|
||||
port: agentPort,
|
||||
bindHost: "127.0.0.1",
|
||||
controlPlaneUrl: `http://127.0.0.1:${controlPort}`,
|
||||
deviceId: "mac-studio",
|
||||
token: "local-token",
|
||||
name: "Mac Studio",
|
||||
account: "krisolo",
|
||||
status: "online",
|
||||
codexSessionDiscoveryEnabled: false,
|
||||
skillsDir,
|
||||
masterAgentEnabled: true,
|
||||
masterAgentPollIntervalMs: 60_000,
|
||||
heartbeatIntervalMs: 60_000,
|
||||
skillLifecycleEnabled: false,
|
||||
computerUseEnabled: true,
|
||||
computerUseCommand: process.execPath,
|
||||
computerUseArgs: ["tests/fixtures/computer-use-dialog-runtime.mjs"],
|
||||
computerUseWorkdir: repoRoot,
|
||||
computerUseTimeoutMs: 5000,
|
||||
}),
|
||||
);
|
||||
|
||||
const child = spawn(process.execPath, ["local-agent/server.mjs", configPath], {
|
||||
cwd: repoRoot,
|
||||
env: process.env,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
try {
|
||||
await waitFor(() => completeBodies.length > 0);
|
||||
const body = completeBodies.at(0);
|
||||
assert.equal(body.status, "needs_user_action");
|
||||
assert.equal(body.kind, "dialog_intervention_required");
|
||||
assert.equal(body.dialogId, "dialog-system-permission");
|
||||
assert.equal(body.appName, "System Settings");
|
||||
assert.equal(body.platform, "darwin");
|
||||
assert.equal(body.risk, "high");
|
||||
assert.equal(body.summary, "System Settings 弹窗需要用户确认。");
|
||||
assert.deepEqual(body.availableActions, ["handled_on_device", "cancel_task"]);
|
||||
assert.equal(body.dispatchExecutionId, "dispatch-dialog-task");
|
||||
assert.equal(body.targetThreadId, "thread-dialog");
|
||||
assert.equal(body.replyBody, undefined);
|
||||
} finally {
|
||||
child.kill("SIGTERM");
|
||||
controlPlane.close();
|
||||
await rm(runtimeRoot, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
Reference in New Issue
Block a user