// Files
// boss/tests/local-agent-computer-use-runner.test.mjs
// 2026-05-17 02:20:08 +08:00
//
// 300 lines
// 11 KiB
// JavaScript

import test from "node:test";
import assert from "node:assert/strict";
import path from "node:path";
import { fileURLToPath } from "node:url";
import {
buildComputerUseTaskExecution,
canHandleComputerUseTask,
executeComputerUseTask,
getComputerUseTaskRunnerConfig,
parseComputerUseTaskResult,
} from "../local-agent/computer-use-task-runner.mjs";
// Directory one level above the folder that holds this test file. The tests
// below hand it to the runner as the working directory for fixture runtimes.
// Resolving "..", ".." from the file path itself strips the file name first
// and then the containing folder.
const repoRoot = path.resolve(fileURLToPath(import.meta.url), "..", "..");
// A task whose taskType is "desktop_control" must be accepted by the runner.
test("computer use runner handles desktop_control tasks", async () => {
  const desktopTask = {
    taskType: "desktop_control",
    requestText: "打开系统设置",
  };

  const accepted = canHandleComputerUseTask(desktopTask);

  assert.strictEqual(accepted, true);
});
// Explicit option values must be reflected in the derived runner config.
// Both "darwin" and "win32" adapters are requested, but only "darwin" is
// expected to remain in the resulting config.
test("computer use runner derives config from explicit values", () => {
  const explicitValues = {
    computerUseEnabled: true,
    computerUseCommand: "node",
    computerUseArgs: ["tests/fixtures/computer-use-runtime.mjs"],
    computerUseWorkdir: repoRoot,
    computerUseTimeoutMs: 12000,
    dialogGuardEnabled: true,
    dialogGuardConsentRequired: true,
    dialogGuardPlatformAdapters: ["darwin", "win32"],
    dialogGuardMacActionCommand: "/usr/local/bin/boss-mac-dialog-helper",
    dialogGuardMacActionArgs: ["click-dialog"],
    cuaDriverCommand: "/usr/local/bin/cua-driver",
    cuaDriverArgs: ["--no-relaunch"],
    cuaDriverTimeoutMs: 9000,
    dialogGuardWindowsActionCommand: "powershell.exe",
    dialogGuardWindowsActionArgs: ["-File", "C:/Boss/dialog-helper.ps1"],
  };

  // First argument is deliberately empty so only the explicit values apply
  // (presumably it is the environment source — confirm against the runner).
  const config = getComputerUseTaskRunnerConfig({}, explicitValues);

  // Primitive-valued settings are compared by strict identity.
  const expectedScalars = {
    enabled: true,
    command: "node",
    cwd: repoRoot,
    timeoutMs: 12000,
    dialogGuardEnabled: true,
    dialogGuardConsentRequired: true,
    dialogGuardMacActionCommand: "/usr/local/bin/boss-mac-dialog-helper",
    cuaDriverCommand: "/usr/local/bin/cua-driver",
    cuaDriverTimeoutMs: 9000,
  };
  for (const [field, expected] of Object.entries(expectedScalars)) {
    assert.strictEqual(config[field], expected);
  }

  // Array-valued settings are compared structurally.
  const expectedLists = {
    args: ["tests/fixtures/computer-use-runtime.mjs"],
    dialogGuardPlatformAdapters: ["darwin"],
    dialogGuardMacActionArgs: ["click-dialog"],
    cuaDriverArgs: ["--no-relaunch"],
  };
  for (const [field, expected] of Object.entries(expectedLists)) {
    assert.deepStrictEqual(config[field], expected);
  }
});
// The execution descriptor must carry the runner settings through unchanged
// and expose the task as a normalized stdin payload with a nested context.
test("computer use runner builds normalized stdin payload", () => {
  const runnerConfig = {
    enabled: true,
    command: "node",
    args: ["tests/fixtures/computer-use-runtime.mjs"],
    cwd: repoRoot,
    timeoutMs: 3000,
  };
  const task = {
    taskId: "desktop-task-1",
    taskType: "desktop_control",
    requestText: "打开系统设置",
    projectId: "boss-console",
    threadId: "thread-desktop",
    requestedByAccount: "17600001111",
    confirmationScopeKey: "thread:desktop",
    riskLevel: "high",
    controlPlatform: "macos",
    computerUseProvider: "cua-driver-computer-use",
  };

  const execution = buildComputerUseTaskExecution(runnerConfig, task);

  // Process-level settings.
  assert.strictEqual(execution.command, "node");
  assert.strictEqual(execution.cwd, repoRoot);
  assert.strictEqual(execution.timeoutMs, 3000);

  // Top-level payload fields: task fields appear under their payload names.
  const payload = execution.stdinPayload;
  assert.strictEqual(payload.requestKind, "desktop_control");
  assert.strictEqual(payload.requestId, "desktop-task-1");
  assert.strictEqual(payload.objective, "打开系统设置");
  assert.strictEqual(payload.platform, "macos");
  assert.strictEqual(payload.provider, "cua-driver-computer-use");

  // Context fields keep the task's own property names.
  const context = payload.context;
  assert.strictEqual(context.projectId, "boss-console");
  assert.strictEqual(context.threadId, "thread-desktop");
  assert.strictEqual(context.confirmationScopeKey, "thread:desktop");
  assert.strictEqual(context.riskLevel, "high");
  assert.strictEqual(context.controlPlatform, "macos");
  assert.strictEqual(context.computerUseProvider, "cua-driver-computer-use");
});
// Building an execution for a task that targets "windows" must throw an
// error whose message matches UNSUPPORTED_CONTROL_PLATFORM.
test("computer use runner rejects non-mac control platforms", () => {
  const runnerConfig = {
    enabled: true,
    command: "node",
    args: ["tests/fixtures/computer-use-runtime.mjs"],
    cwd: repoRoot,
    timeoutMs: 3000,
  };
  const windowsTask = {
    taskId: "desktop-task-windows",
    taskType: "desktop_control",
    requestText: "打开系统设置",
    controlPlatform: "windows",
  };

  const buildForWindows = () => buildComputerUseTaskExecution(runnerConfig, windowsTask);

  assert.throws(buildForWindows, /UNSUPPORTED_CONTROL_PLATFORM/);
});
// Dialog-guard and CUA driver settings must surface as BOSS_* environment
// variables on the execution. The Windows helper settings and the "win32"
// adapter are supplied on purpose: they are expected to be left out.
test("computer use runner passes mac-only dialog guard config to runtime env", () => {
  const runnerConfig = {
    enabled: true,
    command: "node",
    args: ["tests/fixtures/computer-use-runtime.mjs"],
    cwd: repoRoot,
    timeoutMs: 3000,
    dialogGuardEnabled: true,
    dialogGuardConsentRequired: true,
    dialogGuardPlatformAdapters: ["darwin", "win32"],
    dialogGuardMacActionCommand: "/usr/local/bin/boss-mac-dialog-helper",
    dialogGuardMacActionArgs: ["click-dialog"],
    cuaDriverCommand: "/usr/local/bin/cua-driver",
    cuaDriverArgs: ["--no-relaunch"],
    cuaDriverTimeoutMs: 9000,
    dialogGuardWindowsActionCommand: "powershell.exe",
    dialogGuardWindowsActionArgs: ["-File", "C:/Boss/dialog-helper.ps1"],
  };
  const task = {
    taskId: "desktop-dialog-env",
    taskType: "desktop_control",
    requestText: "打开 QQ",
    controlPlatform: "macos",
    computerUseProvider: "cua-driver-computer-use",
  };

  const execution = buildComputerUseTaskExecution(runnerConfig, task);

  // Every value is a string (booleans and numbers are stringified, arrays
  // are JSON-encoded); `undefined` marks variables that must not be set.
  const expectedEnv = {
    BOSS_DIALOG_GUARD_ENABLED: "true",
    BOSS_DIALOG_GUARD_CONSENT_REQUIRED: "true",
    BOSS_DIALOG_GUARD_PLATFORM_ADAPTERS: "darwin",
    BOSS_MAC_DIALOG_GUARD_ACTION_COMMAND: "/usr/local/bin/boss-mac-dialog-helper",
    BOSS_MAC_DIALOG_GUARD_ACTION_ARGS_JSON: JSON.stringify(["click-dialog"]),
    BOSS_CUA_DRIVER_COMMAND: "/usr/local/bin/cua-driver",
    BOSS_CUA_DRIVER_ARGS_JSON: JSON.stringify(["--no-relaunch"]),
    BOSS_CUA_DRIVER_TIMEOUT_MS: "9000",
    BOSS_CONTROL_PLATFORM: "macos",
    BOSS_COMPUTER_USE_PROVIDER: "cua-driver-computer-use",
    BOSS_WINDOWS_DIALOG_GUARD_ACTION_COMMAND: undefined,
    BOSS_WINDOWS_DIALOG_GUARD_ACTION_ARGS_JSON: undefined,
  };
  for (const [variable, expected] of Object.entries(expectedEnv)) {
    assert.strictEqual(execution.env[variable], expected);
  }
});
// A "completed" JSON payload must be parsed with its reply body and
// execution summary preserved.
test("computer use runner parses completed runtime payload", () => {
  const rawOutput =
    '{"status":"completed","replyBody":"已打开系统设置","executionSummary":"desktop ok"}';

  const { status, replyBody, executionSummary } = parseComputerUseTaskResult(rawOutput);

  assert.strictEqual(status, "completed");
  assert.strictEqual(replyBody, "已打开系统设置");
  assert.strictEqual(executionSummary, "desktop ok");
});
// A "failed" JSON payload must expose the payload's `error` field as
// `errorMessage` on the parsed result.
test("computer use runner parses failed runtime payload", () => {
  const rawOutput = '{"status":"failed","error":"COMPUTER_USE_DENIED"}';

  const { status, errorMessage } = parseComputerUseTaskResult(rawOutput);

  assert.strictEqual(status, "failed");
  assert.strictEqual(errorMessage, "COMPUTER_USE_DENIED");
});
// A "needs_user_action" payload describing a dialog must keep its
// identifying fields and the list of available actions after parsing.
test("computer use runner parses dialog intervention runtime payload", () => {
  const runtimePayload = {
    status: "needs_user_action",
    requestId: "desktop-task-dialog",
    kind: "dialog_intervention_required",
    dialogId: "dialog-1",
    risk: "medium",
    summary: "QQ 弹窗需要确认",
    recommendedAction: "review",
    availableActions: ["allow_once", "deny"],
    platform: "darwin",
    appName: "QQ",
  };

  const result = parseComputerUseTaskResult(JSON.stringify(runtimePayload));

  const expectedScalars = {
    status: "needs_user_action",
    requestId: "desktop-task-dialog",
    kind: "dialog_intervention_required",
    dialogId: "dialog-1",
    risk: "medium",
    summary: "QQ 弹窗需要确认",
  };
  for (const [field, expected] of Object.entries(expectedScalars)) {
    assert.strictEqual(result[field], expected);
  }
  assert.deepStrictEqual(result.availableActions, ["allow_once", "deny"]);
});
// Runs the fixture runtime with the current Node binary and checks that the
// result is "completed" and that the reply body matches both the fixture's
// marker text and the original request text.
test("computer use runner executes configured runtime command", async () => {
  const task = {
    taskId: "desktop-task-exec",
    taskType: "desktop_control",
    requestText: "打开飞书",
    projectId: "boss-console",
    threadId: "thread-desktop",
    requestedByAccount: "17600002222",
  };
  const runnerOptions = {
    computerUseEnabled: true,
    computerUseCommand: process.execPath,
    computerUseArgs: ["tests/fixtures/computer-use-runtime.mjs"],
    computerUseWorkdir: repoRoot,
    computerUseTimeoutMs: 4000,
  };

  const result = await executeComputerUseTask(task, runnerOptions);

  assert.strictEqual(result.status, "completed");
  // Fall back to "" so a missing reply fails the pattern match rather than
  // tripping assert.match's string-type check.
  const replyBody = result.replyBody ?? "";
  assert.match(replyBody, /桌面运行时已执行/);
  assert.match(replyBody, /打开飞书/);
});
// Forces the Codex fixture to fail through an environment variable and
// checks that the task is still completed, with the CUA driver reported as
// the provider. The variable is restored afterwards whatever the outcome.
test("computer use runner prefers Codex Computer Use and falls back to CUA runtime when Codex fails", async () => {
  const fixtureStatusKey = "BOSS_CODEX_COMPUTER_USE_FIXTURE_STATUS";
  const savedStatus = process.env[fixtureStatusKey];
  process.env[fixtureStatusKey] = "failed";

  const task = {
    taskId: "desktop-task-codex-fallback",
    taskType: "desktop_control",
    requestText: "打开系统设置",
    controlPlatform: "macos",
    computerUseProvider: "codex-computer-use",
  };
  const runnerOptions = {
    codexComputerUseEnabled: true,
    codexComputerUseCommand: process.execPath,
    codexComputerUseArgs: ["tests/fixtures/codex-computer-use-runtime.mjs"],
    codexComputerUseWorkdir: repoRoot,
    codexComputerUseTimeoutMs: 4000,
    codexComputerUseFallbackToCua: true,
    computerUseEnabled: true,
    computerUseCommand: process.execPath,
    computerUseArgs: ["tests/fixtures/computer-use-runtime.mjs"],
    computerUseWorkdir: repoRoot,
    computerUseTimeoutMs: 4000,
  };

  try {
    const result = await executeComputerUseTask(task, runnerOptions);

    assert.strictEqual(result.status, "completed");
    assert.strictEqual(result.computerUseProvider, "cua-driver-computer-use");
    assert.match(result.replyBody ?? "", /桌面运行时已执行/);
  } finally {
    // Put the variable back exactly as it was, including "not set at all".
    if (savedStatus !== undefined) {
      process.env[fixtureStatusKey] = savedStatus;
    } else {
      delete process.env[fixtureStatusKey];
    }
  }
});
// With the CUA runtime disabled, a successful Codex run must complete the
// task on its own and be reported as the provider.
test("computer use runner uses Codex Computer Use without CUA when Codex completes", async () => {
  // The fallback test in this file forces a Codex failure by setting this
  // variable. Clear any ambient value (for example one exported in the
  // developer's shell or CI) so this success-path test does not depend on the
  // surrounding environment, then restore it afterwards.
  const fixtureStatusKey = "BOSS_CODEX_COMPUTER_USE_FIXTURE_STATUS";
  const savedStatus = process.env[fixtureStatusKey];
  delete process.env[fixtureStatusKey];

  try {
    const result = await executeComputerUseTask(
      {
        taskId: "desktop-task-codex-primary",
        taskType: "desktop_control",
        requestText: "打开系统设置",
        controlPlatform: "macos",
        computerUseProvider: "codex-computer-use",
      },
      {
        codexComputerUseEnabled: true,
        codexComputerUseCommand: process.execPath,
        codexComputerUseArgs: ["tests/fixtures/codex-computer-use-runtime.mjs"],
        codexComputerUseWorkdir: repoRoot,
        codexComputerUseTimeoutMs: 4000,
        // CUA is switched off, so a "completed" status can only come from Codex.
        computerUseEnabled: false,
      },
    );

    assert.equal(result.status, "completed");
    assert.equal(result.computerUseProvider, "codex-computer-use");
    assert.match(result.replyBody ?? "", /Codex Computer Use 已执行/);
  } finally {
    // Only re-create the variable if it existed before; otherwise it stays unset.
    if (savedStatus !== undefined) {
      process.env[fixtureStatusKey] = savedStatus;
    }
  }
});
// With no runner options supplied, execution must report a failure with the
// COMPUTER_USE_RUNTIME_DISABLED error rather than a completed result.
test("computer use runner reports disabled runtime instead of pretending desktop work completed", async () => {
  const task = {
    taskId: "task-desktop-control",
    requestText: "打开系统设置",
  };
  const noOptions = {};

  const { status, errorMessage } = await executeComputerUseTask(task, noOptions);

  assert.strictEqual(status, "failed");
  assert.strictEqual(errorMessage, "COMPUTER_USE_RUNTIME_DISABLED");
});