// Files
// boss/tests/cua-driver-computer-use-runtime.test.mjs
// 2026-05-17 02:20:08 +08:00
//
// 281 lines
// 8.6 KiB
// JavaScript

import test from "node:test";
import assert from "node:assert/strict";
import { mkdtemp, writeFile, chmod, mkdir, symlink, readFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import {
  buildCuaLaunchArgs,
  detectCuaTargetApp,
  isSubmitLikeObjective,
  runCuaDriverComputerUseTask,
} from "../scripts/cua-driver-computer-use-runtime.mjs";
// Parent of the directory that contains this test file; the tests below pass it as `cwd`.
const repoRoot = path.dirname(path.dirname(fileURLToPath(import.meta.url)));
/**
 * Writes an executable fake `cua-driver` script into a fresh temporary directory.
 *
 * The generated script takes the tool name and JSON payload from its arguments
 * (either `call <tool> <json>` or `<tool> <json>`), optionally appends
 * `{ tool, payload }` as one JSON line to the file named by `FAKE_CUA_LOG_PATH`,
 * and prints a canned JSON response for `launch_app`, `list_apps`,
 * `list_windows`, `get_window_state`, `type_text` and `press_key`.
 * Setting `FAKE_CUA_FAIL_LAUNCH=true` makes `launch_app` print an error result
 * and exit with code 1. Any other tool writes to stderr and exits with code 64.
 *
 * The temporary directory is not removed by this helper.
 *
 * @returns {Promise<{ command: string, logPath: string, cwd: string }>} Path of
 *   the fake executable, path intended for `FAKE_CUA_LOG_PATH`, and the
 *   temporary directory that holds both.
 */
async function createFakeCuaDriver() {
  const dir = await mkdtemp(path.join(os.tmpdir(), "boss-cua-driver-"));
  const command = path.join(dir, "fake-cua-driver.mjs");
  const logPath = path.join(dir, "calls.jsonl");
  // The shebang must be the very first bytes of the file, so the template
  // starts immediately after the opening backtick.
  await writeFile(
    command,
    `#!/usr/bin/env node
import { appendFileSync } from "node:fs";

const logPath = process.env.FAKE_CUA_LOG_PATH;
const args = process.argv.slice(2);
const callIndex = args.indexOf("call");
const tool = callIndex >= 0 ? args[callIndex + 1] : args[0];
const rawJson = callIndex >= 0 ? args[callIndex + 2] : args[1];

let payload = {};
try {
  // A literal JSON null would break the payload.name lookups below, so treat it as empty.
  payload = (rawJson ? JSON.parse(rawJson) : {}) ?? {};
} catch {
  // Best effort: an unparsable payload is treated as empty.
}

// Logging is optional so the fake still answers when FAKE_CUA_LOG_PATH is unset.
if (logPath) {
  appendFileSync(logPath, JSON.stringify({ tool, payload }) + "\\n");
}

// Prints one JSON result and terminates; never returns.
function reply(result, exitCode = 0) {
  process.stdout.write(JSON.stringify(result));
  process.exit(exitCode);
}

if (tool === "launch_app") {
  if (process.env.FAKE_CUA_FAIL_LAUNCH === "true") {
    reply({
      content: [{ type: "text", text: "Launch failed: app not found" }],
      isError: true,
    }, 1);
  }
  reply({
    structuredContent: {
      pid: 2468,
      name: payload.name || "Google Chrome",
      bundle_id: payload.bundle_id || "com.google.Chrome",
      windows: [
        { window_id: 1357, title: "Boss Cua Test", is_on_screen: true, on_current_space: true },
      ],
    },
    content: [{ type: "text", text: "✅ Launched app" }],
  });
}
if (tool === "list_apps") {
  reply({
    structuredContent: {
      apps: [
        { pid: 2468, name: "Google Chrome", bundle_id: "com.google.Chrome", running: true },
      ],
    },
    content: [{ type: "text", text: "✅ Found app" }],
  });
}
if (tool === "list_windows") {
  reply({
    structuredContent: {
      windows: [
        { window_id: 1357, title: "Boss Cua Test", is_on_screen: true, on_current_space: true },
      ],
    },
    content: [{ type: "text", text: "✅ Found window" }],
  });
}
if (tool === "get_window_state") {
  reply({
    content: [{ type: "text", text: "✅ Google Chrome — 3 elements, turn 1 + screenshot\\n- [0] AXWindow Boss Cua Test" }],
    structuredContent: { element_count: 3, has_screenshot: true },
  });
}
if (tool === "type_text") {
  reply({
    content: [{ type: "text", text: "✅ Inserted text" }],
    structuredContent: { ok: true },
  });
}
if (tool === "press_key") {
  reply({
    content: [{ type: "text", text: "✅ Pressed key" }],
    structuredContent: { ok: true },
  });
}
process.stderr.write("unknown tool " + tool);
process.exit(64);
`,
    "utf8",
  );
  await chmod(command, 0o755);
  return { command, logPath, cwd: dir };
}
// Each objective must be mapped to the expected app, checked via one field of the detected target.
test("cua runtime detects common macOS targets", () => {
  const expectations = [
    ["打开 Chrome 浏览器搜索测试", "name", "Google Chrome"],
    ["打开系统设置看屏幕录制", "bundleId", "com.apple.systempreferences"],
    ["打开 QQ 群 AI开发", "name", "QQ"],
  ];
  for (const [objective, field, expected] of expectations) {
    assert.equal(detectCuaTargetApp(objective)?.[field], expected);
  }
});
// A browser objective that carries no URL should launch Safari on about:blank.
test("cua runtime adds about:blank for browser launch when objective has no URL", () => {
  const objective = "打开 Safari";
  const detected = detectCuaTargetApp(objective);
  const { bundle_id: bundleId, urls } = buildCuaLaunchArgs(detected, objective);
  assert.equal(bundleId, "com.apple.Safari");
  assert.deepEqual(urls, ["about:blank"]);
});
// The launch arguments for the system settings objective must carry the expected bundle id.
test("cua runtime uses the current macOS system settings bundle id", () => {
  const objective = "打开系统设置";
  const { bundle_id: bundleId } = buildCuaLaunchArgs(detectCuaTargetApp(objective), objective);
  assert.equal(bundleId, "com.apple.systempreferences");
});
// Happy path against the fake driver: launch_app succeeds and the window state is observed.
test("cua runtime launches target app and observes window state through cua-driver", async () => {
  const { command, logPath } = await createFakeCuaDriver();
  const request = {
    requestKind: "desktop_control",
    requestId: "cua-task-1",
    objective: "打开 Chrome 浏览器",
    platform: "macos",
    provider: "cua-driver-computer-use",
  };
  const runtimeOptions = {
    env: {
      BOSS_CUA_DRIVER_COMMAND: command,
      BOSS_CUA_DRIVER_TIMEOUT_MS: "4000",
      FAKE_CUA_LOG_PATH: logPath,
    },
    cwd: repoRoot,
  };

  const outcome = await runCuaDriverComputerUseTask(request, runtimeOptions);

  assert.equal(outcome.status, "completed");
  assert.equal(outcome.requestId, "cua-task-1");
  assert.equal(outcome.targetApp, "Google Chrome");
  assert.match(outcome.replyBody, /已通过 Cua Driver/);
  assert.match(outcome.executionSummary, /launch_app -> get_window_state/);
});
// FAKE_CUA_FAIL_LAUNCH makes the fake driver fail launch_app, so the runtime
// is expected to go through list_apps and list_windows before observing.
test("cua runtime falls back to running app windows when launch_app fails", async () => {
  const { command, logPath } = await createFakeCuaDriver();
  const request = {
    requestKind: "desktop_control",
    requestId: "cua-task-running-app",
    objective: "打开 Chrome 浏览器",
    platform: "macos",
    provider: "cua-driver-computer-use",
  };
  const runtimeOptions = {
    env: {
      BOSS_CUA_DRIVER_COMMAND: command,
      BOSS_CUA_DRIVER_TIMEOUT_MS: "4000",
      FAKE_CUA_LOG_PATH: logPath,
      FAKE_CUA_FAIL_LAUNCH: "true",
    },
    cwd: repoRoot,
  };

  const outcome = await runCuaDriverComputerUseTask(request, runtimeOptions);

  assert.equal(outcome.status, "completed");
  assert.match(
    outcome.executionSummary,
    /launch_app_failed -> list_apps -> list_windows -> get_window_state/,
  );
});
// The fake driver is exposed as $HOME/.local/bin/cua-driver and requested by bare name.
// NOTE(review): despite the title, PATH is not empty. It holds only the directory of the
// running Node binary, presumably so the fake's `#!/usr/bin/env node` shebang still
// resolves. What matters is that PATH does not contain `cua-driver`.
test("cua runtime can discover cua-driver from HOME local bin when PATH is empty", async () => {
  const { command, logPath, cwd: fakeHome } = await createFakeCuaDriver();
  const localBinDir = path.join(fakeHome, ".local", "bin");
  await mkdir(localBinDir, { recursive: true });
  await symlink(command, path.join(localBinDir, "cua-driver"));

  const request = {
    requestKind: "desktop_control",
    requestId: "cua-task-path",
    objective: "打开 Chrome 浏览器",
    platform: "macos",
    provider: "cua-driver-computer-use",
  };
  const runtimeOptions = {
    env: {
      HOME: fakeHome,
      PATH: path.dirname(process.execPath),
      BOSS_CUA_DRIVER_COMMAND: "cua-driver",
      BOSS_CUA_DRIVER_TIMEOUT_MS: "4000",
      FAKE_CUA_LOG_PATH: logPath,
    },
    cwd: repoRoot,
  };

  const outcome = await runCuaDriverComputerUseTask(request, runtimeOptions);

  assert.equal(outcome.status, "completed");
});
// Typing quoted text must complete without any submit key press.
// The fake driver appends one JSON line per invocation to its log file, so the
// log is read to check which tools the runtime actually called.
test("cua runtime types quoted text but does not submit without explicit submit allowance", async () => {
  const fake = await createFakeCuaDriver();
  const result = await runCuaDriverComputerUseTask(
    {
      requestKind: "desktop_control",
      requestId: "cua-task-typing",
      objective: "打开 QQ 输入“我是测试”",
      platform: "macos",
      provider: "cua-driver-computer-use",
    },
    {
      env: {
        BOSS_CUA_DRIVER_COMMAND: fake.command,
        BOSS_CUA_DRIVER_TIMEOUT_MS: "4000",
        FAKE_CUA_LOG_PATH: fake.logPath,
      },
      cwd: repoRoot,
    },
  );
  assert.equal(result.status, "completed");
  assert.match(result.executionSummary, /type_text/);

  // Back the "does not submit" half of the title with an assertion.
  // NOTE(review): assumes press_key is the only tool the runtime uses to submit — confirm.
  const calledTools = (await readFile(fake.logPath, "utf8"))
    .split("\n")
    .filter(Boolean)
    .map((line) => JSON.parse(line).tool);
  assert.ok(
    !calledTools.includes("press_key"),
    `expected no press_key call without submit allowance, got: ${calledTools.join(" -> ")}`,
  );
});
// A send-like objective must stop with a confirmation request that offers allow_once / deny.
test("cua runtime asks for confirmation before send-like objectives", async () => {
  const objective = "在 QQ 里输入“你好”并发送";
  assert.equal(isSubmitLikeObjective(objective), true);
  const fake = await createFakeCuaDriver();
  const result = await runCuaDriverComputerUseTask(
    {
      requestKind: "desktop_control",
      requestId: "cua-task-send",
      objective,
      platform: "macos",
      provider: "cua-driver-computer-use",
    },
    {
      env: {
        BOSS_CUA_DRIVER_COMMAND: fake.command,
        // Same driver timeout override as the other driver-backed tests in this file.
        BOSS_CUA_DRIVER_TIMEOUT_MS: "4000",
        FAKE_CUA_LOG_PATH: fake.logPath,
      },
      cwd: repoRoot,
    },
  );
  assert.equal(result.status, "needs_user_action");
  assert.equal(result.kind, "desktop_submit_confirmation_required");
  assert.deepEqual(result.availableActions, ["allow_once", "deny"]);
});
// A request for the "windows" platform must fail with UNSUPPORTED_CONTROL_PLATFORM.
// No fake driver is created and the environment passed in is empty.
test("cua runtime rejects non-mac platforms", async () => {
  const request = {
    requestId: "cua-task-windows",
    objective: "打开系统设置",
    platform: "windows",
    provider: "cua-driver-computer-use",
  };

  const { status, error } = await runCuaDriverComputerUseTask(request, { env: {}, cwd: repoRoot });

  assert.equal(status, "failed");
  assert.equal(error, "UNSUPPORTED_CONTROL_PLATFORM");
});