Files
boss/scripts/cua-driver-computer-use-runtime.mjs
2026-05-17 02:20:08 +08:00

529 lines
16 KiB
JavaScript
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
import { spawn } from "node:child_process";
import { constants as fsConstants } from "node:fs";
import { access } from "node:fs/promises";
import { fileURLToPath } from "node:url";
import path from "node:path";
// Fallback timeout in milliseconds returned by parseTimeoutMs when the supplied
// value (callCuaTool passes BOSS_CUA_DRIVER_TIMEOUT_MS) is missing or not a
// positive integer.
const DEFAULT_CUA_TIMEOUT_MS = 45000;
// Catalog of the applications this runtime can target. Fields per entry:
// - label: name used in user-facing results (replyBody, targetApp, appName).
// - name: application name; sent to launch_app as `name` when no bundleId is set.
// - bundleId (optional): sent to launch_app as `bundle_id` when present.
// - browser (optional): when true, launch args always carry a `urls` list,
//   falling back to "about:blank" if the objective contains no URL.
// - aliases: strings that detectCuaTargetApp looks for in the objective text and
//   that matchesTargetApp looks for in running-app names.
// detectCuaTargetApp returns the first matching entry, so array order breaks ties.
const TARGET_APPS = [
{
label: "Google Chrome",
name: "Google Chrome",
bundleId: "com.google.Chrome",
browser: true,
aliases: ["chrome", "google chrome", "谷歌浏览器", "谷歌"],
},
{
label: "Safari",
name: "Safari",
bundleId: "com.apple.Safari",
browser: true,
aliases: ["safari"],
},
{
label: "QQ",
name: "QQ",
aliases: ["qq"],
},
{
label: "微信",
name: "微信",
aliases: ["微信", "wechat"],
},
{
label: "飞书",
name: "飞书",
aliases: ["飞书", "lark", "feishu"],
},
{
label: "Telegram",
name: "Telegram",
aliases: ["telegram", "tg"],
},
{
label: "Finder",
name: "Finder",
bundleId: "com.apple.finder",
aliases: ["finder", "访达"],
},
{
label: "系统设置",
name: "System Settings",
bundleId: "com.apple.systempreferences",
aliases: ["系统设置", "system settings", "settings"],
},
{
label: "终端",
name: "Terminal",
bundleId: "com.apple.Terminal",
aliases: ["terminal", "终端"],
},
{
label: "Codex",
name: "Codex",
aliases: ["codex"],
},
];
/**
 * Emit a value on stdout as a single line of JSON.
 *
 * @param {unknown} payload - Value to serialize with JSON.stringify.
 */
function writeJson(payload) {
  const line = `${JSON.stringify(payload)}\n`;
  process.stdout.write(line);
}
/**
 * Read all of stdin and return it as a trimmed UTF-8 string.
 *
 * Chunks are collected as Buffers and decoded once at the end. Decoding each
 * chunk separately would corrupt any multi-byte character (e.g. Chinese text)
 * whose bytes happen to straddle a chunk boundary.
 *
 * @returns {Promise<string>} The complete stdin content with surrounding whitespace removed.
 */
async function readStdin() {
  const chunks = [];
  for await (const chunk of process.stdin) {
    // Chunks are strings only if an encoding was set on the stream; normalize to Buffer.
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8").trim();
}
/**
 * Parse a JSON request payload, tolerating bad input.
 *
 * @param {unknown} raw - JSON text; a falsy value is treated as "{}".
 * @returns {object} The parsed value when it is a non-null, non-array object;
 *   otherwise (including on a parse error) an empty object.
 */
function parseJsonPayload(raw) {
  let decoded;
  try {
    decoded = JSON.parse(String(raw || "{}"));
  } catch {
    return {};
  }
  const isRecord = decoded !== null && typeof decoded === "object" && !Array.isArray(decoded);
  return isRecord ? decoded : {};
}
/**
 * Split a whitespace-separated argument string into its tokens.
 * No quoting or escaping is interpreted.
 *
 * @param {unknown} value - Argument string; falsy values yield no tokens.
 * @returns {string[]} The non-empty tokens, in order.
 */
function parseArgs(value) {
  const tokens = String(value || "").match(/\S+/g);
  return tokens ?? [];
}
/**
 * Parse a JSON array of arguments.
 *
 * @param {unknown} value - JSON text expected to encode an array.
 * @returns {string[] | undefined} Every element converted with String(), with
 *   empty strings dropped; undefined when the input is blank, is not valid
 *   JSON, or does not encode an array.
 */
function parseArgsJson(value) {
  const raw = String(value || "").trim();
  if (raw === "") {
    return undefined;
  }
  let decoded;
  try {
    decoded = JSON.parse(raw);
  } catch {
    return undefined;
  }
  if (!Array.isArray(decoded)) {
    return undefined;
  }
  const args = [];
  for (const item of decoded) {
    const text = String(item);
    if (text) {
      args.push(text);
    }
  }
  return args;
}
/**
 * Interpret a timeout setting as a positive integer number of milliseconds.
 *
 * @param {unknown} value - Raw setting; parsed base-10 with Number.parseInt.
 * @returns {number} The parsed value when it is finite and greater than zero,
 *   otherwise DEFAULT_CUA_TIMEOUT_MS.
 */
function parseTimeoutMs(value) {
  const millis = Number.parseInt(String(value || ""), 10);
  if (Number.isFinite(millis) && millis > 0) {
    return millis;
  }
  return DEFAULT_CUA_TIMEOUT_MS;
}
/**
 * Coerce a value to a trimmed string.
 *
 * @param {unknown} value - Any value; falsy values become the empty string.
 * @returns {string} String(value) with surrounding whitespace removed.
 */
function normalizeText(value) {
  const text = value ? String(value) : "";
  return text.trim();
}
/**
 * Canonicalize a platform identifier.
 *
 * @param {unknown} value - Platform name in any letter case.
 * @returns {string} "macos" for blank input, "macos" or "darwin"; otherwise
 *   the trimmed, lower-cased input.
 */
function normalizePlatform(value) {
  const platform = normalizeText(value).toLowerCase();
  if (platform === "" || platform === "macos" || platform === "darwin") {
    return "macos";
  }
  return platform;
}
/**
 * Canonicalize a computer-use provider identifier.
 *
 * @param {unknown} value - Provider name.
 * @returns {string} The trimmed input, or "cua-driver-computer-use" when blank.
 */
function normalizeProvider(value) {
  return normalizeText(value) || "cua-driver-computer-use";
}
/**
 * Find the application an objective refers to.
 *
 * The objective is lower-cased and scanned for each TARGET_APPS alias. Aliases
 * made only of printable ASCII must appear as a standalone word: the characters
 * immediately before and after the hit may not be ASCII letters or digits.
 * Plain substring matching let short aliases fire inside unrelated words
 * ("tg" in "mortgage", "lark" in "skylark", "finder" in "pathfinder") and so
 * pick the wrong application. CJK text next to an alias does not count as a
 * word character, so "打开chrome浏览器" still matches. Aliases containing
 * non-ASCII characters keep plain substring matching.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {object | undefined} The first TARGET_APPS entry with a matching
 *   alias, or undefined when the objective is blank or nothing matches.
 */
export function detectCuaTargetApp(objective) {
  const text = normalizeText(objective).toLowerCase();
  if (!text) return undefined;

  const isAsciiWordChar = (char) => char !== undefined && /[a-z0-9]/.test(char);

  const mentionsAlias = (alias) => {
    const needle = alias.toLowerCase();
    const needsWordBoundary = /^[\x20-\x7e]+$/.test(needle);
    let index = text.indexOf(needle);
    while (index !== -1) {
      if (!needsWordBoundary) return true;
      const before = text[index - 1];
      const after = text[index + needle.length];
      if (!isAsciiWordChar(before) && !isAsciiWordChar(after)) return true;
      // This hit was embedded in a longer word; look for a later occurrence.
      index = text.indexOf(needle, index + 1);
    }
    return false;
  };

  return TARGET_APPS.find((candidate) => candidate.aliases.some((alias) => mentionsAlias(alias)));
}
/**
 * Pull the first http(s) URL out of an objective.
 *
 * The URL ends at whitespace, at `、`, `)`, `"`, `”`, or at the full-width
 * punctuation marks `,` (U+FF0C), `。` (U+3002), `;` (U+FF1B) and `)` (U+FF09).
 * Without the full-width terminators, a URL followed directly by Chinese
 * punctuation would swallow the rest of the sentence up to the next space.
 *
 * The input is not trimmed first: the pattern cannot match whitespace, so
 * surrounding whitespace never affects the result.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {string | undefined} The matched URL, or undefined when there is none.
 */
function extractTargetUrl(objective) {
  const text = String(objective || "");
  return text.match(/https?:\/\/[^\s、)"”\uFF0C\u3002\uFF1B\uFF09]+/i)?.[0];
}
/**
 * Extract the text the user wants typed into the target application.
 *
 * Patterns are tried in order and the first non-blank capture wins:
 *   1. text inside straight or curly double quotes,
 *   2. text inside 「」 / 『』 brackets,
 *   3. text following "输入" plus a colon,
 *   4. text following "打字" plus a colon.
 * For 3 and 4 the colon may be ASCII `:` or full-width `:` (U+FF1A), and the
 * capture stops at a newline, `。`, ASCII `;` or full-width `;` (U+FF1B).
 * Chinese input normally uses the full-width forms, which the ASCII-only
 * patterns failed to recognize.
 *
 * The input is not trimmed up front: every capture is trimmed individually and
 * blank captures are skipped, so outer whitespace cannot change the result.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {string | undefined} The trimmed text to type, or undefined when no
 *   pattern yields a non-blank capture.
 */
function extractQuotedText(objective) {
  const text = String(objective || "");
  const patterns = [
    /[“"]([^“”"]+)[”"]/,
    /[「『]([^」』]+)[」』]/,
    /输入[:\uFF1A]\s*([^\n。;\uFF1B]+)/,
    /打字[:\uFF1A]\s*([^\n。;\uFF1B]+)/,
  ];
  for (const pattern of patterns) {
    const value = text.match(pattern)?.[1]?.trim();
    if (value) return value;
  }
  return undefined;
}
/**
 * Report whether an objective mentions a send / submit / delete / payment action.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {boolean} True when the lower-cased objective contains any of the
 *   keywords below as a substring.
 */
export function isSubmitLikeObjective(objective) {
  const haystack = normalizeText(objective).toLowerCase();
  const keywords = [
    "发送",
    "提交",
    "发出去",
    "回车发送",
    "删除",
    "购买",
    "下单",
    "支付",
    "转账",
    "send",
    "submit",
    "delete",
    "purchase",
    "pay",
  ];
  for (const keyword of keywords) {
    if (haystack.includes(keyword)) {
      return true;
    }
  }
  return false;
}
/**
 * Decide whether a submit-like action has been authorized.
 *
 * @param {object} env - Environment map; BOSS_CUA_ALLOW_SUBMIT equal to "true"
 *   (ignoring case and surrounding whitespace) authorizes every request.
 * @param {object} [payload] - Request payload; authorized when
 *   `context.desktopActionConfirmed` or `desktopActionConfirmed` is exactly true.
 * @returns {boolean} True when either source grants permission.
 */
function isSubmitAllowed(env, payload) {
  const envFlag = String(env.BOSS_CUA_ALLOW_SUBMIT || "").trim().toLowerCase();
  if (envFlag === "true") {
    return true;
  }
  if (payload?.context?.desktopActionConfirmed === true) {
    return true;
  }
  return payload?.desktopActionConfirmed === true;
}
/**
 * Build the argument object for the driver's launch_app tool.
 *
 * @param {object} [targetApp] - Target application entry (bundleId, name, browser).
 * @param {unknown} objective - Instruction text, searched for a URL to open.
 * @returns {object} `{}` when no target app is given. Otherwise an object
 *   identifying the app by `bundle_id` (preferred) or `name`, plus `urls` when
 *   the objective contains a URL; browser targets without a URL get
 *   `["about:blank"]`.
 */
export function buildCuaLaunchArgs(targetApp, objective) {
  if (!targetApp) {
    return {};
  }
  const { bundleId, name, browser } = targetApp;
  const launchArgs = bundleId ? { bundle_id: bundleId } : { name };
  const url = extractTargetUrl(objective);
  if (url) {
    launchArgs.urls = [url];
  } else if (browser) {
    launchArgs.urls = ["about:blank"];
  }
  return launchArgs;
}
/**
 * Choose the window to operate on from a driver-reported list.
 *
 * Preference order:
 *   1. a window with `is_on_screen === true` whose `on_current_space` is not false,
 *   2. any window whose `on_current_space` is not false,
 *   3. the first entry.
 *
 * @param {unknown} windows - Window list; non-arrays are treated as empty.
 * @returns {object | undefined} The chosen window, or undefined for an empty list.
 */
function selectWindow(windows) {
  const list = Array.isArray(windows) ? windows : [];
  const notOnOtherSpace = (entry) => entry?.on_current_space !== false;

  const visible = list.find((entry) => entry?.is_on_screen === true && notOnOtherSpace(entry));
  if (visible) {
    return visible;
  }
  const sameSpace = list.find((entry) => notOnOtherSpace(entry));
  if (sameSpace) {
    return sameSpace;
  }
  return list[0];
}
/**
 * Read a process id from a driver result.
 *
 * The first non-nullish value among `structured.pid`, `structured.process_id`
 * and the top-level `pid` is converted with Number().
 *
 * @param {object} [launchResult] - Normalized tool result or similar object.
 * @returns {number | undefined} The id when it is finite and positive,
 *   otherwise undefined.
 */
function getPid(launchResult) {
  const structured = launchResult?.structured;
  const candidate = structured?.pid ?? structured?.process_id ?? launchResult?.pid;
  const pid = Number(candidate);
  if (!Number.isFinite(pid) || pid <= 0) {
    return undefined;
  }
  return pid;
}
/**
 * Read a window id from a driver window record.
 *
 * `window_id` is used when it is not nullish, otherwise `id`; the value is
 * converted with Number().
 *
 * @param {object} [window] - Window record.
 * @returns {number | undefined} The id when it is finite and positive,
 *   otherwise undefined.
 */
function getWindowId(window) {
  const candidate = window?.window_id ?? window?.id;
  const windowId = Number(candidate);
  if (!Number.isFinite(windowId) || windowId <= 0) {
    return undefined;
  }
  return windowId;
}
/**
 * Derive the human-readable text of a tool response.
 *
 * @param {unknown} parsed - Parsed JSON output of the tool.
 * @param {unknown} raw - The unparsed output, used as a last resort.
 * @returns {string} When `parsed.content` is an array: the trimmed string
 *   `text` fields of its items, blanks dropped, joined with newlines.
 *   Otherwise the trimmed `parsed.text` if it is a string, else the trimmed
 *   `raw` if it is a string, else "".
 */
function extractTextContent(parsed, raw) {
  const items = parsed?.content;
  if (Array.isArray(items)) {
    const pieces = [];
    for (const item of items) {
      const piece = typeof item?.text === "string" ? item.text.trim() : "";
      if (piece) {
        pieces.push(piece);
      }
    }
    return pieces.join("\n").trim();
  }
  if (typeof parsed?.text === "string") {
    return parsed.text.trim();
  }
  return typeof raw === "string" ? raw.trim() : "";
}
/**
 * Normalize the stdout of a driver tool call into one result shape.
 *
 * Every branch returns `{ raw, text, structured, isError }`. The empty-output
 * branch previously omitted `isError`, which made the shape depend on the
 * input; it now reports `isError: false` like the non-JSON branch.
 *
 * @param {unknown} rawOutput - Captured stdout of the tool.
 * @returns {{ raw: string, text: string, structured: object, isError: boolean }}
 *   - `raw`: the trimmed output.
 *   - `text`: result of extractTextContent for JSON output, else the raw text.
 *   - `structured`: `structuredContent` when it is an object; otherwise the
 *     parsed value itself when it is a non-array object; otherwise `{}`.
 *   - `isError`: true only when the parsed JSON has `isError === true`.
 */
function normalizeCuaToolOutput(rawOutput) {
  const raw = String(rawOutput || "").trim();
  if (!raw) {
    return { raw: "", text: "", structured: {}, isError: false };
  }
  try {
    const parsed = JSON.parse(raw);
    const structured =
      parsed?.structuredContent && typeof parsed.structuredContent === "object"
        ? parsed.structuredContent
        : parsed && typeof parsed === "object" && !Array.isArray(parsed)
          ? parsed
          : {};
    return {
      raw,
      text: extractTextContent(parsed, raw),
      structured,
      isError: parsed?.isError === true,
    };
  } catch {
    // Output that is not JSON is passed through as plain text.
    return {
      raw,
      text: raw,
      structured: {},
      isError: false,
    };
  }
}
/**
 * List the filesystem paths where a command might live, in search order.
 *
 * @param {unknown} command - Command name or path.
 * @param {object} env - Environment map; PATH and HOME are consulted for bare names.
 * @param {string} [cwd] - Base directory for relative paths (defaults to process.cwd()).
 * @returns {string[]} `[]` for a blank command. A single entry for an absolute
 *   path (as is) or a path containing "/" (resolved against cwd). For a bare
 *   name: each PATH directory, then `$HOME/.local/bin`, `/usr/local/bin`,
 *   `/opt/homebrew/bin`, and, for "cua-driver" only, the CuaDriver.app bundle binary.
 */
function buildExecutableCandidates(command, env, cwd) {
  const name = normalizeText(command);
  if (!name) {
    return [];
  }
  if (path.isAbsolute(name)) {
    return [name];
  }
  if (name.includes("/")) {
    return [path.resolve(cwd || process.cwd(), name)];
  }

  const candidates = [];
  for (const dir of String(env.PATH || "").split(path.delimiter)) {
    if (dir) {
      candidates.push(path.join(dir, name));
    }
  }
  const home = normalizeText(env.HOME);
  if (home) {
    candidates.push(path.join(home, ".local", "bin", name));
  }
  candidates.push(path.join("/usr/local/bin", name));
  candidates.push(path.join("/opt/homebrew/bin", name));
  if (name === "cua-driver") {
    candidates.push("/Applications/CuaDriver.app/Contents/MacOS/cua-driver");
  }
  return candidates;
}
/**
 * Resolve a command to the first candidate path the current process may execute.
 *
 * Candidates come from buildExecutableCandidates and are probed in order with
 * an execute-permission check (X_OK). A bare existence check would accept a
 * non-executable file that shadows the real binary earlier in the search
 * order, and the later spawn would then fail instead of trying the next
 * location.
 *
 * @param {unknown} command - Command name or path.
 * @param {object} env - Environment map used to build the candidate list.
 * @param {string} [cwd] - Base directory for relative command paths.
 * @returns {Promise<string>} The first accessible candidate path.
 * @throws {Error} "CUA_DRIVER_COMMAND_NOT_FOUND" when no candidate qualifies.
 */
async function resolveExecutableCommand(command, env, cwd) {
  for (const candidate of buildExecutableCandidates(command, env, cwd)) {
    try {
      await access(candidate, fsConstants.X_OK);
      return candidate;
    } catch {
      // Missing or not executable: try the next well-known install location.
    }
  }
  throw new Error("CUA_DRIVER_COMMAND_NOT_FOUND");
}
/**
 * Invoke one driver tool by spawning the driver executable and collecting its output.
 *
 * The child is run as `<command> [...prefixArgs] call <toolName> <json-args> --raw --compact`.
 * Environment settings read here:
 *   - BOSS_CUA_DRIVER_COMMAND: executable name or path (default "cua-driver").
 *   - BOSS_CUA_DRIVER_ARGS_JSON / BOSS_CUA_DRIVER_ARGS: extra leading arguments;
 *     the JSON form wins when it parses to an array.
 *   - BOSS_CUA_DRIVER_TIMEOUT_MS: per-call timeout.
 *
 * @param {string} toolName - Name of the driver tool to call.
 * @param {object} [args] - Tool arguments; serialized as JSON (`{}` when falsy).
 * @param {{ env?: object, cwd?: string }} options - Environment and working
 *   directory overrides; default to process.env and process.cwd().
 * @returns {Promise<object>} The result of normalizeCuaToolOutput on the child's stdout.
 * @throws {Error} The returned promise rejects with:
 *   - "CUA_DRIVER_COMMAND_NOT_FOUND" when the executable cannot be resolved or
 *     spawning reports ENOENT,
 *   - "CUA_DRIVER_TIMEOUT" when the timer killed the child,
 *   - "CUA_DRIVER_TOOL_FAILED: <tool>: <detail>" on a non-zero exit code,
 *   - "CUA_DRIVER_TOOL_ERROR: <tool>: <text>" when the output is flagged isError,
 *   - any other spawn error unchanged.
 */
async function callCuaTool(toolName, args, options) {
const env = options.env || process.env;
const command = await resolveExecutableCommand(
normalizeText(env.BOSS_CUA_DRIVER_COMMAND) || "cua-driver",
env,
options.cwd || process.cwd(),
);
// `??` (not `||`): an empty JSON array is a valid answer and must not fall back
// to the whitespace-split variable.
const prefixArgs = parseArgsJson(env.BOSS_CUA_DRIVER_ARGS_JSON) ?? parseArgs(env.BOSS_CUA_DRIVER_ARGS);
const timeoutMs = parseTimeoutMs(env.BOSS_CUA_DRIVER_TIMEOUT_MS);
const childArgs = [...prefixArgs, "call", toolName, JSON.stringify(args || {}), "--raw", "--compact"];
return new Promise((resolve, reject) => {
const child = spawn(command, childArgs, {
cwd: options.cwd || process.cwd(),
// Entries from the supplied env override the current process environment.
env: {
...process.env,
...env,
},
// No stdin for the child; stdout and stderr are captured.
stdio: ["ignore", "pipe", "pipe"],
});
let stdout = "";
let stderr = "";
let timedOut = false;
// The timer only kills the child and sets the flag; the rejection itself
// happens in the "close" handler below.
const timer = setTimeout(() => {
timedOut = true;
child.kill("SIGKILL");
}, timeoutMs);
child.stdout.setEncoding("utf8");
child.stderr.setEncoding("utf8");
child.stdout.on("data", (chunk) => {
stdout += chunk;
});
child.stderr.on("data", (chunk) => {
stderr += chunk;
});
child.on("error", (error) => {
clearTimeout(timer);
if (error?.code === "ENOENT") {
reject(new Error("CUA_DRIVER_COMMAND_NOT_FOUND"));
return;
}
reject(error);
});
child.on("close", (code) => {
clearTimeout(timer);
if (timedOut) {
reject(new Error("CUA_DRIVER_TIMEOUT"));
return;
}
if (code !== 0) {
// Prefer stderr, then stdout, then the bare exit code as the failure detail.
const detail = stderr.trim() || stdout.trim() || `cua-driver exit code ${code}`;
reject(new Error(`CUA_DRIVER_TOOL_FAILED: ${toolName}: ${detail}`));
return;
}
const result = normalizeCuaToolOutput(stdout);
// A zero exit code can still carry a tool-level error flag in the output.
if (result.isError) {
reject(new Error(`CUA_DRIVER_TOOL_ERROR: ${toolName}: ${result.text || result.raw}`));
return;
}
resolve(result);
});
});
}
/**
 * Test whether a driver-reported application is the requested target.
 *
 * All comparisons are case-insensitive on trimmed values. A match is any of:
 * equal bundle ids, app name equal to the target's name or label, or app name
 * containing one of the target's aliases.
 *
 * @param {object} [app] - Reported application (`bundle_id`, `name`).
 * @param {object} [targetApp] - Target entry (`bundleId`, `name`, `label`, `aliases`).
 * @returns {boolean} True when the application matches.
 */
function matchesTargetApp(app, targetApp) {
  const lower = (value) => normalizeText(value).toLowerCase();
  const appBundleId = lower(app?.bundle_id);
  const appName = lower(app?.name);

  const wantedBundleId = lower(targetApp?.bundleId);
  if (wantedBundleId && wantedBundleId === appBundleId) {
    return true;
  }
  const wantedName = lower(targetApp?.name);
  if (wantedName && wantedName === appName) {
    return true;
  }
  const wantedLabel = lower(targetApp?.label);
  if (wantedLabel && wantedLabel === appName) {
    return true;
  }
  const aliasHit = targetApp?.aliases?.some((alias) => appName.includes(alias.toLowerCase()));
  return aliasHit === true;
}
/**
 * Find the first running application that matches the target.
 *
 * @param {unknown} apps - Application list; non-arrays are treated as empty.
 * @param {object} targetApp - Target entry passed on to matchesTargetApp.
 * @returns {object | undefined} The first entry with `running === true` that
 *   matches, or undefined.
 */
function selectRunningApp(apps, targetApp) {
  if (!Array.isArray(apps)) {
    return undefined;
  }
  for (const app of apps) {
    if (app?.running === true && matchesTargetApp(app, targetApp)) {
      return app;
    }
  }
  return undefined;
}
/**
 * Obtain a pid and window for the target application.
 *
 * First tries the driver's launch_app tool. If anything in that attempt throws,
 * falls back to list_apps to find an already-running match and list_windows to
 * get its windows. Each step appends a marker to `toolTrace`.
 *
 * @param {object} targetApp - Target application entry.
 * @param {string} objective - Instruction text, used to build launch arguments.
 * @param {object} options - Options forwarded to callCuaTool.
 * @param {string[]} toolTrace - Mutable list of executed step names.
 * @returns {Promise<{ pid: number | undefined, window: object | undefined, sourceText: string | undefined }>}
 * @throws The original launch error when no matching running app with a pid is
 *   found; errors from the fallback tool calls propagate as they are.
 */
async function resolveTargetAppSession(targetApp, objective, options, toolTrace) {
  // Fallback path: attach to an instance that is already running.
  const attachToRunningApp = async (launchError) => {
    toolTrace.push("launch_app_failed");
    const listedApps = await callCuaTool("list_apps", {}, options);
    toolTrace.push("list_apps");
    const match = selectRunningApp(listedApps.structured?.apps, targetApp);
    const pid = getPid({ structured: match });
    if (!pid) {
      throw launchError;
    }
    const listedWindows = await callCuaTool("list_windows", { pid }, options);
    toolTrace.push("list_windows");
    return {
      pid,
      window: selectWindow(listedWindows.structured?.windows),
      sourceText: listedWindows.text || listedApps.text || launchError?.message,
    };
  };

  try {
    const launchArgs = buildCuaLaunchArgs(targetApp, objective);
    const launched = await callCuaTool("launch_app", launchArgs, options);
    toolTrace.push("launch_app");
    return {
      pid: getPid(launched),
      window: selectWindow(launched.structured?.windows),
      sourceText: launched.text || launched.raw,
    };
  } catch (launchError) {
    return attachToRunningApp(launchError);
  }
}
/**
 * Build the "needs_user_action" result returned when a submit-like objective
 * has not been confirmed.
 *
 * @param {object} payload - Request payload; its `requestId` is echoed back.
 * @param {object} [targetApp] - Target entry; `label` (or `name`) becomes `appName`.
 * @returns {object} Confirmation request offering "allow_once" and "deny".
 */
function buildConfirmationResult(payload, targetApp) {
  const requestId = normalizeText(payload.requestId) || undefined;
  const appName = targetApp?.label || targetApp?.name;
  return {
    status: "needs_user_action",
    requestId,
    kind: "desktop_submit_confirmation_required",
    risk: "high",
    summary: "这条指令会在桌面应用里发送、提交或删除内容,需要你先确认。",
    recommendedAction: "allow_once",
    availableActions: ["allow_once", "deny"],
    platform: "macos",
    appName,
  };
}
/**
 * Build a "failed" result object.
 *
 * @param {unknown} requestId - Request id; blank values become undefined.
 * @param {string} error - Error code or message, stored as given.
 * @param {unknown} [detail] - Extra explanation; blank values become undefined.
 * @returns {{ status: "failed", requestId: string | undefined, error: string, detail: string | undefined }}
 */
function buildFailure(requestId, error, detail) {
  const normalizedRequestId = normalizeText(requestId) || undefined;
  const normalizedDetail = normalizeText(detail) || undefined;
  return {
    status: "failed",
    requestId: normalizedRequestId,
    error,
    detail: normalizedDetail,
  };
}
/**
 * Execute one computer-use request against a macOS application through the driver.
 *
 * Flow: validate platform, provider and objective; detect the target app from
 * the objective; require confirmation for submit-like objectives; obtain a pid
 * and window; read the window state; optionally type the text extracted from
 * the objective (pressing Return for submit-like objectives); report a summary.
 *
 * @param {object} [payload] - Request. Fields read: `requestId`, `objective`,
 *   `platform` or `context.controlPlatform`, `provider` or
 *   `context.computerUseProvider`, and the confirmation flags checked by
 *   isSubmitAllowed.
 * @param {{ env?: object, cwd?: string }} [options] - Environment and working
 *   directory; `env` defaults to process.env.
 * @returns {Promise<object>} One of:
 *   - `{ status: "failed", requestId, error, detail }`,
 *   - the "needs_user_action" object from buildConfirmationResult,
 *   - `{ status: "completed", requestId, replyBody, targetApp, executionSummary }`.
 *   Errors thrown while driving the application are converted into a
 *   "failed" result rather than rejecting.
 */
export async function runCuaDriverComputerUseTask(payload, options = {}) {
const env = options.env || process.env;
const requestId = normalizeText(payload?.requestId);
const objective = normalizeText(payload?.objective);
const platform = normalizePlatform(payload?.platform || payload?.context?.controlPlatform);
const provider = normalizeProvider(payload?.provider || payload?.context?.computerUseProvider);
if (platform !== "macos") {
return buildFailure(requestId, "UNSUPPORTED_CONTROL_PLATFORM");
}
if (provider !== "cua-driver-computer-use") {
return buildFailure(requestId, "UNSUPPORTED_COMPUTER_USE_PROVIDER");
}
if (!objective) {
return buildFailure(requestId, "CUA_OBJECTIVE_REQUIRED");
}
const targetApp = detectCuaTargetApp(objective);
if (!targetApp) {
return buildFailure(
requestId,
"CUA_TARGET_APP_REQUIRED",
"请在指令里明确要控制的 macOS 应用,例如 Chrome、Safari、QQ、微信、Finder 或系统设置。",
);
}
// Confirmation gate: submit-like objectives stop here, before any driver
// call, unless the environment or the payload authorizes them.
if (isSubmitLikeObjective(objective) && !isSubmitAllowed(env, payload)) {
return buildConfirmationResult(payload, targetApp);
}
// Names of the driver steps performed, joined into executionSummary.
const toolTrace = [];
try {
const targetSession = await resolveTargetAppSession(targetApp, objective, {
...options,
env,
}, toolTrace);
const pid = targetSession.pid;
if (!pid) {
return buildFailure(requestId, "CUA_TARGET_PID_NOT_FOUND", targetSession.sourceText);
}
let window = targetSession.window;
// The session may come back without a window; ask the driver for the list.
if (!window) {
const windowsResult = await callCuaTool("list_windows", { pid }, { ...options, env });
toolTrace.push("list_windows");
window = selectWindow(windowsResult.structured?.windows);
}
const windowId = getWindowId(window);
if (!windowId) {
return buildFailure(requestId, "CUA_TARGET_WINDOW_NOT_FOUND", targetSession.sourceText);
}
const beforeState = await callCuaTool("get_window_state", { pid, window_id: windowId }, { ...options, env });
toolTrace.push("get_window_state");
const typedText = extractQuotedText(objective);
if (typedText) {
await callCuaTool("type_text", { pid, window_id: windowId, text: typedText, delay_ms: 20 }, { ...options, env });
toolTrace.push("type_text");
// Return is pressed only for submit-like objectives; the permission check
// repeats the gate above.
if (isSubmitLikeObjective(objective) && isSubmitAllowed(env, payload)) {
await callCuaTool("press_key", { pid, window_id: windowId, key: "return" }, { ...options, env });
toolTrace.push("press_key");
}
// The result of this second state read is not used; only the call is traced.
await callCuaTool("get_window_state", { pid, window_id: windowId }, { ...options, env });
toolTrace.push("get_window_state");
}
// The observation quotes the first line of the state captured before typing.
const observation = beforeState.text ? `窗口观测:${beforeState.text.split(/\r?\n/)[0]}` : "已完成窗口观测。";
// typedText.length counts UTF-16 code units.
const actionSummary = typedText ? `并已向目标应用写入 ${typedText.length} 个字符。` : "已打开并读取目标窗口。";
return {
status: "completed",
requestId: requestId || undefined,
replyBody: `已通过 Cua Driver 接入 ${targetApp.label}${actionSummary}${observation}`,
targetApp: targetApp.label,
executionSummary: toolTrace.join(" -> "),
};
} catch (error) {
// The error message is reported in the `error` field; no `detail` is supplied.
return buildFailure(requestId, error?.message || "CUA_DRIVER_EXECUTION_FAILED");
}
}
/**
 * Command-line entry point: read a JSON request from stdin, run it against the
 * current process environment and working directory, and print the result as
 * one JSON line on stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const payload = parseJsonPayload(await readStdin());
  const options = {
    env: process.env,
    cwd: process.cwd(),
  };
  const result = await runCuaDriverComputerUseTask(payload, options);
  writeJson(result);
}
// Run main() only when this module is the script Node was started with, so
// importing it for its exports has no side effects. A rejection from main() is
// reported on stdout as a "failed" JSON result.
const currentFile = fileURLToPath(import.meta.url);
const invokedScript = process.argv[1];
if (invokedScript && path.resolve(invokedScript) === currentFile) {
  const reportFatal = (error) => {
    writeJson({
      status: "failed",
      error: error?.message || "CUA_DRIVER_RUNTIME_FAILED",
    });
  };
  main().catch(reportFatal);
}