#!/usr/bin/env node
import { spawn } from "node:child_process";
import { constants as fsConstants, realpathSync } from "node:fs";
import { access } from "node:fs/promises";
import { fileURLToPath } from "node:url";
import path from "node:path";

/** Timeout applied to a single driver invocation when no valid override is configured. */
const DEFAULT_CUA_TIMEOUT_MS = 45000;

/**
 * Applications that can be targeted. Order matters: the first entry whose alias
 * is mentioned in the objective wins.
 */
const TARGET_APPS = [
  {
    label: "Google Chrome",
    name: "Google Chrome",
    bundleId: "com.google.Chrome",
    browser: true,
    aliases: ["chrome", "google chrome", "谷歌浏览器", "谷歌"],
  },
  {
    label: "Safari",
    name: "Safari",
    bundleId: "com.apple.Safari",
    browser: true,
    aliases: ["safari"],
  },
  {
    label: "QQ",
    name: "QQ",
    aliases: ["qq"],
  },
  {
    label: "微信",
    name: "微信",
    aliases: ["微信", "wechat"],
  },
  {
    label: "飞书",
    name: "飞书",
    aliases: ["飞书", "lark", "feishu"],
  },
  {
    label: "Telegram",
    name: "Telegram",
    aliases: ["telegram", "tg"],
  },
  {
    label: "Finder",
    name: "Finder",
    bundleId: "com.apple.finder",
    aliases: ["finder", "访达"],
  },
  {
    label: "系统设置",
    name: "System Settings",
    bundleId: "com.apple.systempreferences",
    aliases: ["系统设置", "system settings", "settings"],
  },
  {
    label: "终端",
    name: "Terminal",
    bundleId: "com.apple.Terminal",
    aliases: ["terminal", "终端"],
  },
  {
    label: "Codex",
    name: "Codex",
    aliases: ["codex"],
  },
];

/** Aliases made only of lowercase ASCII letters, digits and spaces get word-boundary matching. */
const ASCII_ALIAS_PATTERN = /^[a-z0-9 ]+$/;

/** Writes `payload` to stdout as a single line of JSON. */
function writeJson(payload) {
  process.stdout.write(`${JSON.stringify(payload)}\n`);
}

/** Reads all of stdin and returns it as a trimmed UTF-8 string. */
async function readStdin() {
  const chunks = [];
  for await (const chunk of process.stdin) {
    chunks.push(typeof chunk === "string" ? chunk : chunk.toString("utf8"));
  }
  return chunks.join("").trim();
}

/**
 * Parses `raw` as JSON and returns it only when it is a plain (non-array) object.
 * Invalid JSON and any other JSON value yield an empty object.
 */
function parseJsonPayload(raw) {
  try {
    const parsed = JSON.parse(String(raw || "{}"));
    return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
  } catch {
    return {};
  }
}

/** Splits a whitespace-separated argument string into its non-empty parts. */
function parseArgs(value) {
  return String(value || "")
    .trim()
    .split(/\s+/)
    .filter(Boolean);
}

/**
 * Parses a JSON array of arguments.
 * @returns {string[] | undefined} The stringified non-empty items, or `undefined`
 *   when the value is empty, not valid JSON, or not an array.
 */
function parseArgsJson(value) {
  const raw = String(value || "").trim();
  if (!raw) return undefined;
  try {
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed.map((item) => String(item)).filter(Boolean) : undefined;
  } catch {
    return undefined;
  }
}

/** Parses a positive integer timeout, falling back to `DEFAULT_CUA_TIMEOUT_MS`. */
function parseTimeoutMs(value) {
  const parsed = Number.parseInt(String(value || ""), 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_CUA_TIMEOUT_MS;
}

/** Coerces any value to a trimmed string; falsy values become `""`. */
function normalizeText(value) {
  return String(value || "").trim();
}

/** Maps an empty value, "macos" or "darwin" to "macos"; other values are lowercased. */
function normalizePlatform(value) {
  const platform = normalizeText(value).toLowerCase();
  return !platform || platform === "macos" || platform === "darwin" ? "macos" : platform;
}

/** Returns the trimmed provider name, defaulting to "cua-driver-computer-use". */
function normalizeProvider(value) {
  const provider = normalizeText(value);
  return provider || "cua-driver-computer-use";
}

/**
 * Tells whether the (already lowercased) `text` mentions `alias`.
 *
 * ASCII aliases must not be directly adjacent to another ASCII letter or digit,
 * so a short alias such as "tg" does not fire inside "outgoing". Aliases that
 * contain any other character (e.g. CJK) keep plain substring matching because
 * such text has no word separators.
 */
function textMentionsAlias(text, alias) {
  const needle = String(alias || "").toLowerCase();
  if (!needle) return false;
  if (!ASCII_ALIAS_PATTERN.test(needle)) return text.includes(needle);
  // `needle` contains only [a-z0-9 ], so it needs no regex escaping.
  return new RegExp(`(?<![a-z0-9])${needle}(?![a-z0-9])`).test(text);
}

/**
 * Finds the first known target application mentioned in the objective.
 * @param {unknown} objective - Free-form instruction text.
 * @returns {object | undefined} The matching `TARGET_APPS` entry, or `undefined`
 *   when the objective is empty or mentions no known alias.
 */
export function detectCuaTargetApp(objective) {
  const text = normalizeText(objective).toLowerCase();
  if (!text) return undefined;
  return TARGET_APPS.find((candidate) =>
    candidate.aliases.some((alias) => textMentionsAlias(text, alias)),
  );
}

/** Returns the first http(s) URL found in the objective, or `undefined`. */
function extractTargetUrl(objective) {
  const text = normalizeText(objective);
  return text.match(/https?:\/\/[^\s,。;、))"”]+/i)?.[0];
}

/**
 * Extracts the text to type from the objective: the first quoted span, or the
 * remainder of a clause introduced by "输入:" / "打字:".
 * @returns {string | undefined}
 */
function extractQuotedText(objective) {
  const text = normalizeText(objective);
  const patterns = [
    /[“"]([^“”"]+)[”"]/,
    /[「『]([^」』]+)[」』]/,
    /输入[::]\s*([^\n。;;]+)/,
    /打字[::]\s*([^\n。;;]+)/,
  ];
  for (const pattern of patterns) {
    const match = text.match(pattern);
    const value = match?.[1]?.trim();
    if (value) return value;
  }
  return undefined;
}

/**
 * Tells whether the objective contains a keyword for a send, submit, delete or
 * payment style action. Matching is by substring on purpose: a false positive
 * only costs an extra confirmation.
 * @param {unknown} objective
 * @returns {boolean}
 */
export function isSubmitLikeObjective(objective) {
  const text = normalizeText(objective).toLowerCase();
  return [
    "发送",
    "提交",
    "发出去",
    "回车发送",
    "删除",
    "购买",
    "下单",
    "支付",
    "转账",
    "send",
    "submit",
    "delete",
    "purchase",
    "pay",
  ].some((keyword) => text.includes(keyword));
}

/**
 * A submit-like action is allowed when `BOSS_CUA_ALLOW_SUBMIT` is "true" or the
 * payload carries `desktopActionConfirmed === true` (top level or in `context`).
 */
function isSubmitAllowed(env, payload) {
  if (String(env.BOSS_CUA_ALLOW_SUBMIT || "").trim().toLowerCase() === "true") {
    return true;
  }
  return payload?.context?.desktopActionConfirmed === true || payload?.desktopActionConfirmed === true;
}

/**
 * Builds the argument object for the driver's `launch_app` tool.
 * @param {object | undefined} targetApp - A `TARGET_APPS` entry.
 * @param {unknown} objective - Instruction text; its first URL (if any) is forwarded.
 * @returns {object} `{}` without a target app; otherwise `bundle_id` or `name`,
 *   plus `urls` when a URL is present or the app is a browser (browsers default
 *   to "about:blank").
 */
export function buildCuaLaunchArgs(targetApp, objective) {
  if (!targetApp) return {};
  const launchArgs = targetApp.bundleId ? { bundle_id: targetApp.bundleId } : { name: targetApp.name };
  const url = extractTargetUrl(objective);
  if (targetApp.browser) {
    launchArgs.urls = [url || "about:blank"];
  } else if (url) {
    launchArgs.urls = [url];
  }
  return launchArgs;
}

/**
 * Picks a window: on-screen and not flagged off the current space first, then
 * any window not flagged off the current space, then the first one.
 */
function selectWindow(windows) {
  const candidates = Array.isArray(windows) ? windows : [];
  return (
    candidates.find((window) => window?.is_on_screen === true && window?.on_current_space !== false) ||
    candidates.find((window) => window?.on_current_space !== false) ||
    candidates[0]
  );
}

/** Reads a positive numeric pid from `structured.pid`, `structured.process_id` or `pid`. */
function getPid(launchResult) {
  const value = launchResult?.structured?.pid ?? launchResult?.structured?.process_id ?? launchResult?.pid;
  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}

/** Reads a positive numeric window id from `window_id` or `id`. */
function getWindowId(window) {
  const value = window?.window_id ?? window?.id;
  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}

/**
 * Extracts text from parsed tool output: the joined `content[].text` items, else
 * `text`, else the raw output.
 */
function extractTextContent(parsed, raw) {
  if (Array.isArray(parsed?.content)) {
    return parsed.content
      .map((item) => (typeof item?.text === "string" ? item.text.trim() : ""))
      .filter(Boolean)
      .join("\n")
      .trim();
  }
  if (typeof parsed?.text === "string") return parsed.text.trim();
  if (typeof raw === "string") return raw.trim();
  return "";
}

/**
 * Normalizes driver stdout into `{ raw, text, structured, isError }`.
 * Output that is not valid JSON is treated as plain text with no structured data.
 */
function normalizeCuaToolOutput(rawOutput) {
  const raw = String(rawOutput || "").trim();
  if (!raw) {
    return { raw: "", text: "", structured: {} };
  }
  try {
    const parsed = JSON.parse(raw);
    // Prefer an explicit `structuredContent` object; otherwise use the parsed
    // value itself when it is a plain object.
    const structured =
      parsed?.structuredContent && typeof parsed.structuredContent === "object"
        ? parsed.structuredContent
        : parsed && typeof parsed === "object" && !Array.isArray(parsed)
          ? parsed
          : {};
    return {
      raw,
      text: extractTextContent(parsed, raw),
      structured,
      isError: parsed?.isError === true,
    };
  } catch {
    return {
      raw,
      text: raw,
      structured: {},
      isError: false,
    };
  }
}

/**
 * Lists the file paths to probe for `command`.
 * A command containing a path separator resolves to exactly one path (relative
 * to `cwd`); a bare name is searched in `env.PATH` and then in a few well-known
 * install locations.
 */
function buildExecutableCandidates(command, env, cwd) {
  const normalizedCommand = normalizeText(command);
  if (!normalizedCommand) return [];
  if (normalizedCommand.includes("/") || path.isAbsolute(normalizedCommand)) {
    return [
      path.isAbsolute(normalizedCommand)
        ? normalizedCommand
        : path.resolve(cwd || process.cwd(), normalizedCommand),
    ];
  }
  const pathCandidates = String(env.PATH || "")
    .split(path.delimiter)
    .filter(Boolean)
    .map((item) => path.join(item, normalizedCommand));
  const home = normalizeText(env.HOME);
  return [
    ...pathCandidates,
    home ? path.join(home, ".local", "bin", normalizedCommand) : undefined,
    path.join("/usr/local/bin", normalizedCommand),
    path.join("/opt/homebrew/bin", normalizedCommand),
    normalizedCommand === "cua-driver" ? "/Applications/CuaDriver.app/Contents/MacOS/cua-driver" : undefined,
  ].filter(Boolean);
}

/**
 * Resolves `command` to the first candidate path the current user may execute.
 * @throws {Error} "CUA_DRIVER_COMMAND_NOT_FOUND" when no candidate qualifies.
 */
async function resolveExecutableCommand(command, env, cwd) {
  for (const candidate of buildExecutableCandidates(command, env, cwd)) {
    try {
      // Require execute permission, not mere existence, so that a
      // non-executable file earlier in PATH does not shadow a working install.
      await access(candidate, fsConstants.X_OK);
      return candidate;
    } catch {
      // Try the next well-known install location.
    }
  }
  throw new Error("CUA_DRIVER_COMMAND_NOT_FOUND");
}

/**
 * Runs `<driver> [prefix args] call <toolName> <json args> --raw --compact` and
 * resolves with the normalized output.
 *
 * Configuration is read from `options.env` (default `process.env`):
 * `BOSS_CUA_DRIVER_COMMAND`, `BOSS_CUA_DRIVER_ARGS_JSON`, `BOSS_CUA_DRIVER_ARGS`
 * and `BOSS_CUA_DRIVER_TIMEOUT_MS`.
 *
 * @throws {Error} "CUA_DRIVER_COMMAND_NOT_FOUND", "CUA_DRIVER_TIMEOUT",
 *   "CUA_DRIVER_TOOL_FAILED: …" (non-zero exit) or "CUA_DRIVER_TOOL_ERROR: …"
 *   (output flagged `isError`).
 */
async function callCuaTool(toolName, args, options) {
  const env = options.env || process.env;
  const command = await resolveExecutableCommand(
    normalizeText(env.BOSS_CUA_DRIVER_COMMAND) || "cua-driver",
    env,
    options.cwd || process.cwd(),
  );
  // A valid JSON array takes precedence over the whitespace-separated form.
  const prefixArgs = parseArgsJson(env.BOSS_CUA_DRIVER_ARGS_JSON) ?? parseArgs(env.BOSS_CUA_DRIVER_ARGS);
  const timeoutMs = parseTimeoutMs(env.BOSS_CUA_DRIVER_TIMEOUT_MS);
  const childArgs = [...prefixArgs, "call", toolName, JSON.stringify(args || {}), "--raw", "--compact"];

  return new Promise((resolve, reject) => {
    const child = spawn(command, childArgs, {
      cwd: options.cwd || process.cwd(),
      env: {
        ...process.env,
        ...env,
      },
      stdio: ["ignore", "pipe", "pipe"],
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const timer = setTimeout(() => {
      timedOut = true;
      child.kill("SIGKILL");
    }, timeoutMs);

    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", (error) => {
      clearTimeout(timer);
      if (error?.code === "ENOENT") {
        reject(new Error("CUA_DRIVER_COMMAND_NOT_FOUND"));
        return;
      }
      reject(error);
    });
    child.on("close", (code) => {
      clearTimeout(timer);
      if (timedOut) {
        reject(new Error("CUA_DRIVER_TIMEOUT"));
        return;
      }
      if (code !== 0) {
        const detail = stderr.trim() || stdout.trim() || `cua-driver exit code ${code}`;
        reject(new Error(`CUA_DRIVER_TOOL_FAILED: ${toolName}: ${detail}`));
        return;
      }
      const result = normalizeCuaToolOutput(stdout);
      if (result.isError) {
        reject(new Error(`CUA_DRIVER_TOOL_ERROR: ${toolName}: ${result.text || result.raw}`));
        return;
      }
      resolve(result);
    });
  });
}

/** True when the app's bundle id, or its name, equals the target's bundle id, name or label. */
function matchesTargetAppExactly(app, targetApp) {
  const bundleId = normalizeText(app?.bundle_id).toLowerCase();
  const name = normalizeText(app?.name).toLowerCase();
  const targetBundleId = normalizeText(targetApp?.bundleId).toLowerCase();
  const targetName = normalizeText(targetApp?.name).toLowerCase();
  const targetLabel = normalizeText(targetApp?.label).toLowerCase();
  if (targetBundleId && bundleId === targetBundleId) return true;
  if (targetName && name === targetName) return true;
  if (targetLabel && name === targetLabel) return true;
  return false;
}

/** True on an exact match, or when the app name contains one of the target's aliases. */
function matchesTargetApp(app, targetApp) {
  if (matchesTargetAppExactly(app, targetApp)) return true;
  const name = normalizeText(app?.name).toLowerCase();
  return targetApp?.aliases?.some((alias) => name.includes(alias.toLowerCase())) === true;
}

/**
 * Picks the running app that corresponds to `targetApp`.
 * Exact bundle-id/name matches win over alias-substring matches, so an app whose
 * name merely contains an alias (e.g. "QQ音乐" for "qq") cannot shadow the real one.
 */
function selectRunningApp(apps, targetApp) {
  const running = (Array.isArray(apps) ? apps : []).filter((app) => app?.running === true);
  return (
    running.find((app) => matchesTargetAppExactly(app, targetApp)) ||
    running.find((app) => matchesTargetApp(app, targetApp))
  );
}

/**
 * Obtains `{ pid, window, sourceText }` for the target app.
 * Tries `launch_app` first; if that fails, looks for an already running instance
 * via `list_apps` + `list_windows`. Each step is appended to `toolTrace`.
 * @throws The original launch error when no running instance with a pid is found;
 *   errors raised by the fallback calls propagate unchanged.
 */
async function resolveTargetAppSession(targetApp, objective, options, toolTrace) {
  try {
    const launchResult = await callCuaTool("launch_app", buildCuaLaunchArgs(targetApp, objective), options);
    toolTrace.push("launch_app");
    const pid = getPid(launchResult);
    return {
      pid,
      window: selectWindow(launchResult.structured?.windows),
      sourceText: launchResult.text || launchResult.raw,
    };
  } catch (error) {
    toolTrace.push("launch_app_failed");
    const appsResult = await callCuaTool("list_apps", {}, options);
    toolTrace.push("list_apps");
    const runningApp = selectRunningApp(appsResult.structured?.apps, targetApp);
    const pid = getPid({ structured: runningApp });
    if (!pid) {
      throw error;
    }
    const windowsResult = await callCuaTool("list_windows", { pid }, options);
    toolTrace.push("list_windows");
    return {
      pid,
      window: selectWindow(windowsResult.structured?.windows),
      sourceText: windowsResult.text || appsResult.text || error?.message,
    };
  }
}

/** Builds the "needs_user_action" result returned for unconfirmed submit-like objectives. */
function buildConfirmationResult(payload, targetApp) {
  return {
    status: "needs_user_action",
    requestId: normalizeText(payload.requestId) || undefined,
    kind: "desktop_submit_confirmation_required",
    risk: "high",
    summary: "这条指令会在桌面应用里发送、提交或删除内容,需要你先确认。",
    recommendedAction: "allow_once",
    availableActions: ["allow_once", "deny"],
    platform: "macos",
    appName: targetApp?.label || targetApp?.name,
  };
}

/** Builds a "failed" result; empty `requestId` / `detail` become `undefined`. */
function buildFailure(requestId, error, detail) {
  return {
    status: "failed",
    requestId: normalizeText(requestId) || undefined,
    error,
    detail: normalizeText(detail) || undefined,
  };
}

/**
 * Executes one computer-use task through the Cua driver.
 *
 * Flow: validate platform/provider/objective, detect the target app, require
 * confirmation for submit-like objectives, obtain pid and window, read the
 * window state, then (when the objective carries text to type) type it, press
 * Return if a submit-like action is allowed, and read the state again.
 *
 * @param {object} payload - Reads `requestId`, `objective`, `platform`,
 *   `provider`, `desktopActionConfirmed` and the `context` equivalents.
 * @param {{ env?: object, cwd?: string }} [options] - Environment and working
 *   directory for driver calls; `env` defaults to `process.env`.
 * @returns {Promise<object>} A result whose `status` is "completed", "failed" or
 *   "needs_user_action". Driver errors are returned as "failed", not thrown.
 */
export async function runCuaDriverComputerUseTask(payload, options = {}) {
  const env = options.env || process.env;
  const requestId = normalizeText(payload?.requestId);
  const objective = normalizeText(payload?.objective);
  const platform = normalizePlatform(payload?.platform || payload?.context?.controlPlatform);
  const provider = normalizeProvider(payload?.provider || payload?.context?.computerUseProvider);

  if (platform !== "macos") {
    return buildFailure(requestId, "UNSUPPORTED_CONTROL_PLATFORM");
  }
  if (provider !== "cua-driver-computer-use") {
    return buildFailure(requestId, "UNSUPPORTED_COMPUTER_USE_PROVIDER");
  }
  if (!objective) {
    return buildFailure(requestId, "CUA_OBJECTIVE_REQUIRED");
  }

  const targetApp = detectCuaTargetApp(objective);
  if (!targetApp) {
    return buildFailure(
      requestId,
      "CUA_TARGET_APP_REQUIRED",
      "请在指令里明确要控制的 macOS 应用,例如 Chrome、Safari、QQ、微信、Finder 或系统设置。",
    );
  }

  if (isSubmitLikeObjective(objective) && !isSubmitAllowed(env, payload)) {
    return buildConfirmationResult(payload, targetApp);
  }

  const toolTrace = [];
  const toolOptions = { ...options, env };
  try {
    const targetSession = await resolveTargetAppSession(targetApp, objective, toolOptions, toolTrace);
    const pid = targetSession.pid;
    if (!pid) {
      return buildFailure(requestId, "CUA_TARGET_PID_NOT_FOUND", targetSession.sourceText);
    }

    let window = targetSession.window;
    if (!window) {
      const windowsResult = await callCuaTool("list_windows", { pid }, toolOptions);
      toolTrace.push("list_windows");
      window = selectWindow(windowsResult.structured?.windows);
    }
    const windowId = getWindowId(window);
    if (!windowId) {
      return buildFailure(requestId, "CUA_TARGET_WINDOW_NOT_FOUND", targetSession.sourceText);
    }

    const beforeState = await callCuaTool("get_window_state", { pid, window_id: windowId }, toolOptions);
    toolTrace.push("get_window_state");

    const typedText = extractQuotedText(objective);
    if (typedText) {
      await callCuaTool("type_text", { pid, window_id: windowId, text: typedText, delay_ms: 20 }, toolOptions);
      toolTrace.push("type_text");
      if (isSubmitLikeObjective(objective) && isSubmitAllowed(env, payload)) {
        await callCuaTool("press_key", { pid, window_id: windowId, key: "return" }, toolOptions);
        toolTrace.push("press_key");
      }
      await callCuaTool("get_window_state", { pid, window_id: windowId }, toolOptions);
      toolTrace.push("get_window_state");
    }

    const observation = beforeState.text
      ? `窗口观测:${beforeState.text.split(/\r?\n/)[0]}`
      : "已完成窗口观测。";
    // Count code points rather than UTF-16 units so that emoji and other astral
    // characters are reported as one character each.
    const actionSummary = typedText
      ? `并已向目标应用写入 ${Array.from(typedText).length} 个字符。`
      : "已打开并读取目标窗口。";
    return {
      status: "completed",
      requestId: requestId || undefined,
      replyBody: `已通过 Cua Driver 接入 ${targetApp.label},${actionSummary}${observation}`,
      targetApp: targetApp.label,
      executionSummary: toolTrace.join(" -> "),
    };
  } catch (error) {
    return buildFailure(requestId, error?.message || "CUA_DRIVER_EXECUTION_FAILED");
  }
}

/** CLI entry point: reads a JSON payload from stdin and writes the JSON result to stdout. */
async function main() {
  const raw = await readStdin();
  const payload = parseJsonPayload(raw);
  const result = await runCuaDriverComputerUseTask(payload, {
    env: process.env,
    cwd: process.cwd(),
  });
  writeJson(result);
}

/** Resolves symlinks in `filePath`; falls back to a plain absolute path if that fails. */
function toRealPath(filePath) {
  try {
    return realpathSync(filePath);
  } catch {
    return path.resolve(filePath);
  }
}

const currentFile = fileURLToPath(import.meta.url);

// Compare real paths: `process.argv[1]` keeps symlinks (npm `.bin` shims, or
// `/tmp` on macOS) while the main module's URL is normally already resolved, so
// a plain string comparison would silently skip `main()`.
if (process.argv[1] && toRealPath(process.argv[1]) === toRealPath(currentFile)) {
  main().catch((error) => {
    writeJson({
      status: "failed",
      error: error?.message || "CUA_DRIVER_RUNTIME_FAILED",
    });
  });
}