529 lines
16 KiB
JavaScript
Executable File
529 lines
16 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
||
|
||
import { spawn } from "node:child_process";
|
||
import { access } from "node:fs/promises";
|
||
import { fileURLToPath } from "node:url";
|
||
import path from "node:path";
|
||
|
||
/** Fallback timeout (ms) for one cua-driver call when no valid override is configured. */
const DEFAULT_CUA_TIMEOUT_MS = 45 * 1000;
|
||
|
||
/**
 * Desktop applications this script knows how to target.
 *
 * Each entry has:
 * - `label`: name used in user-facing replies.
 * - `name`: application name passed to the driver when no bundle id is known.
 * - `bundleId` (optional): preferred over `name` when launching.
 * - `browser` (optional): browsers are always launched with a URL.
 * - `aliases`: lower-case keywords looked up in the objective text.
 *
 * Order matters: the first entry whose alias matches the objective wins.
 */
const TARGET_APPS = [
  {
    label: "Google Chrome",
    name: "Google Chrome",
    bundleId: "com.google.Chrome",
    browser: true,
    aliases: ["chrome", "google chrome", "谷歌浏览器", "谷歌"],
  },
  {
    label: "Safari",
    name: "Safari",
    bundleId: "com.apple.Safari",
    browser: true,
    aliases: ["safari"],
  },
  {
    label: "QQ",
    name: "QQ",
    aliases: ["qq"],
  },
  {
    label: "微信",
    name: "微信",
    aliases: ["微信", "wechat"],
  },
  {
    label: "飞书",
    name: "飞书",
    aliases: ["飞书", "lark", "feishu"],
  },
  {
    label: "Telegram",
    name: "Telegram",
    aliases: ["telegram", "tg"],
  },
  {
    label: "Finder",
    name: "Finder",
    bundleId: "com.apple.finder",
    aliases: ["finder", "访达"],
  },
  {
    label: "系统设置",
    name: "System Settings",
    bundleId: "com.apple.systempreferences",
    aliases: ["系统设置", "system settings", "settings"],
  },
  {
    label: "终端",
    name: "Terminal",
    bundleId: "com.apple.Terminal",
    aliases: ["terminal", "终端"],
  },
  {
    label: "Codex",
    name: "Codex",
    aliases: ["codex"],
  },
];
|
||
|
||
/**
 * Writes `payload` to stdout as one line of JSON followed by a newline.
 *
 * @param {unknown} payload - Value to serialize with `JSON.stringify`.
 */
function writeJson(payload) {
  process.stdout.write(`${JSON.stringify(payload)}\n`);
}
|
||
|
||
/**
 * Reads all of stdin and returns it as a trimmed UTF-8 string.
 *
 * Chunks are concatenated as bytes before decoding. Decoding each chunk on
 * its own would corrupt a multi-byte character (for example a Chinese
 * character) that happens to be split across two chunks.
 *
 * @returns {Promise<string>} The trimmed stdin contents ("" when empty).
 */
async function readStdin() {
  const chunks = [];
  for await (const chunk of process.stdin) {
    // String chunks only occur when an encoding was set on the stream; they
    // are already decoded correctly, so re-encoding them is lossless.
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8").trim();
}
|
||
|
||
/**
 * Parses the raw request text into a plain object.
 *
 * Empty input is treated as `{}`. Malformed JSON and any JSON value that is
 * not a plain object (arrays, null, primitives) also yield `{}`, so callers
 * can read properties without further checks.
 *
 * @param {unknown} raw - Raw JSON text.
 * @returns {object} The parsed object, or `{}`.
 */
function parseJsonPayload(raw) {
  let parsed;
  try {
    parsed = JSON.parse(String(raw || "{}"));
  } catch {
    return {};
  }
  const isPlainObject = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
|
||
|
||
/**
 * Splits a whitespace-separated argument string into a list of arguments.
 *
 * No quoting or escaping is interpreted; every run of whitespace separates
 * two arguments.
 *
 * @param {unknown} value - Argument string (nullish/empty gives `[]`).
 * @returns {string[]} The non-empty arguments in order.
 */
function parseArgs(value) {
  const text = String(value || "").trim();
  return text.split(/\s+/).filter(Boolean);
}
|
||
|
||
/**
 * Parses a JSON array of arguments.
 *
 * @param {unknown} value - JSON text expected to contain an array.
 * @returns {string[] | undefined} Each item converted with `String` and empty
 *   strings dropped; `undefined` when the input is empty, is not valid JSON,
 *   or is not an array.
 */
function parseArgsJson(value) {
  const raw = String(value || "").trim();
  if (!raw) return undefined;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return undefined;
  }
  if (!Array.isArray(parsed)) return undefined;
  return parsed.map((item) => String(item)).filter(Boolean);
}
|
||
|
||
/**
 * Parses a timeout in milliseconds.
 *
 * @param {unknown} value - Timeout value, typically an environment string.
 * @returns {number} The parsed positive integer, capped at the largest delay
 *   `setTimeout` supports, or `DEFAULT_CUA_TIMEOUT_MS` when the value is
 *   missing, non-numeric, or not positive.
 */
function parseTimeoutMs(value) {
  // Node's setTimeout treats delays above 2^31-1 as 1 ms, which would kill
  // the child process almost immediately instead of waiting "forever".
  const maxTimerDelayMs = 2 ** 31 - 1;
  const parsed = Number.parseInt(String(value || ""), 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_CUA_TIMEOUT_MS;
  return Math.min(parsed, maxTimerDelayMs);
}
|
||
|
||
/**
 * Converts a value to a trimmed string.
 *
 * Only `null` and `undefined` become the empty string; other falsy values
 * such as `0` keep their string form, so a numeric id of 0 is not lost.
 *
 * @param {unknown} value - Value to normalize.
 * @returns {string} The trimmed string form of `value`.
 */
function normalizeText(value) {
  return String(value ?? "").trim();
}
|
||
|
||
/**
 * Normalizes a platform identifier.
 *
 * @param {unknown} value - Requested platform.
 * @returns {string} "macos" when the value is empty, "macos", or "darwin"
 *   (case-insensitive); otherwise the trimmed, lower-cased value.
 */
function normalizePlatform(value) {
  const platform = normalizeText(value).toLowerCase();
  const isMacAlias = platform === "macos" || platform === "darwin";
  return !platform || isMacAlias ? "macos" : platform;
}
|
||
|
||
/**
 * Normalizes a computer-use provider identifier.
 *
 * @param {unknown} value - Requested provider.
 * @returns {string} The trimmed value, or "cua-driver-computer-use" when it
 *   is empty.
 */
function normalizeProvider(value) {
  return normalizeText(value) || "cua-driver-computer-use";
}
|
||
|
||
/**
 * Tells whether the lower-cased `text` mentions `alias`.
 *
 * ASCII aliases must not touch an ASCII letter or digit on either side, so
 * "tg" does not match inside "outgoing". Aliases containing non-ASCII
 * characters (CJK names) are matched as plain substrings, because that text
 * has no word separators.
 */
function objectiveMentionsAlias(text, alias) {
  const needle = String(alias).toLowerCase();
  if (!/^[\x20-\x7e]+$/.test(needle)) return text.includes(needle);

  const isAsciiWordChar = (char) => char !== undefined && /[a-z0-9]/.test(char);
  let index = text.indexOf(needle);
  while (index !== -1) {
    const before = text[index - 1];
    const after = text[index + needle.length];
    if (!isAsciiWordChar(before) && !isAsciiWordChar(after)) return true;
    index = text.indexOf(needle, index + 1);
  }
  return false;
}

/**
 * Finds the application an objective refers to.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {object | undefined} The first `TARGET_APPS` entry (in list order)
 *   with an alias mentioned in the objective, or `undefined` when the
 *   objective is empty or names no known application.
 */
export function detectCuaTargetApp(objective) {
  const text = normalizeText(objective).toLowerCase();
  if (!text) return undefined;
  return TARGET_APPS.find((candidate) =>
    candidate.aliases.some((alias) => objectiveMentionsAlias(text, alias)),
  );
}
|
||
|
||
/**
 * Extracts the first http(s) URL from an objective.
 *
 * The URL ends at whitespace, CJK punctuation, a closing parenthesis, or a
 * double quote. Trailing ASCII sentence punctuation (as in "open
 * https://example.com, then ...") is stripped so it does not become part of
 * the URL.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {string | undefined} The URL, or `undefined` when none is present.
 */
function extractTargetUrl(objective) {
  const text = normalizeText(objective);
  const candidate = text.match(/https?:\/\/[^\s,。;、))"”]+/i)?.[0];
  if (!candidate) return undefined;
  return candidate.replace(/[.,;:!?']+$/, "");
}
|
||
|
||
/**
 * Extracts the text the user wants typed from an objective.
 *
 * Patterns are tried in order and the first non-empty capture wins:
 * 1. text inside straight or curly double quotes,
 * 2. text inside CJK corner brackets (「」 or 『』),
 * 3. text after "输入:" up to a newline, full stop, or semicolon,
 * 4. text after "打字:" up to a newline, full stop, or semicolon.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {string | undefined} The trimmed text, or `undefined` when no
 *   pattern matches.
 */
function extractQuotedText(objective) {
  const text = normalizeText(objective);
  const patterns = [
    /[“"]([^“”"]+)[”"]/,
    /[「『]([^」』]+)[」』]/,
    /输入[::]\s*([^\n。;;]+)/,
    /打字[::]\s*([^\n。;;]+)/,
  ];
  for (const pattern of patterns) {
    const captured = text.match(pattern)?.[1]?.trim();
    if (captured) return captured;
  }
  return undefined;
}
|
||
|
||
/**
 * Tells whether an objective asks for a sensitive action such as sending,
 * submitting, deleting, or paying.
 *
 * Matching is a case-insensitive substring search, so it errs on the side of
 * flagging: "PayPal" matches "pay".
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {boolean} `true` when any keyword occurs in the objective.
 */
export function isSubmitLikeObjective(objective) {
  const keywords = [
    "发送",
    "提交",
    "发出去",
    "回车发送",
    "删除",
    "购买",
    "下单",
    "支付",
    "转账",
    "send",
    "submit",
    "delete",
    "purchase",
    "pay",
  ];
  const text = normalizeText(objective).toLowerCase();
  return keywords.some((keyword) => text.includes(keyword));
}
|
||
|
||
/**
 * Tells whether sensitive (submit-like) actions may run without asking.
 *
 * @param {object} env - Environment variables; `BOSS_CUA_ALLOW_SUBMIT` set to
 *   "true" (case-insensitive, surrounding whitespace ignored) allows it.
 * @param {object} [payload] - Request payload; `desktopActionConfirmed`
 *   strictly equal to `true`, at the top level or under `context`, allows it.
 * @returns {boolean} `true` when either source grants permission.
 */
function isSubmitAllowed(env, payload) {
  const flag = String(env?.BOSS_CUA_ALLOW_SUBMIT || "").trim().toLowerCase();
  if (flag === "true") return true;
  return payload?.context?.desktopActionConfirmed === true || payload?.desktopActionConfirmed === true;
}
|
||
|
||
/**
 * Builds the arguments for the driver's `launch_app` tool.
 *
 * @param {object | undefined} targetApp - Entry describing the application.
 * @param {unknown} objective - Instruction text, searched for a URL.
 * @returns {object} `{}` when there is no target. Otherwise `bundle_id` (when
 *   the app has one) or `name`, plus `urls` when the objective has a URL.
 *   Browsers always get `urls`, defaulting to "about:blank".
 */
export function buildCuaLaunchArgs(targetApp, objective) {
  if (!targetApp) return {};

  const launchArgs = targetApp.bundleId ? { bundle_id: targetApp.bundleId } : { name: targetApp.name };
  const url = extractTargetUrl(objective);
  if (targetApp.browser) {
    launchArgs.urls = [url || "about:blank"];
  } else if (url) {
    launchArgs.urls = [url];
  }
  return launchArgs;
}
|
||
|
||
/**
 * Picks the best window from a driver window list.
 *
 * Preference order:
 * 1. a window that is on screen and not marked as off the current space,
 * 2. any window not marked as off the current space,
 * 3. the first window in the list.
 *
 * @param {unknown} windows - Window list (non-arrays count as empty).
 * @returns {object | undefined} The chosen window, or `undefined` when the
 *   list is empty.
 */
function selectWindow(windows) {
  const candidates = Array.isArray(windows) ? windows : [];
  const isOnCurrentSpace = (window) => window?.on_current_space !== false;
  return (
    candidates.find((window) => window?.is_on_screen === true && isOnCurrentSpace(window)) ||
    candidates.find(isOnCurrentSpace) ||
    candidates[0]
  );
}
|
||
|
||
/**
 * Reads a process id from a driver result.
 *
 * Looks at `structured.pid`, then `structured.process_id`, then `pid`. The
 * value must be a positive integer, given as a number or a numeric string;
 * fractions, booleans, and other types are rejected.
 *
 * @param {object | undefined} launchResult - Normalized tool result.
 * @returns {number | undefined} The pid, or `undefined` when absent/invalid.
 */
function getPid(launchResult) {
  const value = launchResult?.structured?.pid ?? launchResult?.structured?.process_id ?? launchResult?.pid;
  const parsed = typeof value === "string" ? Number(value) : value;
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
}
|
||
|
||
/**
 * Reads a window id from a driver window entry.
 *
 * Looks at `window_id`, then `id`. The value must be a positive integer,
 * given as a number or a numeric string; fractions, booleans, and other
 * types are rejected.
 *
 * @param {object | undefined} window - Window entry from the driver.
 * @returns {number | undefined} The id, or `undefined` when absent/invalid.
 */
function getWindowId(window) {
  const value = window?.window_id ?? window?.id;
  const parsed = typeof value === "string" ? Number(value) : value;
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
}
|
||
|
||
/**
 * Extracts human-readable text from a parsed tool response.
 *
 * @param {any} parsed - Parsed JSON response.
 * @param {unknown} raw - Raw output, used as the last fallback.
 * @returns {string} When `parsed.content` is an array: the trimmed `text` of
 *   each item, empty ones skipped, joined with newlines. Otherwise the
 *   trimmed `parsed.text` if it is a string, else the trimmed `raw` if it is
 *   a string, else "".
 */
function extractTextContent(parsed, raw) {
  if (Array.isArray(parsed?.content)) {
    const parts = [];
    for (const item of parsed.content) {
      const text = typeof item?.text === "string" ? item.text.trim() : "";
      if (text) parts.push(text);
    }
    return parts.join("\n").trim();
  }
  if (typeof parsed?.text === "string") return parsed.text.trim();
  return typeof raw === "string" ? raw.trim() : "";
}
|
||
|
||
/**
 * Normalizes the stdout of a driver tool call.
 *
 * @param {unknown} rawOutput - Raw stdout of the driver.
 * @returns {{raw: string, text: string, structured: object, isError: boolean}}
 *   - `raw`: the trimmed output.
 *   - `text`: readable text taken from the response (the raw output when it
 *     is not JSON).
 *   - `structured`: `structuredContent` when it is an object, else the parsed
 *     value itself when it is a plain object, else `{}`.
 *   - `isError`: `true` only when the parsed response has `isError === true`.
 */
function normalizeCuaToolOutput(rawOutput) {
  const raw = String(rawOutput || "").trim();
  if (!raw) {
    return { raw: "", text: "", structured: {}, isError: false };
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON: expose the output as plain text.
    return { raw, text: raw, structured: {}, isError: false };
  }

  let structured = {};
  if (parsed?.structuredContent && typeof parsed.structuredContent === "object") {
    structured = parsed.structuredContent;
  } else if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
    structured = parsed;
  }

  return {
    raw,
    text: extractTextContent(parsed, raw),
    structured,
    isError: parsed?.isError === true,
  };
}
|
||
|
||
/**
 * Lists the file paths at which a command may be installed.
 *
 * A command containing "/" (or an absolute path) gives exactly one
 * candidate: itself, resolved against `cwd` when relative. A bare command
 * name gives, in order: each `PATH` directory, `$HOME/.local/bin`,
 * `/usr/local/bin`, `/opt/homebrew/bin`, and, for "cua-driver" only, the
 * binary inside `/Applications/CuaDriver.app`. Duplicates are removed,
 * keeping the first occurrence.
 *
 * @param {unknown} command - Command name or path.
 * @param {object} env - Environment providing `PATH` and `HOME`.
 * @param {string} [cwd] - Base directory for relative paths (defaults to the
 *   process working directory).
 * @returns {string[]} Candidate paths in lookup order; `[]` for an empty
 *   command.
 */
function buildExecutableCandidates(command, env, cwd) {
  const normalizedCommand = normalizeText(command);
  if (!normalizedCommand) return [];

  if (path.isAbsolute(normalizedCommand)) return [normalizedCommand];
  if (normalizedCommand.includes("/")) {
    return [path.resolve(cwd || process.cwd(), normalizedCommand)];
  }

  const pathCandidates = String(env?.PATH || "")
    .split(path.delimiter)
    .filter(Boolean)
    .map((directory) => path.join(directory, normalizedCommand));
  const home = normalizeText(env?.HOME);
  const candidates = [
    ...pathCandidates,
    home ? path.join(home, ".local", "bin", normalizedCommand) : undefined,
    path.join("/usr/local/bin", normalizedCommand),
    path.join("/opt/homebrew/bin", normalizedCommand),
    normalizedCommand === "cua-driver" ? "/Applications/CuaDriver.app/Contents/MacOS/cua-driver" : undefined,
  ].filter(Boolean);
  // A Set keeps insertion order, so lookup priority is unchanged.
  return [...new Set(candidates)];
}
|
||
|
||
/**
 * Resolves a command to the first candidate path that exists.
 *
 * @param {unknown} command - Command name or path.
 * @param {object} env - Environment used to build the candidate list.
 * @param {string} [cwd] - Base directory for relative paths.
 * @returns {Promise<string>} The first accessible candidate.
 * @throws {Error} "CUA_DRIVER_COMMAND_NOT_FOUND" when no candidate is
 *   accessible.
 */
async function resolveExecutableCommand(command, env, cwd) {
  for (const candidate of buildExecutableCandidates(command, env, cwd)) {
    try {
      await access(candidate);
      return candidate;
    } catch {
      // Not accessible here; try the next well-known install location.
    }
  }
  throw new Error("CUA_DRIVER_COMMAND_NOT_FOUND");
}
|
||
|
||
/**
 * Runs one driver tool as a child process and returns its normalized output.
 *
 * The child is started as
 * `<command> [...prefixArgs] call <toolName> <json-args> --raw --compact`,
 * configured through the environment:
 * - `BOSS_CUA_DRIVER_COMMAND`: command name or path (default "cua-driver").
 * - `BOSS_CUA_DRIVER_ARGS_JSON` / `BOSS_CUA_DRIVER_ARGS`: prefix arguments;
 *   the JSON form wins when it parses to an array.
 * - `BOSS_CUA_DRIVER_TIMEOUT_MS`: timeout after which the child is killed.
 *
 * @param {string} toolName - Driver tool to call.
 * @param {object} [args] - Tool arguments, serialized as JSON.
 * @param {{env?: object, cwd?: string}} [options] - Environment and working
 *   directory overrides (default: those of the current process).
 * @returns {Promise<object>} The normalized tool output.
 * @throws {Error} "CUA_DRIVER_COMMAND_NOT_FOUND", "CUA_DRIVER_TIMEOUT",
 *   "CUA_DRIVER_TOOL_FAILED: ..." (non-zero exit or killed by a signal),
 *   "CUA_DRIVER_TOOL_ERROR: ..." (tool reported `isError`), or the spawn
 *   error itself.
 */
async function callCuaTool(toolName, args, options = {}) {
  const env = options.env || process.env;
  const cwd = options.cwd || process.cwd();
  const command = await resolveExecutableCommand(
    normalizeText(env.BOSS_CUA_DRIVER_COMMAND) || "cua-driver",
    env,
    cwd,
  );
  const prefixArgs = parseArgsJson(env.BOSS_CUA_DRIVER_ARGS_JSON) ?? parseArgs(env.BOSS_CUA_DRIVER_ARGS);
  const timeoutMs = parseTimeoutMs(env.BOSS_CUA_DRIVER_TIMEOUT_MS);
  const childArgs = [...prefixArgs, "call", toolName, JSON.stringify(args || {}), "--raw", "--compact"];

  return new Promise((resolve, reject) => {
    const child = spawn(command, childArgs, {
      cwd,
      env: {
        ...process.env,
        ...env,
      },
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const timer = setTimeout(() => {
      // Remember why the child died so "close" reports a timeout.
      timedOut = true;
      child.kill("SIGKILL");
    }, timeoutMs);

    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });

    child.on("error", (error) => {
      clearTimeout(timer);
      if (error?.code === "ENOENT") {
        reject(new Error("CUA_DRIVER_COMMAND_NOT_FOUND"));
        return;
      }
      reject(error);
    });

    child.on("close", (code, signal) => {
      clearTimeout(timer);
      if (timedOut) {
        reject(new Error("CUA_DRIVER_TIMEOUT"));
        return;
      }
      if (code !== 0) {
        // `code` is null when the child was terminated by a signal.
        const exitDescription =
          code === null && signal ? `cua-driver terminated by signal ${signal}` : `cua-driver exit code ${code}`;
        const detail = stderr.trim() || stdout.trim() || exitDescription;
        reject(new Error(`CUA_DRIVER_TOOL_FAILED: ${toolName}: ${detail}`));
        return;
      }
      const result = normalizeCuaToolOutput(stdout);
      if (result.isError) {
        reject(new Error(`CUA_DRIVER_TOOL_ERROR: ${toolName}: ${result.text || result.raw}`));
        return;
      }
      resolve(result);
    });
  });
}
|
||
|
||
/**
 * Tells whether an application reported by the driver is the target.
 *
 * All comparisons are case-insensitive. An app matches when its bundle id
 * equals the target's, its name equals the target's `name` or `label`, or
 * its name contains one of the target's aliases.
 *
 * @param {object | undefined} app - Entry with `bundle_id` and `name`.
 * @param {object | undefined} targetApp - Target application entry.
 * @returns {boolean} `true` on a match.
 */
function matchesTargetApp(app, targetApp) {
  const lower = (value) => normalizeText(value).toLowerCase();
  const bundleId = lower(app?.bundle_id);
  const name = lower(app?.name);

  const targetBundleId = lower(targetApp?.bundleId);
  if (targetBundleId && bundleId === targetBundleId) return true;

  const targetName = lower(targetApp?.name);
  if (targetName && name === targetName) return true;

  const targetLabel = lower(targetApp?.label);
  if (targetLabel && name === targetLabel) return true;

  return targetApp?.aliases?.some((alias) => name.includes(alias.toLowerCase())) === true;
}
|
||
|
||
/**
 * Finds the first running application that matches the target.
 *
 * @param {unknown} apps - Application list (non-arrays count as empty).
 * @param {object | undefined} targetApp - Target application entry.
 * @returns {object | undefined} The first entry with `running === true` that
 *   matches, or `undefined`.
 */
function selectRunningApp(apps, targetApp) {
  if (!Array.isArray(apps)) return undefined;
  return apps.find((app) => app?.running === true && matchesTargetApp(app, targetApp));
}
|
||
|
||
/**
 * Obtains a process id and window for the target application.
 *
 * First tries `launch_app`. If that fails, falls back to `list_apps` to find
 * an already running instance and `list_windows` to get its windows. Each
 * step is appended to `toolTrace` ("launch_app", or "launch_app_failed",
 * "list_apps", "list_windows").
 *
 * @param {object} targetApp - Target application entry.
 * @param {string} objective - Instruction text (may contain a URL to open).
 * @param {object} options - Options forwarded to `callCuaTool`.
 * @param {string[]} toolTrace - Mutated: receives the executed step names.
 * @returns {Promise<{pid: number | undefined, window: object | undefined, sourceText: string | undefined}>}
 *   `sourceText` is driver text kept for diagnostics.
 * @throws {Error} The original launch error when no running instance with a
 *   pid is found; errors from the fallback calls propagate as they are.
 */
async function resolveTargetAppSession(targetApp, objective, options, toolTrace) {
  let launchError;
  try {
    const launchResult = await callCuaTool("launch_app", buildCuaLaunchArgs(targetApp, objective), options);
    toolTrace.push("launch_app");
    return {
      pid: getPid(launchResult),
      window: selectWindow(launchResult.structured?.windows),
      sourceText: launchResult.text || launchResult.raw,
    };
  } catch (error) {
    launchError = error;
  }

  // Launch failed: the app may already be running, so look it up instead.
  toolTrace.push("launch_app_failed");
  const appsResult = await callCuaTool("list_apps", {}, options);
  toolTrace.push("list_apps");
  const runningApp = selectRunningApp(appsResult.structured?.apps, targetApp);
  const pid = getPid({ structured: runningApp });
  if (!pid) {
    throw launchError;
  }

  const windowsResult = await callCuaTool("list_windows", { pid }, options);
  toolTrace.push("list_windows");
  return {
    pid,
    window: selectWindow(windowsResult.structured?.windows),
    sourceText: windowsResult.text || appsResult.text || launchError?.message,
  };
}
|
||
|
||
/**
 * Builds the "needs_user_action" result returned when a sensitive action
 * needs confirmation before it may run.
 *
 * @param {object | undefined} payload - Request payload; `requestId` is
 *   echoed back when non-empty.
 * @param {object | undefined} targetApp - Target application; its `label`
 *   (or `name`) is reported as `appName`.
 * @returns {object} The confirmation request.
 */
function buildConfirmationResult(payload, targetApp) {
  return {
    status: "needs_user_action",
    requestId: normalizeText(payload?.requestId) || undefined,
    kind: "desktop_submit_confirmation_required",
    risk: "high",
    summary: "这条指令会在桌面应用里发送、提交或删除内容,需要你先确认。",
    recommendedAction: "allow_once",
    availableActions: ["allow_once", "deny"],
    platform: "macos",
    appName: targetApp?.label || targetApp?.name,
  };
}
|
||
|
||
/**
 * Builds a "failed" result.
 *
 * @param {unknown} requestId - Request id to echo (omitted when empty).
 * @param {string} error - Error code or message.
 * @param {unknown} [detail] - Extra explanation (omitted when empty).
 * @returns {{status: "failed", requestId: string | undefined, error: string, detail: string | undefined}}
 */
function buildFailure(requestId, error, detail) {
  const normalizedRequestId = normalizeText(requestId);
  const normalizedDetail = normalizeText(detail);
  return {
    status: "failed",
    requestId: normalizedRequestId || undefined,
    error,
    detail: normalizedDetail || undefined,
  };
}
|
||
|
||
/**
 * Executes a computer-use request through the driver.
 *
 * Steps:
 * 1. Validate platform ("macos" only), provider ("cua-driver-computer-use"
 *    only), and that an objective is present.
 * 2. Detect the target application from the objective.
 * 3. If the objective looks like a send/submit/delete/pay action and it has
 *    not been allowed, return a confirmation request instead of acting.
 * 4. Obtain the application's pid and a window, then read the window state.
 * 5. If the objective contains text to type, type it, press Return when the
 *    action is submit-like, and read the window state again.
 *
 * Failures are reported in the returned object rather than thrown.
 *
 * @param {object} payload - Request: `requestId`, `objective`, and optional
 *   `platform`, `provider`, `desktopActionConfirmed`, and `context`.
 * @param {{env?: object, cwd?: string}} [options] - Environment and working
 *   directory used for driver calls.
 * @returns {Promise<object>} A result whose `status` is "completed",
 *   "needs_user_action", or "failed".
 */
export async function runCuaDriverComputerUseTask(payload, options = {}) {
  const env = options.env || process.env;
  const requestId = normalizeText(payload?.requestId);
  const objective = normalizeText(payload?.objective);
  const platform = normalizePlatform(payload?.platform || payload?.context?.controlPlatform);
  const provider = normalizeProvider(payload?.provider || payload?.context?.computerUseProvider);

  if (platform !== "macos") {
    return buildFailure(requestId, "UNSUPPORTED_CONTROL_PLATFORM");
  }
  if (provider !== "cua-driver-computer-use") {
    return buildFailure(requestId, "UNSUPPORTED_COMPUTER_USE_PROVIDER");
  }
  if (!objective) {
    return buildFailure(requestId, "CUA_OBJECTIVE_REQUIRED");
  }

  const targetApp = detectCuaTargetApp(objective);
  if (!targetApp) {
    return buildFailure(
      requestId,
      "CUA_TARGET_APP_REQUIRED",
      "请在指令里明确要控制的 macOS 应用,例如 Chrome、Safari、QQ、微信、Finder 或系统设置。",
    );
  }

  // Past this gate a submit-like objective is known to be allowed.
  const submitRequested = isSubmitLikeObjective(objective);
  if (submitRequested && !isSubmitAllowed(env, payload)) {
    return buildConfirmationResult(payload, targetApp);
  }

  const callOptions = { ...options, env };
  const toolTrace = [];
  try {
    const targetSession = await resolveTargetAppSession(targetApp, objective, callOptions, toolTrace);

    const pid = targetSession.pid;
    if (!pid) {
      return buildFailure(requestId, "CUA_TARGET_PID_NOT_FOUND", targetSession.sourceText);
    }

    let window = targetSession.window;
    if (!window) {
      // The launch result carried no usable window; ask the driver directly.
      const windowsResult = await callCuaTool("list_windows", { pid }, callOptions);
      toolTrace.push("list_windows");
      window = selectWindow(windowsResult.structured?.windows);
    }
    const windowId = getWindowId(window);
    if (!windowId) {
      return buildFailure(requestId, "CUA_TARGET_WINDOW_NOT_FOUND", targetSession.sourceText);
    }

    const beforeState = await callCuaTool("get_window_state", { pid, window_id: windowId }, callOptions);
    toolTrace.push("get_window_state");

    const typedText = extractQuotedText(objective);
    if (typedText) {
      await callCuaTool("type_text", { pid, window_id: windowId, text: typedText, delay_ms: 20 }, callOptions);
      toolTrace.push("type_text");
      if (submitRequested) {
        await callCuaTool("press_key", { pid, window_id: windowId, key: "return" }, callOptions);
        toolTrace.push("press_key");
      }
      // NOTE(review): the state read after typing is not used in the reply;
      // the observation below still describes the window before typing.
      await callCuaTool("get_window_state", { pid, window_id: windowId }, callOptions);
      toolTrace.push("get_window_state");
    }

    const observation = beforeState.text ? `窗口观测:${beforeState.text.split(/\r?\n/)[0]}` : "已完成窗口观测。";
    // Count code points so characters outside the BMP (emoji) count once.
    const actionSummary = typedText
      ? `并已向目标应用写入 ${[...typedText].length} 个字符。`
      : "已打开并读取目标窗口。";
    return {
      status: "completed",
      requestId: requestId || undefined,
      replyBody: `已通过 Cua Driver 接入 ${targetApp.label},${actionSummary}${observation}`,
      targetApp: targetApp.label,
      executionSummary: toolTrace.join(" -> "),
    };
  } catch (error) {
    return buildFailure(requestId, error?.message || "CUA_DRIVER_EXECUTION_FAILED");
  }
}
|
||
|
||
/**
 * CLI entry point: reads a JSON request from stdin, runs it, and writes the
 * result to stdout as one line of JSON.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const payload = parseJsonPayload(await readStdin());
  const result = await runCuaDriverComputerUseTask(payload, {
    env: process.env,
    cwd: process.cwd(),
  });
  writeJson(result);
}
|
||
|
||
// Run the CLI only when this file is the script being executed, not when it
// is imported as a module for its exports.
// NOTE(review): the comparison uses the resolved (not symlink-dereferenced)
// argv path — confirm the script is never started through a symlink.
const currentFile = fileURLToPath(import.meta.url);
if (process.argv[1] && path.resolve(process.argv[1]) === currentFile) {
  main().catch((error) => {
    // Keep the output contract: always emit one JSON line, even on a crash.
    writeJson({
      status: "failed",
      error: error?.message || "CUA_DRIVER_RUNTIME_FAILED",
    });
  });
}
|