feat: harden enterprise control plane
This commit is contained in:
528
scripts/cua-driver-computer-use-runtime.mjs
Executable file
528
scripts/cua-driver-computer-use-runtime.mjs
Executable file
@@ -0,0 +1,528 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { access } from "node:fs/promises";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import path from "node:path";
|
||||
|
||||
// Fallback timeout in milliseconds, used when no valid positive override is parsed.
const DEFAULT_CUA_TIMEOUT_MS = 45_000;
|
||||
|
||||
/**
 * Catalogue of applications the runtime can target.
 *
 * Each entry carries:
 *  - `label`: display name used in replies and confirmation prompts.
 *  - `name`: application name used for launching when no bundle id is present.
 *  - `bundleId` (optional): preferred launch identifier when present.
 *  - `browser` (optional): when true, launches always include a URL.
 *  - `aliases`: lowercase keywords that identify the app inside an objective.
 *
 * Order matters: lookups return the first entry that matches.
 */
const TARGET_APPS = [
  {
    label: "Google Chrome",
    name: "Google Chrome",
    bundleId: "com.google.Chrome",
    browser: true,
    aliases: ["chrome", "google chrome", "谷歌浏览器", "谷歌"],
  },
  { label: "Safari", name: "Safari", bundleId: "com.apple.Safari", browser: true, aliases: ["safari"] },
  { label: "QQ", name: "QQ", aliases: ["qq"] },
  { label: "微信", name: "微信", aliases: ["微信", "wechat"] },
  { label: "飞书", name: "飞书", aliases: ["飞书", "lark", "feishu"] },
  { label: "Telegram", name: "Telegram", aliases: ["telegram", "tg"] },
  { label: "Finder", name: "Finder", bundleId: "com.apple.finder", aliases: ["finder", "访达"] },
  {
    label: "系统设置",
    name: "System Settings",
    bundleId: "com.apple.systempreferences",
    aliases: ["系统设置", "system settings", "settings"],
  },
  { label: "终端", name: "Terminal", bundleId: "com.apple.Terminal", aliases: ["terminal", "终端"] },
  { label: "Codex", name: "Codex", aliases: ["codex"] },
];
|
||||
|
||||
/**
 * Serializes `payload` as JSON and writes it to stdout followed by a newline.
 *
 * @param {unknown} payload - Value to serialize.
 */
function writeJson(payload) {
  const serialized = JSON.stringify(payload);
  process.stdout.write(`${serialized}\n`);
}
|
||||
|
||||
/**
 * Reads an entire stream (stdin by default) and returns its UTF-8 text, trimmed.
 *
 * Chunks are collected as bytes and decoded once at the end. Decoding each
 * chunk on its own corrupts any multi-byte UTF-8 character that straddles a
 * chunk boundary.
 *
 * @param {AsyncIterable<Buffer | Uint8Array | string>} [stream=process.stdin]
 *   Source to drain; string chunks are treated as already-decoded text.
 * @returns {Promise<string>} The full decoded input with surrounding whitespace removed.
 */
async function readStdin(stream = process.stdin) {
  const chunks = [];
  for await (const chunk of stream) {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8").trim();
}
|
||||
|
||||
/**
 * Parses the raw request text into a plain object.
 *
 * Empty input is treated as `{}`. Anything that fails to parse, or parses to
 * a non-object (array, primitive, null), also yields `{}`.
 *
 * @param {unknown} raw - Raw JSON text.
 * @returns {object} The parsed object, or an empty object.
 */
function parseJsonPayload(raw) {
  let decoded;
  try {
    decoded = JSON.parse(String(raw || "{}"));
  } catch {
    return {};
  }
  const isPlainRecord = Boolean(decoded) && typeof decoded === "object" && !Array.isArray(decoded);
  return isPlainRecord ? decoded : {};
}
|
||||
|
||||
/**
 * Splits a whitespace-separated argument string into tokens.
 *
 * No quoting is interpreted; tokens are split on any run of whitespace.
 *
 * @param {unknown} value - Argument string (falsy values yield no tokens).
 * @returns {string[]} Non-empty tokens in order.
 */
function parseArgs(value) {
  const text = String(value || "").trim();
  const tokens = text.split(/\s+/);
  return tokens.filter((token) => token.length > 0);
}
|
||||
|
||||
/**
 * Parses a JSON array of arguments.
 *
 * @param {unknown} value - JSON text expected to hold an array.
 * @returns {string[] | undefined} Every element stringified, with empty
 *   strings dropped; `undefined` when the input is blank, invalid JSON, or
 *   not an array.
 */
function parseArgsJson(value) {
  const text = String(value || "").trim();
  if (text.length === 0) return undefined;

  try {
    const decoded = JSON.parse(text);
    if (!Array.isArray(decoded)) return undefined;

    const args = [];
    for (const entry of decoded) {
      const arg = String(entry);
      if (arg) args.push(arg);
    }
    return args;
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Parses a timeout in milliseconds from a base-10 integer.
 *
 * @param {unknown} value - Raw timeout value.
 * @returns {number} The parsed positive integer, or `DEFAULT_CUA_TIMEOUT_MS`
 *   when the value is missing, non-numeric, or not greater than zero.
 */
function parseTimeoutMs(value) {
  const candidate = Number.parseInt(String(value || ""), 10);
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return DEFAULT_CUA_TIMEOUT_MS;
  }
  return candidate;
}
|
||||
|
||||
/**
 * Coerces a value to a trimmed string; any falsy value becomes "".
 *
 * @param {unknown} value - Value to normalize.
 * @returns {string} Trimmed string form.
 */
function normalizeText(value) {
  const text = value ? String(value) : "";
  return text.trim();
}
|
||||
|
||||
/**
 * Canonicalizes a platform name.
 *
 * @param {unknown} value - Requested platform.
 * @returns {string} "macos" for blank, "macos" or "darwin" (case-insensitive);
 *   otherwise the lowercased, trimmed input.
 */
function normalizePlatform(value) {
  const platform = normalizeText(value).toLowerCase();
  if (platform === "" || platform === "macos" || platform === "darwin") {
    return "macos";
  }
  return platform;
}
|
||||
|
||||
/**
 * Resolves the provider id, defaulting to "cua-driver-computer-use" when blank.
 *
 * @param {unknown} value - Requested provider.
 * @returns {string} Trimmed provider id or the default.
 */
function normalizeProvider(value) {
  const provider = normalizeText(value);
  return provider === "" ? "cua-driver-computer-use" : provider;
}
|
||||
|
||||
/**
 * Finds the first known application mentioned in an objective.
 *
 * Matching is case-insensitive. Aliases made only of printable ASCII must
 * appear as a standalone token, i.e. not directly preceded or followed by an
 * ASCII letter or digit. Without that rule short aliases fire inside
 * unrelated words ("tg" inside "postgres"). Aliases with non-ASCII characters
 * use plain substring matching, since CJK text has no word separators.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @param {Array<{aliases: string[]}>} [candidates=TARGET_APPS] - Apps to
 *   search, in priority order.
 * @returns {object | undefined} The first matching entry, or `undefined`.
 */
export function detectCuaTargetApp(objective, candidates = TARGET_APPS) {
  const text = String(objective || "").trim().toLowerCase();
  if (!text) return undefined;

  const mentions = (alias) => {
    const needle = String(alias || "").toLowerCase();
    if (!needle) return false;
    if (!/^[\x20-\x7e]+$/.test(needle)) return text.includes(needle);
    // Escape regex metacharacters so the alias is matched literally.
    const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`(?<![a-z0-9])${escaped}(?![a-z0-9])`).test(text);
  };

  return candidates.find((candidate) => candidate.aliases.some(mentions));
}
|
||||
|
||||
/**
 * Extracts the first http(s) URL from an objective.
 *
 * The match stops at whitespace, common CJK punctuation, closing parentheses
 * and double quotes. Trailing ASCII sentence punctuation (`. , ; : ! ?`) is
 * then stripped, so "open https://example.com, then ..." yields
 * "https://example.com" rather than a URL ending in a comma.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {string | undefined} The URL, or `undefined` when none is present.
 */
function extractTargetUrl(objective) {
  const text = String(objective || "");
  const candidate = text.match(/https?:\/\/[^\s,。;、))"”]+/i)?.[0];
  if (!candidate) return undefined;

  const cleaned = candidate.replace(/[.,;:!?]+$/, "");
  // Reject a bare scheme left over when the "host" was punctuation only.
  return /^https?:\/\/./i.test(cleaned) ? cleaned : undefined;
}
|
||||
|
||||
/**
 * Pulls the text to type out of an objective.
 *
 * Extractors are tried in order and the first non-empty capture wins:
 * double-quoted text (straight or curly), corner-bracket quoted text, then
 * text after "输入:" or "打字:" up to a newline, full stop or semicolon.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {string | undefined} Trimmed captured text, or `undefined`.
 */
function extractQuotedText(objective) {
  const text = normalizeText(objective);
  const extractors = [
    /[“"]([^“”"]+)[”"]/,
    /[「『]([^」』]+)[」』]/,
    /输入[::]\s*([^\n。;;]+)/,
    /打字[::]\s*([^\n。;;]+)/,
  ];
  return extractors
    .map((extractor) => extractor.exec(text)?.[1]?.trim())
    .find((captured) => Boolean(captured));
}
|
||||
|
||||
/**
 * Reports whether an objective contains a send/submit/delete/payment keyword.
 *
 * Matching is a case-insensitive substring test against a fixed list of
 * Chinese and English keywords.
 *
 * @param {unknown} objective - Free-form instruction text.
 * @returns {boolean} True when any keyword occurs in the objective.
 */
export function isSubmitLikeObjective(objective) {
  const text = normalizeText(objective).toLowerCase();
  const riskyKeywords = [
    "发送",
    "提交",
    "发出去",
    "回车发送",
    "删除",
    "购买",
    "下单",
    "支付",
    "转账",
    "send",
    "submit",
    "delete",
    "purchase",
    "pay",
  ];
  for (const keyword of riskyKeywords) {
    if (text.includes(keyword)) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Decides whether a submit-like action may proceed without confirmation.
 *
 * @param {Record<string, string | undefined>} env - Environment variables;
 *   `BOSS_CUA_ALLOW_SUBMIT` equal to "true" (trimmed, case-insensitive) allows it.
 * @param {object | undefined} payload - Request payload; `desktopActionConfirmed`
 *   strictly equal to `true` at the top level or under `context` allows it.
 * @returns {boolean} True when either source grants permission.
 */
function isSubmitAllowed(env, payload) {
  const envFlag = String(env.BOSS_CUA_ALLOW_SUBMIT || "").trim().toLowerCase();
  if (envFlag === "true") return true;
  if (payload?.context?.desktopActionConfirmed === true) return true;
  return payload?.desktopActionConfirmed === true;
}
|
||||
|
||||
/**
 * Builds the argument object for the `launch_app` tool.
 *
 * The app is identified by `bundle_id` when the target has a bundle id,
 * otherwise by `name`. Browser targets always get a `urls` entry (the URL
 * from the objective, or "about:blank"); other targets get one only when
 * the objective contains a URL.
 *
 * @param {object | undefined} targetApp - Entry describing the target app.
 * @param {unknown} objective - Free-form instruction text.
 * @returns {object} Launch arguments, or `{}` when no target is given.
 */
export function buildCuaLaunchArgs(targetApp, objective) {
  if (!targetApp) return {};

  const launchArgs = {};
  if (targetApp.bundleId) {
    launchArgs.bundle_id = targetApp.bundleId;
  } else {
    launchArgs.name = targetApp.name;
  }

  const url = extractTargetUrl(objective);
  let urls;
  if (targetApp.browser) {
    urls = [url || "about:blank"];
  } else if (url) {
    urls = [url];
  }
  if (urls) launchArgs.urls = urls;
  return launchArgs;
}
|
||||
|
||||
/**
 * Chooses the window to operate on.
 *
 * Preference order: a window that is on screen and not flagged as off the
 * current space; then any window not flagged as off the current space; then
 * the first window in the list.
 *
 * @param {unknown} windows - Window descriptors (non-arrays are treated as empty).
 * @returns {object | undefined} The chosen window, or `undefined` when none exist.
 */
function selectWindow(windows) {
  const pool = Array.isArray(windows) ? windows : [];
  const isOnCurrentSpace = (entry) => entry?.on_current_space !== false;

  const visible = pool.find((entry) => entry?.is_on_screen === true && isOnCurrentSpace(entry));
  if (visible) return visible;

  const sameSpace = pool.find(isOnCurrentSpace);
  if (sameSpace) return sameSpace;

  return pool[0];
}
|
||||
|
||||
/**
 * Reads a process id from a tool result.
 *
 * Looks at `structured.pid`, then `structured.process_id`, then `pid`.
 * Only numbers and numeric strings are accepted, and the value must be a
 * positive integer. Plain `Number()` coercion would turn `true` into PID 1
 * and `[7]` into 7, so other types are rejected before conversion.
 *
 * @param {object | undefined} launchResult - Normalized tool result.
 * @returns {number | undefined} The process id, or `undefined` when absent or invalid.
 */
function getPid(launchResult) {
  const value = launchResult?.structured?.pid ?? launchResult?.structured?.process_id ?? launchResult?.pid;
  if (typeof value !== "number" && typeof value !== "string") return undefined;
  const parsed = Number(value);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
}
|
||||
|
||||
/**
 * Reads a window id from a window descriptor.
 *
 * Looks at `window_id`, then `id`. Only numbers and numeric strings are
 * accepted, and the value must be a positive integer. Booleans, arrays and
 * other objects are rejected rather than coerced (`Number(true)` is 1).
 *
 * @param {object | undefined} window - Window descriptor.
 * @returns {number | undefined} The window id, or `undefined` when absent or invalid.
 */
function getWindowId(window) {
  const value = window?.window_id ?? window?.id;
  if (typeof value !== "number" && typeof value !== "string") return undefined;
  const parsed = Number(value);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
}
|
||||
|
||||
/**
 * Derives human-readable text from a parsed tool response.
 *
 * When `parsed.content` is an array, the trimmed `text` of every item that
 * has a string `text` is joined with newlines (empty pieces are skipped).
 * Otherwise a string `parsed.text` is used, then the raw string, then "".
 *
 * @param {unknown} parsed - Parsed JSON response.
 * @param {unknown} raw - Raw response text used as a fallback.
 * @returns {string} Extracted text, trimmed.
 */
function extractTextContent(parsed, raw) {
  const items = parsed?.content;
  if (Array.isArray(items)) {
    const pieces = [];
    for (const item of items) {
      const piece = typeof item?.text === "string" ? item.text.trim() : "";
      if (piece) pieces.push(piece);
    }
    return pieces.join("\n").trim();
  }

  const direct = parsed?.text;
  if (typeof direct === "string") return direct.trim();

  return typeof raw === "string" ? raw.trim() : "";
}
|
||||
|
||||
/**
 * Normalizes the raw stdout of a tool call into a uniform result record.
 *
 * Every return path yields the same shape `{ raw, text, structured, isError }`;
 * the empty-output path previously left out `isError`.
 *
 * - Empty output: empty strings and an empty `structured` object.
 * - Non-JSON output: the raw text is used as `text`.
 * - JSON output: `structured` is `structuredContent` when that is a plain
 *   object, otherwise the parsed value itself when it is a plain object,
 *   otherwise `{}`. An array `structuredContent` is not a record and is
 *   ignored. `isError` is true only when the payload has `isError === true`.
 *
 * @param {unknown} rawOutput - Raw stdout text.
 * @returns {{raw: string, text: string, structured: object, isError: boolean}}
 */
function normalizeCuaToolOutput(rawOutput) {
  const raw = String(rawOutput || "").trim();
  if (!raw) {
    return { raw: "", text: "", structured: {}, isError: false };
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return { raw, text: raw, structured: {}, isError: false };
  }

  const isRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
  let structured = {};
  if (isRecord(parsed?.structuredContent)) {
    structured = parsed.structuredContent;
  } else if (isRecord(parsed)) {
    structured = parsed;
  }

  return {
    raw,
    text: extractTextContent(parsed, raw),
    structured,
    isError: parsed?.isError === true,
  };
}
|
||||
|
||||
/**
 * Lists the filesystem paths where a command might live, in probe order.
 *
 * - A blank command yields no candidates.
 * - An absolute path yields itself.
 * - A path containing "/" is resolved against `cwd` (or the process cwd).
 * - A bare name is looked up in each `PATH` entry, then `$HOME/.local/bin`,
 *   `/usr/local/bin`, `/opt/homebrew/bin`, and, for "cua-driver" only, the
 *   CuaDriver.app bundle binary.
 *
 * @param {unknown} command - Command name or path.
 * @param {Record<string, string | undefined>} env - Environment supplying `PATH` and `HOME`.
 * @param {string | undefined} cwd - Base directory for relative paths.
 * @returns {string[]} Candidate paths.
 */
function buildExecutableCandidates(command, env, cwd) {
  const name = normalizeText(command);
  if (!name) return [];
  if (path.isAbsolute(name)) return [name];
  if (name.includes("/")) return [path.resolve(cwd || process.cwd(), name)];

  const candidates = [];
  for (const directory of String(env.PATH || "").split(path.delimiter)) {
    if (directory) candidates.push(path.join(directory, name));
  }

  const home = normalizeText(env.HOME);
  if (home) candidates.push(path.join(home, ".local", "bin", name));

  candidates.push(path.join("/usr/local/bin", name));
  candidates.push(path.join("/opt/homebrew/bin", name));

  if (name === "cua-driver") {
    candidates.push("/Applications/CuaDriver.app/Contents/MacOS/cua-driver");
  }
  return candidates;
}
|
||||
|
||||
/**
 * Returns the first candidate path for `command` that is accessible.
 *
 * @param {unknown} command - Command name or path.
 * @param {Record<string, string | undefined>} env - Environment used to build candidates.
 * @param {string | undefined} cwd - Base directory for relative paths.
 * @returns {Promise<string>} The first accessible candidate path.
 * @throws {Error} "CUA_DRIVER_COMMAND_NOT_FOUND" when no candidate is accessible.
 */
async function resolveExecutableCommand(command, env, cwd) {
  const candidates = buildExecutableCandidates(command, env, cwd);
  for (const candidate of candidates) {
    let reachable = true;
    try {
      await access(candidate);
    } catch {
      // Not present here; fall through to the next well-known location.
      reachable = false;
    }
    if (reachable) return candidate;
  }
  throw new Error("CUA_DRIVER_COMMAND_NOT_FOUND");
}
|
||||
|
||||
/**
 * Invokes one driver tool as a child process and returns its normalized output.
 *
 * The executable comes from `BOSS_CUA_DRIVER_COMMAND` (default "cua-driver").
 * Extra leading arguments come from `BOSS_CUA_DRIVER_ARGS_JSON`, falling back
 * to the whitespace-split `BOSS_CUA_DRIVER_ARGS`. The child is run as
 * `<prefix args> call <toolName> <json args> --raw --compact` and is killed
 * with SIGKILL once `BOSS_CUA_DRIVER_TIMEOUT_MS` (or the default) elapses.
 *
 * @param {string} toolName - Tool to call.
 * @param {object | undefined} args - Tool arguments, serialized as JSON (`{}` when falsy).
 * @param {{env?: object, cwd?: string}} [options={}] - Environment and working
 *   directory overrides; defaults to the current process values.
 * @returns {Promise<{raw: string, text: string, structured: object, isError?: boolean}>}
 * @throws {Error} "CUA_DRIVER_COMMAND_NOT_FOUND" when the executable cannot be
 *   found or spawned, "CUA_DRIVER_TIMEOUT" on timeout,
 *   "CUA_DRIVER_TOOL_FAILED: ..." on a non-zero exit or signal, and
 *   "CUA_DRIVER_TOOL_ERROR: ..." when the tool reports `isError`.
 */
async function callCuaTool(toolName, args, options = {}) {
  const env = options.env || process.env;
  const cwd = options.cwd || process.cwd();
  const command = await resolveExecutableCommand(
    normalizeText(env.BOSS_CUA_DRIVER_COMMAND) || "cua-driver",
    env,
    cwd,
  );
  const prefixArgs = parseArgsJson(env.BOSS_CUA_DRIVER_ARGS_JSON) ?? parseArgs(env.BOSS_CUA_DRIVER_ARGS);
  const timeoutMs = parseTimeoutMs(env.BOSS_CUA_DRIVER_TIMEOUT_MS);
  const childArgs = [...prefixArgs, "call", toolName, JSON.stringify(args || {}), "--raw", "--compact"];

  return new Promise((resolve, reject) => {
    const child = spawn(command, childArgs, {
      cwd,
      env: {
        ...process.env,
        ...env,
      },
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const timer = setTimeout(() => {
      timedOut = true;
      child.kill("SIGKILL");
    }, timeoutMs);

    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });

    child.on("error", (error) => {
      clearTimeout(timer);
      if (error?.code === "ENOENT") {
        reject(new Error("CUA_DRIVER_COMMAND_NOT_FOUND"));
        return;
      }
      reject(error);
    });

    child.on("close", (code, signal) => {
      clearTimeout(timer);
      if (timedOut) {
        reject(new Error("CUA_DRIVER_TIMEOUT"));
        return;
      }
      if (code !== 0) {
        // `code` is null when a signal ended the child; name the signal
        // instead of reporting "exit code null".
        const exitInfo =
          code === null ? `cua-driver terminated by signal ${signal}` : `cua-driver exit code ${code}`;
        const detail = stderr.trim() || stdout.trim() || exitInfo;
        reject(new Error(`CUA_DRIVER_TOOL_FAILED: ${toolName}: ${detail}`));
        return;
      }
      const result = normalizeCuaToolOutput(stdout);
      if (result.isError) {
        reject(new Error(`CUA_DRIVER_TOOL_ERROR: ${toolName}: ${result.text || result.raw}`));
        return;
      }
      resolve(result);
    });
  });
}
|
||||
|
||||
/**
 * Tests whether an app entry corresponds to the requested target.
 *
 * All comparisons are case-insensitive on trimmed values. An entry matches
 * when its `bundle_id` equals the target bundle id, its `name` equals the
 * target name or label, or its `name` contains any target alias.
 *
 * @param {object | undefined} app - App entry with `bundle_id` and `name`.
 * @param {object | undefined} targetApp - Target app entry.
 * @returns {boolean} True on any match.
 */
function matchesTargetApp(app, targetApp) {
  const fold = (value) => normalizeText(value).toLowerCase();

  const appBundleId = fold(app?.bundle_id);
  const appName = fold(app?.name);
  const wantedBundleId = fold(targetApp?.bundleId);
  const wantedName = fold(targetApp?.name);
  const wantedLabel = fold(targetApp?.label);

  if (wantedBundleId && wantedBundleId === appBundleId) return true;
  if (wantedName && wantedName === appName) return true;
  if (wantedLabel && wantedLabel === appName) return true;

  const aliasHit = targetApp?.aliases?.some((alias) => appName.includes(alias.toLowerCase()));
  return aliasHit === true;
}
|
||||
|
||||
/**
 * Picks the running app entry that best corresponds to the target.
 *
 * Exact matches (same bundle id, or name equal to the target name or label)
 * are preferred over fuzzy alias matches. Taking the first fuzzy hit in list
 * order can select a different app whose name merely contains an alias
 * (e.g. "QQ音乐" for the alias "qq") even though the exact app is also running.
 *
 * @param {unknown} apps - App entries with `running`, `bundle_id` and `name`
 *   (non-arrays are treated as empty).
 * @param {object | undefined} targetApp - Target app entry.
 * @returns {object | undefined} The chosen running app, or `undefined`.
 */
function selectRunningApp(apps, targetApp) {
  const running = (Array.isArray(apps) ? apps : []).filter((app) => app?.running === true);
  if (running.length === 0) return undefined;

  const fold = (value) => normalizeText(value).toLowerCase();
  const targetBundleId = fold(targetApp?.bundleId);
  const targetNames = [fold(targetApp?.name), fold(targetApp?.label)].filter(Boolean);

  const exact = running.find((app) => {
    if (targetBundleId && fold(app?.bundle_id) === targetBundleId) return true;
    return targetNames.includes(fold(app?.name));
  });
  return exact ?? running.find((app) => matchesTargetApp(app, targetApp));
}
|
||||
|
||||
/**
 * Obtains a pid and window for the target app.
 *
 * First calls `launch_app`. If that attempt fails, falls back to `list_apps`
 * to find an already running instance, then `list_windows` for its windows.
 * When no running instance with a valid pid is found, the original launch
 * error is rethrown. Each step is appended to `toolTrace`.
 *
 * @param {object} targetApp - Target app entry.
 * @param {string} objective - Instruction text, used to build launch arguments.
 * @param {object} options - Options forwarded to every tool call.
 * @param {string[]} toolTrace - Mutable list receiving the executed step names.
 * @returns {Promise<{pid: number | undefined, window: object | undefined, sourceText: string | undefined}>}
 */
async function resolveTargetAppSession(targetApp, objective, options, toolTrace) {
  // Fallback path: attach to an instance that is already running.
  const attachToRunningApp = async (launchError) => {
    toolTrace.push("launch_app_failed");
    const appList = await callCuaTool("list_apps", {}, options);
    toolTrace.push("list_apps");

    const pid = getPid({ structured: selectRunningApp(appList.structured?.apps, targetApp) });
    if (!pid) throw launchError;

    const windowList = await callCuaTool("list_windows", { pid }, options);
    toolTrace.push("list_windows");
    return {
      pid,
      window: selectWindow(windowList.structured?.windows),
      sourceText: windowList.text || appList.text || launchError?.message,
    };
  };

  try {
    const launched = await callCuaTool("launch_app", buildCuaLaunchArgs(targetApp, objective), options);
    toolTrace.push("launch_app");
    return {
      pid: getPid(launched),
      window: selectWindow(launched.structured?.windows),
      sourceText: launched.text || launched.raw,
    };
  } catch (launchError) {
    return attachToRunningApp(launchError);
  }
}
|
||||
|
||||
/**
 * Builds the "needs_user_action" response returned when a submit-like
 * objective has not been confirmed.
 *
 * @param {object} payload - Request payload; `requestId` is echoed when non-blank.
 * @param {object | undefined} targetApp - Target app entry; its label (or name)
 *   becomes `appName`.
 * @returns {object} Confirmation request record.
 */
function buildConfirmationResult(payload, targetApp) {
  const requestId = normalizeText(payload.requestId) || undefined;
  const appName = targetApp?.label || targetApp?.name;
  return {
    status: "needs_user_action",
    requestId,
    kind: "desktop_submit_confirmation_required",
    risk: "high",
    summary: "这条指令会在桌面应用里发送、提交或删除内容,需要你先确认。",
    recommendedAction: "allow_once",
    availableActions: ["allow_once", "deny"],
    platform: "macos",
    appName,
  };
}
|
||||
|
||||
/**
 * Builds a "failed" response record.
 *
 * @param {unknown} requestId - Request id; omitted (undefined) when blank.
 * @param {string} error - Error code or message, passed through unchanged.
 * @param {unknown} [detail] - Optional detail; omitted (undefined) when blank.
 * @returns {{status: string, requestId: string | undefined, error: string, detail: string | undefined}}
 */
function buildFailure(requestId, error, detail) {
  const cleanedId = normalizeText(requestId);
  const cleanedDetail = normalizeText(detail);
  return {
    status: "failed",
    requestId: cleanedId ? cleanedId : undefined,
    error,
    detail: cleanedDetail ? cleanedDetail : undefined,
  };
}
|
||||
|
||||
/**
 * Executes one computer-use request against the driver.
 *
 * Flow:
 *  1. Validate platform (macOS only), provider and objective.
 *  2. Detect the target app from the objective.
 *  3. For submit-like objectives that are not yet allowed, return a
 *     confirmation request instead of acting.
 *  4. Launch or attach to the app, pick a window, and read its state.
 *  5. If the objective contains text to type, type it, press return when the
 *     objective is submit-like and allowed, then read the window state again.
 *
 * The reported observation comes from the most recent window state: the
 * post-action read when text was typed, otherwise the initial read. The
 * character count is in Unicode code points rather than UTF-16 units.
 *
 * Failures are never thrown; they are returned as `status: "failed"`.
 *
 * @param {object} payload - Request with `objective`, and optionally
 *   `requestId`, `platform`, `provider`, `context`, `desktopActionConfirmed`.
 * @param {{env?: object, cwd?: string}} [options={}] - Environment and working
 *   directory overrides forwarded to tool calls.
 * @returns {Promise<object>} A "completed", "failed" or "needs_user_action" record.
 */
export async function runCuaDriverComputerUseTask(payload, options = {}) {
  const env = options.env || process.env;
  const requestId = normalizeText(payload?.requestId);
  const objective = normalizeText(payload?.objective);
  const platform = normalizePlatform(payload?.platform || payload?.context?.controlPlatform);
  const provider = normalizeProvider(payload?.provider || payload?.context?.computerUseProvider);

  if (platform !== "macos") {
    return buildFailure(requestId, "UNSUPPORTED_CONTROL_PLATFORM");
  }
  if (provider !== "cua-driver-computer-use") {
    return buildFailure(requestId, "UNSUPPORTED_COMPUTER_USE_PROVIDER");
  }
  if (!objective) {
    return buildFailure(requestId, "CUA_OBJECTIVE_REQUIRED");
  }

  const targetApp = detectCuaTargetApp(objective);
  if (!targetApp) {
    return buildFailure(
      requestId,
      "CUA_TARGET_APP_REQUIRED",
      "请在指令里明确要控制的 macOS 应用,例如 Chrome、Safari、QQ、微信、Finder 或系统设置。",
    );
  }

  const submitLike = isSubmitLikeObjective(objective);
  if (submitLike && !isSubmitAllowed(env, payload)) {
    return buildConfirmationResult(payload, targetApp);
  }

  const toolTrace = [];
  const callOptions = { ...options, env };
  try {
    const targetSession = await resolveTargetAppSession(targetApp, objective, callOptions, toolTrace);

    const pid = targetSession.pid;
    if (!pid) {
      return buildFailure(requestId, "CUA_TARGET_PID_NOT_FOUND", targetSession.sourceText);
    }

    let window = targetSession.window;
    if (!window) {
      const windowsResult = await callCuaTool("list_windows", { pid }, callOptions);
      toolTrace.push("list_windows");
      window = selectWindow(windowsResult.structured?.windows);
    }
    const windowId = getWindowId(window);
    if (!windowId) {
      return buildFailure(requestId, "CUA_TARGET_WINDOW_NOT_FOUND", targetSession.sourceText);
    }

    const beforeState = await callCuaTool("get_window_state", { pid, window_id: windowId }, callOptions);
    toolTrace.push("get_window_state");

    let latestState = beforeState;
    const typedText = extractQuotedText(objective);
    if (typedText) {
      await callCuaTool("type_text", { pid, window_id: windowId, text: typedText, delay_ms: 20 }, callOptions);
      toolTrace.push("type_text");
      if (submitLike && isSubmitAllowed(env, payload)) {
        await callCuaTool("press_key", { pid, window_id: windowId, key: "return" }, callOptions);
        toolTrace.push("press_key");
      }
      // Keep the post-action state so the reply reflects the result.
      latestState = await callCuaTool("get_window_state", { pid, window_id: windowId }, callOptions);
      toolTrace.push("get_window_state");
    }

    // Fall back to the initial read if the follow-up read carried no text.
    const observedText = latestState.text || beforeState.text;
    const observation = observedText ? `窗口观测:${observedText.split(/\r?\n/)[0]}` : "已完成窗口观测。";
    // Spread iterates by code point, so astral characters count once.
    const actionSummary = typedText
      ? `并已向目标应用写入 ${[...typedText].length} 个字符。`
      : "已打开并读取目标窗口。";
    return {
      status: "completed",
      requestId: requestId || undefined,
      replyBody: `已通过 Cua Driver 接入 ${targetApp.label},${actionSummary}${observation}`,
      targetApp: targetApp.label,
      executionSummary: toolTrace.join(" -> "),
    };
  } catch (error) {
    return buildFailure(requestId, error?.message || "CUA_DRIVER_EXECUTION_FAILED");
  }
}
|
||||
|
||||
/**
 * CLI entry point: reads a JSON request from stdin, runs the task with the
 * current process environment and working directory, and writes the result
 * to stdout as one JSON line.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const payload = parseJsonPayload(await readStdin());
  const runtimeOptions = {
    env: process.env,
    cwd: process.cwd(),
  };
  writeJson(await runCuaDriverComputerUseTask(payload, runtimeOptions));
}
|
||||
|
||||
// Run main() only when this file is the script Node was started with,
// not when it is imported as a module by other code.
const currentFile = fileURLToPath(import.meta.url);
const invokedScript = process.argv[1];
if (invokedScript && path.resolve(invokedScript) === currentFile) {
  main().catch((error) => {
    // Report unexpected failures in the same JSON shape as task failures.
    const message = error?.message || "CUA_DRIVER_RUNTIME_FAILED";
    writeJson({
      status: "failed",
      error: message,
    });
  });
}
|
||||
Reference in New Issue
Block a user