Files
boss/local-agent/computer-use-task-runner.mjs
2026-05-17 02:20:08 +08:00

441 lines
17 KiB
JavaScript

import { spawn } from "node:child_process";
import path from "node:path";
/**
 * Interprets a loosely typed flag.
 *
 * @param {unknown} value - Raw flag value (string, boolean, or nothing).
 * @returns {boolean} `true` only when the stringified value, trimmed and
 *   lower-cased, is exactly "true"; every other input yields `false`.
 */
function parseBoolean(value) {
  const text = String(value || "");
  const canonical = text.trim().toLowerCase();
  return canonical === "true";
}
/**
 * Splits a whitespace-separated argument string into tokens.
 * No quoting or escaping is interpreted: every run of non-whitespace
 * characters becomes one argument.
 *
 * @param {unknown} value - Raw argument string; falsy values count as empty.
 * @returns {string[]} The tokens, or an empty array when there are none.
 */
function parseArgs(value) {
  const tokens = String(value || "").match(/\S+/g);
  return tokens === null ? [] : tokens;
}
/**
 * Splits a comma-separated string into trimmed, non-empty entries.
 *
 * @param {unknown} value - Raw list; falsy values count as empty.
 * @returns {string[]} Entries in their original order with blanks removed.
 */
function parseCsv(value) {
  const entries = [];
  for (const piece of String(value || "").split(",")) {
    const cleaned = piece.trim();
    if (cleaned) {
      entries.push(cleaned);
    }
  }
  return entries;
}
/**
 * Parses a timeout expressed in milliseconds.
 *
 * @param {unknown} value - Raw timeout (number or numeric string).
 * @returns {number} The base-10 integer parsed from the value when it is
 *   finite and strictly positive; otherwise 45000.
 */
function parseTimeoutMs(value) {
  const fallbackMs = 45000;
  const candidate = Number.parseInt(String(value || ""), 10);
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return fallbackMs;
  }
  return candidate;
}
/**
 * Validates the requested control platform.
 *
 * @param {unknown} value - Requested platform; blank or missing is accepted.
 * @returns {string} Always "macos" (for blank input or any casing of "macos").
 * @throws {Error} "UNSUPPORTED_CONTROL_PLATFORM" for any other value.
 */
function normalizeControlPlatform(value) {
  const requested = String(value || "").trim().toLowerCase();
  const isSupported = requested === "" || requested === "macos";
  if (!isSupported) {
    throw new Error("UNSUPPORTED_CONTROL_PLATFORM");
  }
  return "macos";
}
/**
 * Maps a requested provider onto one of the recognised provider ids.
 *
 * @param {unknown} value - Requested provider; matched case-sensitively after trimming.
 * @returns {string} The trimmed value when it is a recognised provider id,
 *   otherwise "codex-computer-use".
 */
function normalizeComputerUseProvider(value) {
  const recognised = [
    "boss-native-computer-use",
    "codex-computer-use",
    "cua-driver-computer-use",
    "openai-computer-use",
  ];
  const requested = String(value || "").trim();
  if (recognised.includes(requested)) {
    return requested;
  }
  return "codex-computer-use";
}
/**
 * Reduces a list of dialog-guard platform adapters to the macOS ones, in
 * canonical form.
 *
 * Entries are stringified, trimmed and lower-cased; only "darwin" and "macos"
 * are kept. The canonical lower-case spelling is what gets returned, so
 * inputs such as "Darwin" or "MACOS" do not leak their original casing into
 * the result, and repeated entries collapse to one (first occurrence wins).
 *
 * @param {unknown} value - Candidate adapter list; non-arrays count as empty.
 * @returns {string[]} The accepted adapters, or ["darwin"] when none qualify.
 */
function normalizeMacDialogGuardPlatformAdapters(value) {
  const candidates = Array.isArray(value) ? value : [];
  // A Set preserves insertion order while dropping duplicates.
  const accepted = new Set();
  for (const candidate of candidates) {
    const normalized = String(candidate).trim().toLowerCase();
    if (normalized === "darwin" || normalized === "macos") {
      accepted.add(normalized);
    }
  }
  return accepted.size > 0 ? [...accepted] : ["darwin"];
}
/**
 * Reads `config[key]`, falling back when the entry is effectively unset.
 *
 * @param {object|null|undefined} config - Configuration object, may be absent.
 * @param {string} key - Property to read.
 * @param {unknown} fallback - Value returned when the entry is unset.
 * @returns {unknown} `config[key]` unless config is falsy or the entry is
 *   `undefined`, `null`, or stringifies to blank text; in those cases `fallback`.
 */
function pickConfigValue(config, key, fallback) {
  if (!config) {
    return fallback;
  }
  const candidate = config[key];
  const isUnset = candidate === undefined || candidate === null || `${candidate}`.trim() === "";
  return isUnset ? fallback : candidate;
}
/**
 * Makes the script argument of a script-runtime command absolute.
 *
 * When the command's base name (case-insensitive) is node, tsx, bun or deno
 * (including their .exe/.cmd spellings) and the first argument is not an
 * option (does not start with "-"), that argument is resolved against `cwd`
 * unless it is already absolute. In every other case `args` is returned as is.
 *
 * @param {string} command - Executable name or path.
 * @param {string[]} args - Arguments for the command.
 * @param {string} [cwd] - Base directory; defaults to `process.cwd()`.
 * @returns {string[]} The original array, or a new array with the first entry made absolute.
 */
function resolveCommandArgs(command, args, cwd) {
  const knownRuntimes = ["node", "node.exe", "tsx", "tsx.cmd", "bun", "bun.exe", "deno", "deno.exe"];
  const executable = path.basename(command || "").toLowerCase();
  const isScriptRuntime = knownRuntimes.includes(executable);
  if (!isScriptRuntime || args.length === 0) {
    return args;
  }
  const entry = args[0];
  if (!entry || entry.startsWith("-")) {
    return args;
  }
  const remaining = args.slice(1);
  if (path.isAbsolute(entry)) {
    return [entry, ...remaining];
  }
  const baseDir = cwd || process.cwd();
  return [path.resolve(baseDir, entry), ...remaining];
}
/**
 * Parses the last non-blank line of a text blob as JSON.
 *
 * @param {unknown} rawOutput - Text whose final non-blank line is expected to
 *   hold a JSON document; earlier lines are ignored.
 * @returns {unknown} The parsed JSON value.
 * @throws {SyntaxError} When that line is not valid JSON or no non-blank line exists.
 */
function parseJsonLine(rawOutput) {
  const rows = String(rawOutput || "").split(/\r?\n/);
  let lastRow = "";
  // Walk backwards so only the trailing line is inspected.
  for (let index = rows.length - 1; index >= 0; index -= 1) {
    const row = rows[index].trim();
    if (row) {
      lastRow = row;
      break;
    }
  }
  return JSON.parse(lastRow);
}
/**
 * Builds the runner configuration from an explicit config object and the
 * environment.
 *
 * For every setting the `config` entry wins when it is set (not undefined,
 * null or blank text); otherwise the matching `BOSS_*` environment variable is
 * used. Argument lists given as arrays in `config` are used directly
 * (stringified, empty items dropped); anything else is split on whitespace.
 *
 * `codexComputerUseFallbackToCua` defaults to `true`. A value that is unset
 * OR blank (for example an environment variable defined as an empty string)
 * keeps that default, matching how blank values are treated for every other
 * setting; only an explicit non-blank value is parsed as a boolean.
 *
 * @param {Record<string, string|undefined>} [env=process.env] - Environment variables.
 * @param {object} [config={}] - Explicit overrides.
 * @returns {object} Normalised runner configuration.
 */
export function getComputerUseTaskRunnerConfig(env = process.env, config = {}) {
  // Small readers that all share the "config first, env second" lookup.
  const readRaw = (key, envValue) => pickConfigValue(config, key, envValue);
  const readFlag = (key, envValue) => parseBoolean(readRaw(key, envValue));
  const readText = (key, envValue) => String(readRaw(key, envValue) || "").trim();
  const readTimeout = (key, envValue) => parseTimeoutMs(readRaw(key, envValue));
  const readArgs = (key, envValue) =>
    Array.isArray(config?.[key])
      ? config[key].map((item) => String(item)).filter(Boolean)
      : parseArgs(readRaw(key, envValue));

  const dialogGuardPlatformAdapters = Array.isArray(config?.dialogGuardPlatformAdapters)
    ? config.dialogGuardPlatformAdapters.map((item) => String(item).trim()).filter(Boolean)
    : parseCsv(readRaw("dialogGuardPlatformAdapters", env.BOSS_DIALOG_GUARD_PLATFORM_ADAPTERS));

  // Blank or null must count as "not configured" so the default (true) applies.
  const rawFallbackToCua = readRaw("codexComputerUseFallbackToCua", env.BOSS_CODEX_COMPUTER_USE_FALLBACK_TO_CUA);
  const fallbackToCuaUnset =
    rawFallbackToCua === undefined || rawFallbackToCua === null || `${rawFallbackToCua}`.trim() === "";
  const codexComputerUseFallbackToCua = fallbackToCuaUnset ? true : parseBoolean(rawFallbackToCua);

  return {
    enabled: readFlag("computerUseEnabled", env.BOSS_COMPUTER_USE_ENABLED),
    command: readText("computerUseCommand", env.BOSS_COMPUTER_USE_COMMAND) || undefined,
    args: readArgs("computerUseArgs", env.BOSS_COMPUTER_USE_ARGS),
    cwd: readText("computerUseWorkdir", env.BOSS_COMPUTER_USE_WORKDIR) || undefined,
    timeoutMs: readTimeout("computerUseTimeoutMs", env.BOSS_COMPUTER_USE_TIMEOUT_MS),
    dialogGuardEnabled: readFlag("dialogGuardEnabled", env.BOSS_DIALOG_GUARD_ENABLED),
    dialogGuardConsentRequired: readFlag("dialogGuardConsentRequired", env.BOSS_DIALOG_GUARD_CONSENT_REQUIRED),
    dialogGuardPlatformAdapters: normalizeMacDialogGuardPlatformAdapters(dialogGuardPlatformAdapters),
    dialogGuardMacActionCommand: readText("dialogGuardMacActionCommand", env.BOSS_MAC_DIALOG_GUARD_ACTION_COMMAND),
    dialogGuardMacActionArgs: readArgs("dialogGuardMacActionArgs", env.BOSS_MAC_DIALOG_GUARD_ACTION_ARGS),
    cuaDriverCommand: readText("cuaDriverCommand", env.BOSS_CUA_DRIVER_COMMAND),
    cuaDriverArgs: readArgs("cuaDriverArgs", env.BOSS_CUA_DRIVER_ARGS),
    cuaDriverTimeoutMs: readTimeout("cuaDriverTimeoutMs", env.BOSS_CUA_DRIVER_TIMEOUT_MS),
    codexComputerUseEnabled: readFlag("codexComputerUseEnabled", env.BOSS_CODEX_COMPUTER_USE_ENABLED),
    codexComputerUseCommand: readText("codexComputerUseCommand", env.BOSS_CODEX_COMPUTER_USE_COMMAND),
    codexComputerUseArgs: readArgs("codexComputerUseArgs", env.BOSS_CODEX_COMPUTER_USE_ARGS),
    codexComputerUseWorkdir: readText("codexComputerUseWorkdir", env.BOSS_CODEX_COMPUTER_USE_WORKDIR),
    codexComputerUseTimeoutMs: readTimeout("codexComputerUseTimeoutMs", env.BOSS_CODEX_COMPUTER_USE_TIMEOUT_MS),
    codexComputerUseFallbackToCua,
  };
}
/**
 * Tells whether a task is a desktop-control task.
 *
 * @param {{ taskType?: unknown }|null|undefined} task - Task descriptor.
 * @returns {boolean} `true` when the trimmed `taskType` is exactly "desktop_control".
 */
export function canHandleComputerUseTask(task) {
  const taskType = String(task?.taskType || "");
  return taskType.trim() === "desktop_control";
}
/**
 * Describes how to launch the generic computer-use runtime for a task.
 *
 * @param {object} config - Runner configuration (see getComputerUseTaskRunnerConfig).
 * @param {object} task - Task to execute.
 * @returns {{ command: string, args: string[], cwd: string, timeoutMs: number, env: object, stdinPayload: object }}
 *   Command, arguments, working directory, timeout, extra environment
 *   variables and the JSON payload to write to the runtime's stdin.
 * @throws {Error} "COMPUTER_USE_RUNTIME_DISABLED" when the runtime is not enabled.
 * @throws {Error} "COMPUTER_USE_COMMAND_REQUIRED" when no command is configured.
 * @throws {Error} Whatever normalizeControlPlatform throws for an unsupported platform.
 */
export function buildComputerUseTaskExecution(config, task) {
  if (!config?.enabled) {
    throw new Error("COMPUTER_USE_RUNTIME_DISABLED");
  }
  if (!config?.command) {
    throw new Error("COMPUTER_USE_COMMAND_REQUIRED");
  }

  // Local formatting helpers for the repeated string clean-ups below.
  const toText = (value) => String(value || "").trim();
  const toOptionalText = (value) => toText(value) || undefined;
  const toFlag = (value) => (value ? "true" : "false");

  const workingDirectory = config.cwd || process.cwd();
  const platform = normalizeControlPlatform(task?.controlPlatform);
  const provider = normalizeComputerUseProvider(task?.computerUseProvider);
  const guardAdapters = normalizeMacDialogGuardPlatformAdapters(config.dialogGuardPlatformAdapters);
  const launchArgs = resolveCommandArgs(config.command, config.args || [], workingDirectory);

  const runtimeEnv = {
    BOSS_DIALOG_GUARD_ENABLED: toFlag(config.dialogGuardEnabled),
    BOSS_DIALOG_GUARD_CONSENT_REQUIRED: toFlag(config.dialogGuardConsentRequired),
    BOSS_DIALOG_GUARD_PLATFORM_ADAPTERS: guardAdapters.join(","),
    BOSS_MAC_DIALOG_GUARD_ACTION_COMMAND: config.dialogGuardMacActionCommand || "",
    BOSS_MAC_DIALOG_GUARD_ACTION_ARGS_JSON: JSON.stringify(config.dialogGuardMacActionArgs || []),
    BOSS_CUA_DRIVER_COMMAND: config.cuaDriverCommand || "",
    BOSS_CUA_DRIVER_ARGS_JSON: JSON.stringify(config.cuaDriverArgs || []),
    BOSS_CUA_DRIVER_TIMEOUT_MS: String(config.cuaDriverTimeoutMs || 45000),
    BOSS_CONTROL_PLATFORM: platform,
    BOSS_COMPUTER_USE_PROVIDER: provider,
  };

  const requestContext = {
    projectId: toOptionalText(task?.projectId),
    threadId: toOptionalText(task?.threadId || task?.targetThreadId),
    requestedBy: toOptionalText(task?.requestedByAccount || task?.requestedBy),
    requestedAt: toOptionalText(task?.requestedAt),
    confirmationScopeKey: toOptionalText(task?.confirmationScopeKey),
    riskLevel: toOptionalText(task?.riskLevel),
    controlPlatform: platform,
    computerUseProvider: provider,
  };

  return {
    command: config.command,
    args: launchArgs,
    cwd: workingDirectory,
    timeoutMs: config.timeoutMs || 45000,
    env: runtimeEnv,
    stdinPayload: {
      requestKind: "desktop_control",
      requestId: toText(task?.taskId),
      objective: toText(task?.requestText || task?.executionPrompt),
      platform,
      provider,
      context: requestContext,
    },
  };
}
/**
 * Describes how to launch the Codex computer-use runtime for a task.
 * The provider is always reported as "codex-computer-use".
 *
 * @param {object} config - Runner configuration (see getComputerUseTaskRunnerConfig).
 * @param {object} task - Task to execute.
 * @returns {{ command: string, args: string[], cwd: string, timeoutMs: number, env: object, stdinPayload: object }}
 *   Launch description, including the JSON payload for the runtime's stdin.
 * @throws {Error} "CODEX_COMPUTER_USE_RUNTIME_DISABLED" when the Codex runtime is not enabled.
 * @throws {Error} "CODEX_COMPUTER_USE_COMMAND_REQUIRED" when no Codex command is configured.
 * @throws {Error} Whatever normalizeControlPlatform throws for an unsupported platform.
 */
function buildCodexComputerUseTaskExecution(config, task) {
  if (!config?.codexComputerUseEnabled) {
    throw new Error("CODEX_COMPUTER_USE_RUNTIME_DISABLED");
  }
  if (!config?.codexComputerUseCommand) {
    throw new Error("CODEX_COMPUTER_USE_COMMAND_REQUIRED");
  }

  const toText = (value) => String(value || "").trim();
  const toOptionalText = (value) => toText(value) || undefined;
  const provider = "codex-computer-use";

  // Codex-specific workdir first, then the generic one, then the process cwd.
  const workingDirectory = config.codexComputerUseWorkdir || config.cwd || process.cwd();
  const platform = normalizeControlPlatform(task?.controlPlatform);
  const launchArgs = resolveCommandArgs(
    config.codexComputerUseCommand,
    config.codexComputerUseArgs || [],
    workingDirectory,
  );

  const requestContext = {
    projectId: toOptionalText(task?.projectId),
    threadId: toOptionalText(task?.threadId || task?.targetThreadId),
    requestedBy: toOptionalText(task?.requestedByAccount || task?.requestedBy),
    requestedAt: toOptionalText(task?.requestedAt),
    confirmationScopeKey: toOptionalText(task?.confirmationScopeKey),
    riskLevel: toOptionalText(task?.riskLevel),
    controlPlatform: platform,
    computerUseProvider: provider,
  };

  return {
    command: config.codexComputerUseCommand,
    args: launchArgs,
    cwd: workingDirectory,
    timeoutMs: config.codexComputerUseTimeoutMs || 45000,
    env: {
      BOSS_CONTROL_PLATFORM: platform,
      BOSS_COMPUTER_USE_PROVIDER: provider,
    },
    stdinPayload: {
      requestKind: "desktop_control",
      requestId: toText(task?.taskId),
      objective: toText(task?.requestText || task?.executionPrompt),
      platform,
      provider,
      context: requestContext,
    },
  };
}
/**
 * Converts the raw output of a computer-use runtime into a result object.
 *
 * The last non-blank output line must be a JSON object. Its `status` selects
 * the result shape: "failed", "needs_user_action", or (for any other status)
 * "completed". String fields are trimmed; blank or non-string fields fall
 * back to a default or `undefined`.
 *
 * @param {unknown} rawOutput - Captured runtime output.
 * @returns {object} A result whose `status` is "failed", "needs_user_action" or "completed".
 * @throws {Error} "INVALID_COMPUTER_USE_RUNTIME_PAYLOAD" when the payload is
 *   not a JSON object, or when a completed payload has neither `replyBody` nor `summary`.
 * @throws {SyntaxError} When the last line is not valid JSON.
 */
export function parseComputerUseTaskResult(rawOutput) {
  const payload = parseJsonLine(rawOutput);
  const isRecord = Boolean(payload) && typeof payload === "object" && !Array.isArray(payload);
  if (!isRecord) {
    throw new Error("INVALID_COMPUTER_USE_RUNTIME_PAYLOAD");
  }

  // Trimmed text when `value` is a non-blank string, otherwise `fallback`.
  const textOr = (value, fallback) => {
    if (typeof value !== "string") {
      return fallback;
    }
    const trimmed = value.trim();
    return trimmed ? trimmed : fallback;
  };
  const requestId = textOr(payload.requestId, undefined);

  switch (payload.status) {
    case "failed":
      return {
        status: "failed",
        requestId,
        errorMessage: textOr(payload.error, "COMPUTER_USE_FAILED"),
      };
    case "needs_user_action": {
      const offeredActions = Array.isArray(payload.availableActions)
        ? payload.availableActions.map((item) => String(item).trim()).filter(Boolean)
        : [];
      return {
        status: "needs_user_action",
        requestId,
        kind: textOr(payload.kind, "user_action_required"),
        dialogId: textOr(payload.dialogId, undefined),
        risk: textOr(payload.risk, "medium"),
        summary: textOr(payload.summary, ""),
        recommendedAction: textOr(payload.recommendedAction, undefined),
        availableActions: offeredActions,
        platform: textOr(payload.platform, undefined),
        appName: textOr(payload.appName, undefined),
      };
    }
    default:
      break;
  }

  // Completed result: the reply text comes from replyBody, else from summary.
  const replyBody = textOr(payload.replyBody, textOr(payload.summary, ""));
  if (!replyBody) {
    throw new Error("INVALID_COMPUTER_USE_RUNTIME_PAYLOAD");
  }
  const recognisedProviders = [
    "boss-native-computer-use",
    "codex-computer-use",
    "cua-driver-computer-use",
    "openai-computer-use",
  ];
  return {
    status: "completed",
    requestId,
    replyBody,
    computerUseProvider: recognisedProviders.includes(payload.computerUseProvider)
      ? payload.computerUseProvider
      : undefined,
    targetApp: textOr(payload.targetApp, undefined),
    executionSummary: textOr(payload.executionSummary, undefined),
  };
}
/**
 * Decides whether the Codex runtime should be attempted for a task.
 *
 * @param {object} runnerConfig - Runner configuration.
 * @param {object} task - Task whose `computerUseProvider` is inspected.
 * @returns {boolean} `true` when Codex is enabled (strictly `true`), a Codex
 *   command is configured, and the normalised provider is
 *   "codex-computer-use" or "openai-computer-use".
 */
function shouldTryCodexComputerUse(runnerConfig, task) {
  const requested = normalizeComputerUseProvider(task?.computerUseProvider);
  if (runnerConfig.codexComputerUseEnabled !== true) {
    return false;
  }
  if (!runnerConfig.codexComputerUseCommand) {
    return false;
  }
  return requested === "codex-computer-use" || requested === "openai-computer-use";
}
/**
 * Ensures a result object names the provider that produced it.
 *
 * @param {unknown} result - Runtime result.
 * @param {string} provider - Provider used when the result does not name one.
 * @returns {unknown} For a non-array object: a shallow copy whose
 *   `computerUseProvider` is the existing truthy value or `provider`.
 *   Any other input is returned unchanged.
 */
function withComputerUseProvider(result, provider) {
  const isRecord = Boolean(result) && typeof result === "object" && !Array.isArray(result);
  if (!isRecord) {
    return result;
  }
  const effectiveProvider = result.computerUseProvider || provider;
  return { ...result, computerUseProvider: effectiveProvider };
}
/**
 * Runs a computer-use runtime as a child process and parses its output.
 *
 * The JSON-encoded `stdinPayload` is written to the child's stdin, stdout and
 * stderr are collected as UTF-8 text, and the child is killed with SIGKILL
 * once `timeoutMs` elapses.
 *
 * @param {{ command: string, args: string[], cwd?: string, env?: object, timeoutMs: number, stdinPayload: unknown }} execution
 *   Launch description, as produced by the build*TaskExecution helpers.
 * @returns {Promise<object>} Resolves with parseComputerUseTaskResult(stdout)
 *   when the child exits with code 0. Rejects with:
 *   - the spawn error when the process cannot be started,
 *   - Error("COMPUTER_USE_TIMEOUT") when the timeout fired,
 *   - an Error carrying the trimmed stderr (or the exit code) on non-zero exit,
 *   - whatever parseComputerUseTaskResult throws for unusable output.
 */
function executeComputerUseRuntime(execution) {
  return new Promise((resolve, reject) => {
    const child = spawn(execution.command, execution.args, {
      cwd: execution.cwd,
      env: {
        ...process.env,
        ...(execution.env || {}),
      },
      stdio: ["pipe", "pipe", "pipe"],
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const timer = setTimeout(() => {
      timedOut = true;
      child.kill("SIGKILL");
    }, execution.timeoutMs);

    // The stdio streams can be missing when the spawn fails early; the
    // process-level "error" handler below reports that failure, so the
    // stream wiring is guarded instead of throwing a TypeError here.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr?.on("data", (chunk) => {
      stderr += chunk;
    });
    // A child that exits (or closes stdin) before consuming the payload makes
    // the write fail with EPIPE. Without a listener that stream error is an
    // unhandled "error" event and takes down the whole host process. The
    // outcome is reported through "error"/"close" below, so the stream error
    // itself carries no extra information and is intentionally ignored.
    child.stdin?.on("error", () => {});

    child.on("error", (error) => {
      clearTimeout(timer);
      reject(error);
    });
    child.on("close", (code) => {
      clearTimeout(timer);
      if (timedOut) {
        reject(new Error("COMPUTER_USE_TIMEOUT"));
        return;
      }
      if (code !== 0) {
        reject(new Error(stderr.trim() || `computer use exit code ${code}`));
        return;
      }
      try {
        resolve(parseComputerUseTaskResult(stdout));
      } catch (error) {
        reject(error);
      }
    });

    if (child.stdin) {
      child.stdin.write(JSON.stringify(execution.stdinPayload));
      child.stdin.end();
    }
  });
}
/**
 * Executes a desktop-control task, preferring the Codex runtime when eligible.
 *
 * Flow:
 * 1. When Codex is eligible (see shouldTryCodexComputerUse) it is tried first.
 *    Its result is returned unless it failed and falling back is allowed.
 *    An exception from the Codex attempt is turned into a failed result when
 *    falling back is not allowed; otherwise it is discarded and the flow continues.
 * 2. When the generic runtime is disabled a failed result with
 *    "COMPUTER_USE_RUNTIME_DISABLED" is returned.
 * 3. Otherwise the generic runtime runs; a task that asked for
 *    "codex-computer-use" (or named no recognised provider) is sent to it as
 *    "cua-driver-computer-use".
 *
 * @param {object} task - Task to execute.
 * @param {object} [config={}] - Overrides merged over the process environment.
 * @returns {Promise<object>} The runtime result, tagged with the provider used.
 *   Errors from building or running the generic runtime propagate as rejections.
 */
export async function executeComputerUseTask(task, config = {}) {
  const codexProvider = "codex-computer-use";
  const runnerConfig = getComputerUseTaskRunnerConfig(process.env, config);
  const codexEligible = shouldTryCodexComputerUse(runnerConfig, task);

  if (codexEligible) {
    const fallbackAllowed = runnerConfig.codexComputerUseFallbackToCua;
    try {
      const codexExecution = buildCodexComputerUseTaskExecution(runnerConfig, task);
      const codexOutcome = await executeComputerUseRuntime(codexExecution);
      const codexSucceeded = codexOutcome.status !== "failed";
      if (codexSucceeded || !fallbackAllowed) {
        return withComputerUseProvider(codexOutcome, codexProvider);
      }
    } catch (error) {
      if (!fallbackAllowed) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        return {
          status: "failed",
          errorMessage,
          computerUseProvider: codexProvider,
        };
      }
    }
  }

  // Reached when Codex was not eligible, or it failed and falling back is allowed.
  if (!runnerConfig.enabled) {
    return {
      status: "failed",
      errorMessage: "COMPUTER_USE_RUNTIME_DISABLED",
    };
  }

  const requestedProvider = normalizeComputerUseProvider(task?.computerUseProvider);
  const fallbackProvider =
    requestedProvider === codexProvider ? "cua-driver-computer-use" : task?.computerUseProvider;
  const fallbackTask = { ...task, computerUseProvider: fallbackProvider };

  const fallbackExecution = buildComputerUseTaskExecution(runnerConfig, fallbackTask);
  const fallbackOutcome = await executeComputerUseRuntime(fallbackExecution);
  return withComputerUseProvider(
    fallbackOutcome,
    normalizeComputerUseProvider(fallbackTask.computerUseProvider),
  );
}