436 lines
12 KiB
JavaScript
436 lines
12 KiB
JavaScript
#!/usr/bin/env node
|
||
|
||
import { spawn } from "node:child_process";
|
||
import { mkdir, writeFile } from "node:fs/promises";
|
||
import path from "node:path";
|
||
import {
|
||
buildDialogAuditEntry,
|
||
buildDialogInterventionResult,
|
||
evaluateDialogSnapshot,
|
||
readDialogSnapshotFromEnv,
|
||
} from "../local-agent/desktop-dialog-guard.mjs";
|
||
|
||
/**
 * Emit a value on stdout as a single line of JSON.
 *
 * @param {unknown} payload - Value to serialize with `JSON.stringify`.
 * @returns {void}
 */
function writeJson(payload) {
  const serialized = JSON.stringify(payload);
  // One JSON document per line so a consumer can split stdout on newlines.
  process.stdout.write(`${serialized}\n`);
}
|
||
|
||
/**
 * Read everything available on `process.stdin` and return it as trimmed text.
 *
 * Chunks are collected as raw bytes and decoded once at the end. Decoding each
 * chunk separately corrupts any multi-byte UTF-8 character that happens to be
 * split across a chunk boundary.
 *
 * @returns {Promise<string>} The full stdin content decoded as UTF-8, with
 *   leading and trailing whitespace removed.
 */
async function readStdin() {
  const chunks = [];
  for await (const chunk of process.stdin) {
    // A stream with an encoding set yields strings; normalise those to bytes
    // so that everything can be concatenated and decoded in one pass.
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8").trim();
}
|
||
|
||
/**
 * Parse the raw request text and make sure it is a JSON object.
 *
 * @param {string} raw - JSON text to parse.
 * @returns {{ok: true, payload: object} | {ok: false, error: string}}
 *   The parsed object on success, otherwise an error string describing whether
 *   the text was not JSON at all or was JSON of the wrong shape.
 */
function normalizePayload(raw) {
  let candidate;
  try {
    candidate = JSON.parse(raw);
  } catch {
    return { ok: false, error: "INVALID_COMPUTER_USE_PAYLOAD: invalid json" };
  }

  // Arrays and null are both `typeof "object"`, so exclude them explicitly.
  const isRecord =
    candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
  if (isRecord) {
    return { ok: true, payload: candidate };
  }
  return { ok: false, error: "INVALID_COMPUTER_USE_PAYLOAD: expected object" };
}
|
||
|
||
/**
 * Guess which application an objective refers to by case-insensitive
 * substring matching against a fixed alias list.
 *
 * Entries are checked in declaration order and the first hit wins, so an
 * objective mentioning several apps resolves to the earliest entry.
 *
 * @param {unknown} objective - Free-form objective text (falsy values are
 *   treated as an empty string).
 * @returns {string | undefined} The display label of the matched app, or
 *   `undefined` when no alias occurs in the text.
 */
function detectTargetApp(objective) {
  const haystack = String(objective || "").toLowerCase();
  const knownApps = [
    { label: "微信", aliases: ["微信", "wechat"] },
    { label: "飞书", aliases: ["飞书", "lark", "feishu"] },
    { label: "Telegram", aliases: ["telegram"] },
    { label: "QQ", aliases: ["qq"] },
    { label: "Finder", aliases: ["finder", "访达"] },
    { label: "系统设置", aliases: ["系统设置", "system settings", "settings"] },
    { label: "Chrome", aliases: ["chrome", "谷歌浏览器"] },
    { label: "Safari", aliases: ["safari"] },
  ];

  const hit = knownApps.find(({ aliases }) =>
    aliases.some((alias) => haystack.includes(alias.toLowerCase())),
  );
  return hit?.label;
}
|
||
|
||
/**
 * Map an objective to an action identifier using case-insensitive keyword
 * matching.
 *
 * Rules are evaluated top to bottom and the first match wins; settings
 * keywords therefore take priority over every app keyword.
 *
 * @param {unknown} objective - Free-form objective text (falsy values are
 *   treated as an empty string).
 * @returns {string} One of the `open_*` identifiers, falling back to
 *   `"open_app"` when no keyword is found.
 */
function detectDesktopAction(objective) {
  const haystack = String(objective || "").toLowerCase();
  const rules = [
    ["open_settings", ["系统设置", "settings"]],
    ["open_finder", ["访达", "finder"]],
    ["open_wechat", ["微信", "wechat"]],
    ["open_feishu", ["飞书", "lark", "feishu"]],
    ["open_telegram", ["telegram"]],
    ["open_qq", ["qq"]],
  ];

  const matched = rules.find(([, keywords]) =>
    keywords.some((keyword) => haystack.includes(keyword)),
  );
  return matched ? matched[0] : "open_app";
}
|
||
|
||
/**
 * Pull the text to be typed out of an objective.
 *
 * Patterns are tried in order and the first non-empty capture wins:
 *   1. text between straight or curly double quotes,
 *   2. text between CJK corner brackets,
 *   3. text following the keyword `输入` and a colon,
 *   4. text following the keyword `打字` and a colon.
 *
 * For the keyword forms both the ASCII and the fullwidth colon (U+FF1A) are
 * accepted, and the capture stops at a newline, an ideographic full stop, or
 * an ASCII / fullwidth (U+FF1B) semicolon. The fullwidth characters are
 * written as `\u` escapes so they survive tooling that normalises them to
 * their ASCII counterparts.
 *
 * @param {unknown} objective - Free-form objective text (falsy values are
 *   treated as an empty string).
 * @returns {string | undefined} The trimmed text to type, or `undefined` when
 *   no pattern yields a non-empty capture.
 */
function extractQuotedText(objective) {
  const text = String(objective || "");
  const patterns = [
    /[“"]([^“”"]+)[”"]/,
    /[「『]([^」』]+)[」』]/,
    /输入[:\uFF1A]\s*([^\n。;\uFF1B]+)/,
    /打字[:\uFF1A]\s*([^\n。;\uFF1B]+)/,
  ];

  for (const pattern of patterns) {
    const value = text.match(pattern)?.[1]?.trim();
    if (value) {
      return value;
    }
  }
  return undefined;
}
|
||
|
||
/**
 * Decide whether the objective asks for the typed text to be submitted.
 *
 * This is a plain case-insensitive substring check, so any word containing
 * one of the trigger keywords also counts.
 *
 * @param {unknown} objective - Free-form objective text (falsy values are
 *   treated as an empty string).
 * @returns {boolean} `true` when any trigger keyword occurs in the text.
 */
function shouldSubmitAfterTyping(objective) {
  const lowered = String(objective || "").toLowerCase();
  const triggers = ["发送", "提交", "回车", "enter", "submit"];
  return triggers.some((trigger) => lowered.includes(trigger));
}
|
||
|
||
/**
 * Split a whitespace-separated argument string into tokens.
 *
 * No quoting or escaping is interpreted: every run of non-whitespace
 * characters becomes one argument.
 *
 * @param {unknown} value - Argument string (falsy values are treated as an
 *   empty string).
 * @returns {string[]} The tokens, or an empty array when there are none.
 */
function parseArgs(value) {
  const tokens = String(value || "").match(/\S+/g);
  return tokens ?? [];
}
|
||
|
||
/**
 * Parse an argument list given as a JSON array.
 *
 * @param {unknown} value - JSON text (falsy values are treated as an empty
 *   string).
 * @returns {string[] | undefined} Every element converted with `String()`,
 *   with empty strings removed; `undefined` when the input is blank, is not
 *   valid JSON, is not an array, or an element cannot be converted.
 */
function parseArgsJson(value) {
  const source = String(value || "").trim();
  if (source.length === 0) {
    return undefined;
  }

  try {
    const decoded = JSON.parse(source);
    if (!Array.isArray(decoded)) {
      return undefined;
    }
    const args = [];
    for (const entry of decoded) {
      // Conversion stays inside the try: String() can throw for objects
      // whose own `toString` is not callable.
      const text = String(entry);
      if (text) {
        args.push(text);
      }
    }
    return args;
  } catch {
    return undefined;
  }
}
|
||
|
||
/**
 * Work out which arguments precede the application name when launching it.
 *
 * When either `BOSS_COMPUTER_USE_OPEN_APP_ARGS_JSON` or
 * `BOSS_COMPUTER_USE_OPEN_APP_ARGS` is set, the environment decides: the JSON
 * form is used if it parses to an array, otherwise the plain string is split
 * on whitespace. With neither set, a command whose base name is `open`
 * (case-insensitive) gets `-a`, and any other command gets no prefix.
 *
 * @param {string} command - Launcher executable name or path.
 * @returns {string[]} Arguments to place before the application name.
 */
function resolveOpenAppPrefixArgs(command) {
  const {
    BOSS_COMPUTER_USE_OPEN_APP_ARGS_JSON: jsonEnv,
    BOSS_COMPUTER_USE_OPEN_APP_ARGS: plainEnv,
  } = process.env;
  const jsonOverride = String(jsonEnv || "").trim();
  const plainOverride = String(plainEnv || "").trim();

  if (!jsonOverride && !plainOverride) {
    const executable = path.basename(command || "").toLowerCase();
    return executable === "open" ? ["-a"] : [];
  }
  return parseArgsJson(jsonOverride) ?? parseArgs(plainOverride);
}
|
||
|
||
/**
 * Persist a run record as pretty-printed JSON in the artifact directory.
 *
 * The directory comes from `BOSS_CONTROL_ARTIFACT_DIR`; when that variable is
 * unset or blank nothing is written. The file is named after the payload's
 * `requestId` (or `desktop-<timestamp>` when it has none).
 *
 * `requestId` comes from the request payload, so it is not trusted as a path:
 * path separators and NUL bytes are replaced before it is used as a file
 * name. Without this, an id such as `../x` would resolve outside the artifact
 * directory. The payload written to disk keeps the original id.
 *
 * @param {object} payload - Record to serialize; `payload.requestId` is read
 *   to derive the file name.
 * @returns {Promise<Array<{kind: string, path: string}>>} A one-element list
 *   describing the written file, or an empty list when no artifact directory
 *   is configured.
 * @throws Propagates any error from `mkdir` or `writeFile`.
 */
async function writeArtifact(payload) {
  const artifactDir = String(process.env.BOSS_CONTROL_ARTIFACT_DIR || "").trim();
  if (!artifactDir) {
    return [];
  }

  await mkdir(artifactDir, { recursive: true });
  const requestId =
    typeof payload.requestId === "string" && payload.requestId.trim()
      ? payload.requestId.trim()
      : `desktop-${Date.now()}`;
  // With separators gone the name is a single path segment, so the
  // ".json"-suffixed result cannot climb out of artifactDir.
  const fileStem = requestId.replace(/[\\/\u0000]/g, "_");
  const artifactPath = path.join(artifactDir, `${fileStem}.json`);
  await writeFile(artifactPath, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
  return [
    {
      kind: "json",
      path: artifactPath,
    },
  ];
}
|
||
|
||
/**
 * Read the automation mode from `BOSS_COMPUTER_USE_MODE`.
 *
 * The value is trimmed and lower-cased before comparison.
 *
 * @returns {"off" | "open" | "osascript" | "auto"} The configured mode, or
 *   `"auto"` when the variable is unset or holds an unrecognised value.
 */
function getDesktopAutomationMode() {
  const recognised = ["off", "open", "osascript", "auto"];
  const requested = String(process.env.BOSS_COMPUTER_USE_MODE || "")
    .trim()
    .toLowerCase();
  return recognised.includes(requested) ? requested : "auto";
}
|
||
|
||
/**
 * Report whether the dialog guard is switched on.
 *
 * @returns {boolean} `true` only when `BOSS_DIALOG_GUARD_ENABLED`, trimmed and
 *   lower-cased, is exactly `"true"`; any other value leaves it off.
 */
function getDialogGuardEnabled() {
  const flag = String(process.env.BOSS_DIALOG_GUARD_ENABLED || "");
  return flag.trim().toLowerCase() === "true";
}
|
||
|
||
/**
 * Read the current dialog snapshot from the environment.
 *
 * The platform passed to the reader is `BOSS_DIALOG_GUARD_PLATFORM` when that
 * variable is non-empty, otherwise the platform Node reports for this process.
 *
 * @returns {*} Whatever `readDialogSnapshotFromEnv` returns; callers treat a
 *   falsy result as "no snapshot available".
 */
function readDialogGuardSnapshot() {
  const platform = process.env.BOSS_DIALOG_GUARD_PLATFORM || process.platform;
  return readDialogSnapshotFromEnv(process.env, platform);
}
|
||
|
||
/**
 * Look up the external command used to act on a dialog.
 *
 * A platform-specific command (`BOSS_MAC_*` for `darwin`, `BOSS_WINDOWS_*`
 * for `win32`) is preferred; when it is unset or blank the generic
 * `BOSS_DIALOG_GUARD_ACTION_COMMAND` is used instead. Arguments always come
 * from the same family of variables as the chosen command: the `_ARGS_JSON`
 * form if it parses to an array, otherwise the whitespace-split `_ARGS` form.
 *
 * @param {unknown} platform - Platform identifier; compared after trimming,
 *   case-sensitively.
 * @returns {{command: string, args: string[]} | undefined} The command and
 *   its leading arguments, or `undefined` when no command is configured.
 */
function resolveDialogGuardActionCommand(platform) {
  const platformKey = String(platform || "").trim();
  const platformVariables = new Map([
    [
      "darwin",
      {
        command: "BOSS_MAC_DIALOG_GUARD_ACTION_COMMAND",
        argsJson: "BOSS_MAC_DIALOG_GUARD_ACTION_ARGS_JSON",
        args: "BOSS_MAC_DIALOG_GUARD_ACTION_ARGS",
      },
    ],
    [
      "win32",
      {
        command: "BOSS_WINDOWS_DIALOG_GUARD_ACTION_COMMAND",
        argsJson: "BOSS_WINDOWS_DIALOG_GUARD_ACTION_ARGS_JSON",
        args: "BOSS_WINDOWS_DIALOG_GUARD_ACTION_ARGS",
      },
    ],
  ]);
  const genericVariables = {
    command: "BOSS_DIALOG_GUARD_ACTION_COMMAND",
    argsJson: "BOSS_DIALOG_GUARD_ACTION_ARGS_JSON",
    args: "BOSS_DIALOG_GUARD_ACTION_ARGS",
  };

  // Platform-specific variables first (if any), then the generic fallback.
  const candidates = [platformVariables.get(platformKey), genericVariables].filter(Boolean);
  for (const names of candidates) {
    const command = String(process.env[names.command] || "").trim();
    if (!command) {
      continue;
    }
    return {
      command,
      args: parseArgsJson(process.env[names.argsJson]) ?? parseArgs(process.env[names.args]),
    };
  }
  return undefined;
}
|
||
|
||
/**
 * Build the `--flag value` arguments describing a dialog decision.
 *
 * A flag is emitted only together with its value. When a value is missing or
 * empty the whole pair is omitted, so a flag is never left dangling where the
 * next flag would be read as its value, and `undefined` never ends up in the
 * argument list.
 *
 * @param {{platform?: unknown, appName?: unknown}} snapshot - Dialog snapshot;
 *   `platform` and `appName` are read.
 * @param {{signature?: {id?: unknown}, action?: unknown, button?: unknown}} decision -
 *   Guard decision; `signature.id`, `action` and `button` are read.
 * @returns {string[]} Flat list of alternating flags and string values, in
 *   the order platform, app, dialog-id, action, button.
 */
function buildDialogGuardActionArgs(snapshot, decision) {
  const pairs = [
    ["--platform", snapshot.platform],
    ["--app", snapshot.appName],
    ["--dialog-id", decision.signature?.id],
    ["--action", decision.action],
    ["--button", decision.button],
  ];
  return pairs.flatMap(([flag, value]) =>
    value == null || value === "" ? [] : [flag, String(value)],
  );
}
|
||
|
||
/**
 * Run the configured dialog action command for an `auto_action` decision.
 *
 * @param {{platform?: unknown}} snapshot - Dialog snapshot; its `platform`
 *   selects which command configuration is used.
 * @param {{disposition?: string} | null | undefined} decision - Guard decision.
 * @returns {Promise<boolean>} `true` after the command has been run; `false`
 *   when the decision is not `auto_action` or no command is configured.
 * @throws Propagates the rejection from `runCommand` when the command fails.
 */
async function applyDialogGuardAutoAction(snapshot, decision) {
  const isAutoAction = decision?.disposition === "auto_action";
  const resolved = isAutoAction ? resolveDialogGuardActionCommand(snapshot.platform) : undefined;
  if (!resolved?.command) {
    return false;
  }

  // Configured arguments come first, followed by the per-dialog flags.
  const prefixArgs = resolved.args || [];
  const dialogArgs = buildDialogGuardActionArgs(snapshot, decision);
  await runCommand(resolved.command, [...prefixArgs, ...dialogArgs]);
  return true;
}
|
||
|
||
/**
 * Evaluate the dialog guard before any desktop automation runs.
 *
 * Outcomes:
 *   - guard disabled or no snapshot available: an empty object;
 *   - the decision needs user action: `{ pausedResult, decision, snapshot }`,
 *     where `pausedResult` is the intervention result to hand to the caller;
 *   - otherwise: the auto action is attempted and
 *     `{ decision, snapshot, actionApplied, auditEntry }` is returned, with
 *     `actionApplied` also recorded on the audit entry.
 *
 * @param {object} payload - Request payload; only a string `requestId` is
 *   forwarded to the guard helpers.
 * @returns {Promise<object>} One of the shapes described above.
 * @throws Propagates errors from the guard helpers and from the action
 *   command.
 */
async function runDialogGuardPreflight(payload) {
  if (!getDialogGuardEnabled()) {
    return {};
  }
  const snapshot = readDialogGuardSnapshot();
  if (!snapshot) {
    return {};
  }

  const decision = evaluateDialogSnapshot(snapshot);
  const requestId = typeof payload.requestId === "string" ? payload.requestId : undefined;

  if (decision.disposition === "needs_user_action") {
    const pausedResult = buildDialogInterventionResult({ requestId, snapshot, decision });
    return { pausedResult, decision, snapshot };
  }

  const actionApplied = await applyDialogGuardAutoAction(snapshot, decision);
  const auditEntry = buildDialogAuditEntry({ requestId, snapshot, decision });
  auditEntry.actionApplied = actionApplied;
  return { decision, snapshot, actionApplied, auditEntry };
}
|
||
|
||
/**
 * Escape a value so it can sit between double quotes in AppleScript source.
 *
 * Every backslash and double quote is prefixed with a backslash; all other
 * characters pass through untouched.
 *
 * @param {unknown} value - Text to escape (falsy values are treated as an
 *   empty string).
 * @returns {string} The escaped text, without surrounding quotes.
 */
function escapeAppleScriptString(value) {
  const text = String(value || "");
  // "$&" is the matched character, so one pass handles both special cases.
  return text.replace(/[\\"]/g, "\\$&");
}
|
||
|
||
/**
 * Compose the AppleScript that activates an app and optionally types text.
 *
 * The script always activates `targetApp`. When the objective contains text
 * to type (see `extractQuotedText`), it waits 0.2s and sends that text through
 * System Events as a `keystroke`, followed by `key code 36` when the
 * objective asks for submission (36 is presumably the Return key; confirm
 * against the macOS key code table).
 *
 * @param {string} targetApp - Application name to activate.
 * @param {string} objective - Objective text the typing instructions are
 *   derived from.
 * @returns {string} Newline-joined AppleScript source.
 */
function buildAppleScript(targetApp, objective) {
  const lines = [
    `tell application "${escapeAppleScriptString(targetApp)}"`,
    "activate",
    "end tell",
  ];

  const typed = extractQuotedText(objective);
  if (!typed) {
    return lines.join("\n");
  }

  lines.push(
    "delay 0.2",
    'tell application "System Events"',
    `keystroke "${escapeAppleScriptString(typed)}"`,
  );
  if (shouldSubmitAfterTyping(objective)) {
    lines.push("key code 36");
  }
  lines.push("end tell");
  return lines.join("\n");
}
|
||
|
||
/**
 * Run an external command and collect its output.
 *
 * The child gets no stdin; stdout and stderr are captured as UTF-8 text.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<{stdout: string, stderr: string}>} Trimmed output, resolved
 *   when the child exits with code 0.
 * @throws {Error} Rejects when the child cannot be spawned, exits with a
 *   non-zero code, or is terminated by a signal. The message is the child's
 *   trimmed stderr when there is any, otherwise a description of the exit
 *   code or signal.
 */
async function runCommand(command, args) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve({ stdout: stdout.trim(), stderr: stderr.trim() });
        return;
      }
      // A child killed by a signal closes with code === null; name the signal
      // rather than reporting a meaningless "exit code null".
      const fallback =
        code === null && signal
          ? `computer use open terminated by signal ${signal}`
          : `computer use open exit code ${code}`;
      reject(new Error(stderr.trim() || fallback));
    });
  });
}
|
||
|
||
/**
 * Activate an application (and type into it, if the objective asks for that)
 * by running a generated AppleScript through `osascript`.
 *
 * @param {string | undefined} targetApp - Application to activate.
 * @param {string} objective - Objective text used to build the script.
 * @returns {Promise<string | undefined>} A short confirmation message, or
 *   `undefined` when no target app was given and nothing was run.
 * @throws Propagates the rejection from `runCommand` when `osascript` fails.
 */
async function runAppleScript(targetApp, objective) {
  if (targetApp) {
    const source = buildAppleScript(targetApp, objective);
    await runCommand("osascript", ["-e", source]);
    return `osascript activated ${targetApp}`;
  }
  return undefined;
}
|
||
|
||
// Entry point.
//
// Reads one JSON request from stdin, runs the dialog guard, performs the
// desktop action (unless this is a dry run or no target app was recognised),
// optionally writes an artifact, and prints exactly one JSON result on stdout.
//
// Every failure path reports `status: "failed"` on stdout and exits with
// code 0, so the caller can always parse a result. That includes failures of
// the automation command itself and of the artifact write, which would
// otherwise surface as an unhandled rejection with no JSON output.
const raw = await readStdin();
const normalized = normalizePayload(raw);

if (!normalized.ok) {
  writeJson({
    status: "failed",
    error: normalized.error,
  });
  process.exit(0);
}

const payload = normalized.payload;
// Only a string requestId is echoed back; anything else is omitted.
const requestId = typeof payload.requestId === "string" ? payload.requestId : undefined;
const objective =
  typeof payload.objective === "string" && payload.objective.trim()
    ? payload.objective.trim()
    : "桌面控制 smoke 链路正常";
const targetApp = detectTargetApp(objective);
const desktopAction = detectDesktopAction(objective);
const typedText = extractQuotedText(objective);
const riskLevel =
  typeof payload.context?.riskLevel === "string" && payload.context.riskLevel.trim()
    ? payload.context.riskLevel.trim()
    : "unknown";
const dryRun = payload.context?.dryRun === true;

let dialogGuardState = {};
try {
  dialogGuardState = await runDialogGuardPreflight(payload);
} catch (error) {
  writeJson({
    status: "failed",
    requestId,
    error: error?.message || "DIALOG_GUARD_FAILED",
  });
  process.exit(0);
}

// The guard asked for user intervention: report that and do nothing else.
if (dialogGuardState.pausedResult) {
  writeJson(dialogGuardState.pausedResult);
  process.exit(0);
}

let action = targetApp ? desktopAction : "computer_use_smoke";
const configuredMode = getDesktopAutomationMode();
// "auto" resolves to AppleScript on macOS and to the launcher command elsewhere.
const automationMode =
  configuredMode === "auto" ? (process.platform === "darwin" ? "osascript" : "open") : configuredMode;

if (targetApp && !dryRun) {
  try {
    if (automationMode === "osascript") {
      await runAppleScript(targetApp, objective);
      action = `${desktopAction}_executed`;
    } else if (automationMode !== "off") {
      const command = String(process.env.BOSS_COMPUTER_USE_OPEN_APP_COMMAND || "").trim() || "open";
      const prefixArgs = resolveOpenAppPrefixArgs(command);
      await runCommand(command, [...prefixArgs, targetApp]);
      action = `${desktopAction}_executed`;
    }
  } catch (error) {
    writeJson({
      status: "failed",
      requestId,
      error: error?.message || "COMPUTER_USE_EXECUTION_FAILED",
    });
    process.exit(0);
  }
}

let artifacts;
try {
  artifacts = await writeArtifact({
    requestKind: payload.requestKind,
    requestId: payload.requestId,
    action,
    objective,
    targetApp,
    typedText,
    dryRun,
    riskLevel,
    mode: automationMode,
    dialogGuard: dialogGuardState.auditEntry,
    capturedAt: new Date().toISOString(),
  });
} catch (error) {
  writeJson({
    status: "failed",
    requestId,
    error: error?.message || "ARTIFACT_WRITE_FAILED",
  });
  process.exit(0);
}

writeJson({
  status: "completed",
  requestId,
  replyBody: `桌面控制已完成:${objective}`,
  executionSummary: `${action} completed (risk=${riskLevel}, mode=${automationMode}${
    dialogGuardState.decision?.disposition ? `, dialogGuard=${dialogGuardState.decision.disposition}` : ""
  })`,
  targetApp,
  typedText,
  dialogGuard: dialogGuardState.decision
    ? {
        disposition: dialogGuardState.decision.disposition,
        kind: dialogGuardState.decision.kind,
        risk: dialogGuardState.decision.risk,
        action: dialogGuardState.decision.action,
        button: dialogGuardState.decision.button,
        actionApplied: dialogGuardState.actionApplied,
      }
    : undefined,
  artifacts,
});
|