Files
boss/scripts/computer-use-smoke.mjs
2026-05-17 02:20:08 +08:00

482 lines
14 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
import { spawn } from "node:child_process";
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import {
buildDialogAuditEntry,
buildDialogInterventionResult,
evaluateDialogSnapshot,
readDialogSnapshotFromEnv,
} from "../local-agent/desktop-dialog-guard.mjs";
/**
 * Writes a value to stdout as a single line of JSON.
 *
 * @param {unknown} payload - Value serialized with JSON.stringify.
 * @returns {void}
 */
function writeJson(payload) {
  const serialized = JSON.stringify(payload);
  // Newline-terminate so each call produces exactly one output line.
  process.stdout.write(serialized + "\n");
}
/**
 * Reads all of stdin and returns it as a trimmed UTF-8 string.
 *
 * Chunks are collected as bytes and decoded once at the end, so a multi-byte
 * character that happens to be split across two chunks is decoded correctly
 * instead of turning into replacement characters.
 *
 * @returns {Promise<string>} The full stdin content with surrounding whitespace removed.
 */
async function readStdin() {
  const chunks = [];
  for await (const chunk of process.stdin) {
    // String chunks appear when an encoding was set on the stream; normalize to bytes.
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8").trim();
}
/**
 * Parses the raw request text and checks that it is a JSON object.
 *
 * @param {string} raw - Raw JSON text.
 * @returns {{ok: true, payload: object} | {ok: false, error: string}}
 *   The parsed object on success, otherwise an error description. Arrays,
 *   `null` and primitives are rejected as "expected object".
 */
function normalizePayload(raw) {
  let candidate;
  try {
    candidate = JSON.parse(raw);
  } catch {
    return { ok: false, error: "INVALID_COMPUTER_USE_PAYLOAD: invalid json" };
  }

  const isPlainObject =
    candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
  if (isPlainObject) {
    return { ok: true, payload: candidate };
  }
  return { ok: false, error: "INVALID_COMPUTER_USE_PAYLOAD: expected object" };
}
/**
 * Picks the application an objective refers to by case-insensitive substring match.
 *
 * Entries are checked in declaration order and the first one with a matching
 * alias wins, so an objective mentioning several apps resolves to the earliest entry.
 *
 * @param {unknown} objective - Free-form objective text.
 * @returns {string | undefined} The application label, or undefined when no alias matches.
 */
function detectTargetApp(objective) {
  const haystack = String(objective || "").toLowerCase();
  const appAliases = [
    ["微信", ["微信", "wechat"]],
    ["飞书", ["飞书", "lark", "feishu"]],
    ["Telegram", ["telegram"]],
    ["QQ", ["qq"]],
    ["Finder", ["finder", "访达"]],
    ["系统设置", ["系统设置", "system settings", "settings"]],
    ["Chrome", ["chrome", "谷歌浏览器"]],
    ["Safari", ["safari"]],
  ];
  const hit = appAliases.find(([, aliases]) =>
    aliases.some((alias) => haystack.includes(alias.toLowerCase())),
  );
  return hit ? hit[0] : undefined;
}
/**
 * Translates an application label into the name used for automation on this platform.
 *
 * Only "Chrome" on darwin is rewritten (to "Google Chrome"); every other value
 * is returned unchanged.
 *
 * @param {string | undefined} targetApp - Application label.
 * @returns {string | undefined} The platform-specific application name.
 */
function resolvePlatformAppName(targetApp) {
  const needsMacChromeName = targetApp === "Chrome" && process.platform === "darwin";
  return needsMacChromeName ? "Google Chrome" : targetApp;
}
/**
 * Reports whether an application name is one of the recognized browsers.
 *
 * The comparison is exact and case-sensitive.
 *
 * @param {unknown} targetApp - Application name.
 * @returns {boolean} True for "Chrome", "Google Chrome" or "Safari".
 */
function isBrowserApp(targetApp) {
  const name = String(targetApp || "");
  return name === "Chrome" || name === "Google Chrome" || name === "Safari";
}
/**
 * Finds an http(s) URL in the objective text.
 *
 * Text extracted by extractQuotedText is preferred when it starts with
 * http:// or https://; otherwise the first URL-looking run in the objective is used.
 *
 * @param {unknown} objective - Free-form objective text.
 * @returns {string | undefined} The URL, or undefined when none is found.
 */
function extractTargetUrl(objective) {
  const source = String(objective || "");
  const quoted = extractQuotedText(source);
  const quotedIsUrl = /^https?:\/\//i.test(String(quoted || ""));
  if (quotedIsUrl) {
    return quoted;
  }
  // Fall back to scanning the raw text; the URL ends at whitespace or one of the listed delimiters.
  const inline = source.match(/https?:\/\/[^\s、)"”]+/i);
  return inline?.[0];
}
/**
 * Maps an objective to a desktop action name by case-insensitive keyword match.
 *
 * Rules are evaluated top to bottom; the first rule with a keyword contained
 * in the objective wins.
 *
 * @param {unknown} objective - Free-form objective text.
 * @returns {string} The matched action name, or "open_app" when no rule matches.
 */
function detectDesktopAction(objective) {
  const haystack = String(objective || "").toLowerCase();
  const rules = [
    ["open_settings", ["系统设置", "settings"]],
    ["open_finder", ["访达", "finder"]],
    ["open_wechat", ["微信", "wechat"]],
    ["open_feishu", ["飞书", "lark", "feishu"]],
    ["open_telegram", ["telegram"]],
    ["open_qq", ["qq"]],
  ];
  const hit = rules.find(([, keywords]) =>
    keywords.some((keyword) => haystack.includes(keyword)),
  );
  return hit ? hit[0] : "open_app";
}
/**
 * Extracts the text a user wants typed from an objective.
 *
 * Patterns are tried in order and the first non-empty (trimmed) capture wins:
 *   1. text between straight or curly double quotes,
 *   2. text between corner brackets,
 *   3. text following "输入" and a colon,
 *   4. text following "打字" and a colon.
 * For patterns 3 and 4 both the ASCII and the full-width colon (U+FF1A) are
 * accepted, and the capture stops at a newline, an ideographic full stop, or
 * an ASCII / full-width (U+FF1B) semicolon, so instructions written with
 * Chinese punctuation are handled the same way as ASCII ones.
 *
 * @param {unknown} objective - Free-form objective text.
 * @returns {string | undefined} The extracted text, or undefined when nothing matches.
 */
function extractQuotedText(objective) {
  const text = String(objective || "");
  const patterns = [
    /[“"]([^“”"]+)[”"]/,
    /[「『]([^」』]+)[」』]/,
    // \uFF1A / \uFF1B are the full-width colon and semicolon.
    /输入[:\uFF1A]\s*([^\n。;\uFF1B]+)/,
    /打字[:\uFF1A]\s*([^\n。;\uFF1B]+)/,
  ];
  for (const pattern of patterns) {
    const value = text.match(pattern)?.[1]?.trim();
    if (value) {
      return value;
    }
  }
  return undefined;
}
/**
 * Decides whether the objective asks to submit after typing.
 *
 * @param {unknown} objective - Free-form objective text.
 * @returns {boolean} True when the lower-cased text contains any submit keyword.
 */
function shouldSubmitAfterTyping(objective) {
  const lowered = String(objective || "").toLowerCase();
  const submitKeywords = ["发送", "提交", "回车", "enter", "submit"];
  return submitKeywords.some((keyword) => lowered.includes(keyword));
}
/**
 * Splits a whitespace-separated argument string into tokens.
 *
 * No quoting or escaping is interpreted; any run of whitespace is a separator.
 *
 * @param {unknown} value - Argument string; falsy values are treated as empty.
 * @returns {string[]} The non-empty tokens, possibly an empty array.
 */
function parseArgs(value) {
  const source = String(value || "");
  const tokens = source.match(/\S+/g);
  return tokens ?? [];
}
/**
 * Parses a JSON array of arguments into a list of strings.
 *
 * @param {unknown} value - JSON text; falsy or blank values yield undefined.
 * @returns {string[] | undefined} Each element converted with String() and
 *   empty strings dropped; undefined when the input is blank, is not valid
 *   JSON, is not an array, or an element cannot be converted to a string.
 */
function parseArgsJson(value) {
  const source = String(value || "").trim();
  if (source === "") {
    return undefined;
  }
  try {
    const decoded = JSON.parse(source);
    if (!Array.isArray(decoded)) {
      return undefined;
    }
    const args = [];
    for (const entry of decoded) {
      // String() stays inside the try: conversion can throw for some objects.
      const text = String(entry);
      if (text) {
        args.push(text);
      }
    }
    return args;
  } catch {
    return undefined;
  }
}
/**
 * Determines the arguments placed before the application name when launching it.
 *
 * When either BOSS_COMPUTER_USE_OPEN_APP_ARGS_JSON or
 * BOSS_COMPUTER_USE_OPEN_APP_ARGS is set, the override is used (JSON form
 * first, falling back to the whitespace-separated form when the JSON form
 * does not parse to an array). Without an override, a command whose base name
 * is "open" gets ["-a"] and any other command gets no prefix arguments.
 *
 * @param {string | undefined} command - Launcher command or path.
 * @returns {string[]} Prefix arguments.
 */
function resolveOpenAppPrefixArgs(command) {
  const jsonOverride = String(process.env.BOSS_COMPUTER_USE_OPEN_APP_ARGS_JSON || "").trim();
  const plainOverride = String(process.env.BOSS_COMPUTER_USE_OPEN_APP_ARGS || "").trim();

  const hasOverride = Boolean(jsonOverride || plainOverride);
  if (!hasOverride) {
    const executable = path.basename(command || "").toLowerCase();
    return executable === "open" ? ["-a"] : [];
  }

  const fromJson = parseArgsJson(jsonOverride);
  return fromJson ?? parseArgs(plainOverride);
}
/**
 * Persists the payload as a pretty-printed JSON artifact.
 *
 * Nothing is written when BOSS_CONTROL_ARTIFACT_DIR is unset or blank. The
 * file is named after the payload's requestId (or `desktop-<timestamp>` when
 * the id is missing or blank). Because the id comes from the request payload,
 * path separators and NUL bytes in it are replaced with "_" so the artifact
 * always lands directly inside the artifact directory.
 *
 * @param {object} payload - Data to persist; `requestId` is used for the file name.
 * @returns {Promise<Array<{kind: string, path: string}>>} One descriptor for
 *   the written file, or an empty array when no artifact directory is configured.
 * @throws Rejects when the directory cannot be created or the file cannot be written.
 */
async function writeArtifact(payload) {
  const artifactDir = String(process.env.BOSS_CONTROL_ARTIFACT_DIR || "").trim();
  if (!artifactDir) {
    return [];
  }
  await mkdir(artifactDir, { recursive: true });
  const requestId =
    typeof payload.requestId === "string" && payload.requestId.trim()
      ? payload.requestId.trim()
      : `desktop-${Date.now()}`;
  // Neutralize "/" and "\" (and NUL) so an id such as "../x" cannot escape artifactDir.
  const safeFileStem = requestId.replace(/[\\/\0]/g, "_");
  const artifactPath = path.join(artifactDir, `${safeFileStem}.json`);
  await writeFile(artifactPath, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
  return [
    {
      kind: "json",
      path: artifactPath,
    },
  ];
}
/**
 * Reads the configured automation mode from BOSS_COMPUTER_USE_MODE.
 *
 * The value is trimmed and lower-cased before comparison.
 *
 * @returns {"off" | "open" | "osascript" | "auto"} The recognized mode, or
 *   "auto" when the variable is unset or holds an unrecognized value.
 */
function getDesktopAutomationMode() {
  const requested = String(process.env.BOSS_COMPUTER_USE_MODE || "")
    .trim()
    .toLowerCase();
  const knownModes = ["off", "open", "osascript", "auto"];
  return knownModes.includes(requested) ? requested : "auto";
}
/**
 * Reports whether the dialog guard is switched on.
 *
 * @returns {boolean} True only when BOSS_DIALOG_GUARD_ENABLED, trimmed and
 *   lower-cased, equals "true".
 */
function getDialogGuardEnabled() {
  const flag = process.env.BOSS_DIALOG_GUARD_ENABLED || "";
  const normalizedFlag = String(flag).trim().toLowerCase();
  return normalizedFlag === "true";
}
/**
 * Reads the dialog snapshot through readDialogSnapshotFromEnv.
 *
 * The platform passed along is BOSS_DIALOG_GUARD_PLATFORM when set, otherwise
 * the platform of the running process.
 *
 * @returns {*} Whatever readDialogSnapshotFromEnv returns for the current environment.
 */
function readDialogGuardSnapshot() {
  const platform = process.env.BOSS_DIALOG_GUARD_PLATFORM || process.platform;
  return readDialogSnapshotFromEnv(process.env, platform);
}
/**
 * Resolves the external command used to act on a dialog.
 *
 * For "darwin" the BOSS_MAC_DIALOG_GUARD_ACTION_* variables are consulted
 * first, for "win32" the BOSS_WINDOWS_DIALOG_GUARD_ACTION_* variables. When
 * the platform-specific command is unset or blank (or the platform is
 * anything else) the generic BOSS_DIALOG_GUARD_ACTION_* variables are used.
 * Arguments come from the *_ARGS_JSON variable when it parses to an array,
 * otherwise from the whitespace-separated *_ARGS variable.
 *
 * @param {unknown} platform - Platform identifier.
 * @returns {{command: string, args: string[]} | undefined} The command and
 *   its leading arguments, or undefined when no command is configured.
 */
function resolveDialogGuardActionCommand(platform) {
  // Reads one command/args variable triple; undefined when the command is blank.
  const readCommand = (commandVar, argsJsonVar, argsVar) => {
    const command = String(process.env[commandVar] || "").trim();
    if (!command) {
      return undefined;
    }
    return {
      command,
      args: parseArgsJson(process.env[argsJsonVar]) ?? parseArgs(process.env[argsVar]),
    };
  };

  let platformSpecific;
  switch (String(platform || "").trim()) {
    case "darwin":
      platformSpecific = readCommand(
        "BOSS_MAC_DIALOG_GUARD_ACTION_COMMAND",
        "BOSS_MAC_DIALOG_GUARD_ACTION_ARGS_JSON",
        "BOSS_MAC_DIALOG_GUARD_ACTION_ARGS",
      );
      break;
    case "win32":
      platformSpecific = readCommand(
        "BOSS_WINDOWS_DIALOG_GUARD_ACTION_COMMAND",
        "BOSS_WINDOWS_DIALOG_GUARD_ACTION_ARGS_JSON",
        "BOSS_WINDOWS_DIALOG_GUARD_ACTION_ARGS",
      );
      break;
    default:
      platformSpecific = undefined;
  }

  return (
    platformSpecific ??
    readCommand(
      "BOSS_DIALOG_GUARD_ACTION_COMMAND",
      "BOSS_DIALOG_GUARD_ACTION_ARGS_JSON",
      "BOSS_DIALOG_GUARD_ACTION_ARGS",
    )
  );
}
/**
 * Builds the `--flag value` arguments describing a dialog decision.
 *
 * A flag is emitted only together with a non-empty value. Dropping flag and
 * value as a pair keeps the list well-formed: removing just the empty value
 * would leave a dangling flag that swallows the next flag as its value.
 *
 * @param {{platform?: string, appName?: string}} snapshot - Dialog snapshot.
 * @param {{signature?: {id?: string}, action?: string, button?: string}} decision
 *   Decision produced for the snapshot.
 * @returns {string[]} Flat argument list in the order platform, app,
 *   dialog-id, action, button (each present only when it has a value).
 */
function buildDialogGuardActionArgs(snapshot, decision) {
  const pairs = [
    ["--platform", snapshot.platform],
    ["--app", snapshot.appName],
    ["--dialog-id", decision.signature?.id],
    ["--action", decision.action],
    ["--button", decision.button],
  ];
  return pairs.flatMap(([flag, value]) => {
    // Falsy values (undefined, null, "") count as "no value".
    const text = String(value || "");
    return text === "" ? [] : [flag, text];
  });
}
/**
 * Runs the configured dialog action command for an "auto_action" decision.
 *
 * @param {{platform?: string}} snapshot - Dialog snapshot the decision refers to.
 * @param {{disposition?: string} | undefined} decision - Decision for the snapshot.
 * @returns {Promise<boolean>} True when a command was run; false when the
 *   decision is not an auto action or no command is configured.
 * @throws Rejects when the action command fails.
 */
async function applyDialogGuardAutoAction(snapshot, decision) {
  const isAutoAction = decision?.disposition === "auto_action";
  if (!isAutoAction) {
    return false;
  }

  const resolved = resolveDialogGuardActionCommand(snapshot.platform);
  if (!resolved?.command) {
    return false;
  }

  // Configured arguments come first, followed by the decision-specific flags.
  const configuredArgs = resolved.args || [];
  const decisionArgs = buildDialogGuardActionArgs(snapshot, decision);
  await runCommand(resolved.command, [...configuredArgs, ...decisionArgs]);
  return true;
}
/**
 * Runs the dialog guard before any desktop automation.
 *
 * Returns an empty object when the guard is disabled or no snapshot is
 * available. When the decision requires user action, the result carries a
 * `pausedResult` built by buildDialogInterventionResult. Otherwise an
 * automatic action is attempted and an audit entry, annotated with
 * `actionApplied`, is returned.
 *
 * @param {object} payload - Request payload; a string `requestId` is passed to the builders.
 * @returns {Promise<object>} `{}`, `{pausedResult, decision, snapshot}` or
 *   `{decision, snapshot, actionApplied, auditEntry}`.
 * @throws Rejects when the automatic action command fails.
 */
async function runDialogGuardPreflight(payload) {
  if (!getDialogGuardEnabled()) {
    return {};
  }
  const snapshot = readDialogGuardSnapshot();
  if (!snapshot) {
    return {};
  }

  const decision = evaluateDialogSnapshot(snapshot);
  const requestId = typeof payload.requestId === "string" ? payload.requestId : undefined;

  if (decision.disposition === "needs_user_action") {
    const pausedResult = buildDialogInterventionResult({ requestId, snapshot, decision });
    return { pausedResult, decision, snapshot };
  }

  const actionApplied = await applyDialogGuardAutoAction(snapshot, decision);
  const auditEntry = buildDialogAuditEntry({ requestId, snapshot, decision });
  // Record on the audit entry whether a command was actually run.
  auditEntry.actionApplied = actionApplied;
  return { decision, snapshot, actionApplied, auditEntry };
}
/**
 * Escapes a value for embedding inside a double-quoted AppleScript string.
 *
 * Every backslash and every double quote is prefixed with a backslash.
 *
 * @param {unknown} value - Value to escape; falsy values become the empty string.
 * @returns {string} The escaped text.
 */
function escapeAppleScriptString(value) {
  const text = String(value || "");
  return text.replace(/[\\"]/g, (character) => `\\${character}`);
}
/**
 * Builds the AppleScript source that activates an application and optionally types text.
 *
 * The script always activates the application. When extractQuotedText finds
 * text in the objective, a System Events block is appended that types it
 * after a short delay and, if shouldSubmitAfterTyping says so, sends key code 36.
 *
 * @param {string} targetApp - Application name to activate.
 * @param {string} objective - Objective text used to derive the typed text.
 * @returns {string} Newline-joined AppleScript source.
 */
function buildAppleScript(targetApp, objective) {
  const lines = [
    `tell application "${escapeAppleScriptString(targetApp)}"`,
    "activate",
    "end tell",
  ];

  const typed = extractQuotedText(objective);
  if (typed) {
    lines.push(
      "delay 0.2",
      'tell application "System Events"',
      `keystroke "${escapeAppleScriptString(typed)}"`,
    );
    if (shouldSubmitAfterTyping(objective)) {
      // NOTE(review): key code 36 is presumably the Return key — confirm.
      lines.push("key code 36");
    }
    lines.push("end tell");
  }

  return lines.join("\n");
}
/**
 * Spawns a command and collects its output.
 *
 * stdin is ignored; stdout and stderr are captured as UTF-8 text.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<{stdout: string, stderr: string}>} Trimmed output when the
 *   process closes with exit code 0.
 * @throws Rejects with the spawn error, or with an Error carrying the trimmed
 *   stderr (or a generic exit-code message when stderr is empty) when the
 *   exit code is not 0.
 */
async function runCommand(command, args) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"] });
    const captured = { stdout: "", stderr: "" };

    for (const streamName of ["stdout", "stderr"]) {
      const stream = child[streamName];
      stream.setEncoding("utf8");
      stream.on("data", (chunk) => {
        captured[streamName] += chunk;
      });
    }

    child.on("error", reject);
    child.on("close", (code) => {
      const stdout = captured.stdout.trim();
      const stderr = captured.stderr.trim();
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      reject(new Error(stderr || `computer use open exit code ${code}`));
    });
  });
}
/**
 * Activates an application (and types any requested text) through osascript.
 *
 * @param {string | undefined} targetApp - Application name; nothing runs when falsy.
 * @param {string} objective - Objective text forwarded to buildAppleScript.
 * @returns {Promise<string | undefined>} A short summary, or undefined when no application was given.
 * @throws Rejects when the osascript command fails.
 */
async function runAppleScript(targetApp, objective) {
  if (targetApp) {
    const source = buildAppleScript(targetApp, objective);
    await runCommand("osascript", ["-e", source]);
    return `osascript activated ${targetApp}`;
  }
  return undefined;
}
/**
 * Launches an application through the configured launcher command.
 *
 * The launcher is BOSS_COMPUTER_USE_OPEN_APP_COMMAND when set, otherwise
 * "open"; the application name is passed after the prefix arguments from
 * resolveOpenAppPrefixArgs.
 *
 * @param {string} targetApp - Application name to launch.
 * @returns {Promise<string>} A short summary of what was run.
 * @throws Rejects when the launcher command fails.
 */
async function runOpenApp(targetApp) {
  const override = String(process.env.BOSS_COMPUTER_USE_OPEN_APP_COMMAND || "").trim();
  const launcher = override || "open";
  const leadingArgs = resolveOpenAppPrefixArgs(launcher);
  await runCommand(launcher, [...leadingArgs, targetApp]);
  return `open activated ${targetApp}`;
}
/**
 * Opens a URL in a browser through the configured launcher command.
 *
 * The application name is passed before the URL only when the launcher's base
 * name is "open" or the prefix arguments contain "-a"; any other launcher
 * receives just the URL after its prefix arguments.
 *
 * @param {string} targetApp - Browser application name.
 * @param {string} targetUrl - URL to open.
 * @returns {Promise<string>} A short summary of what was run.
 * @throws Rejects when the launcher command fails.
 */
async function runOpenBrowserUrl(targetApp, targetUrl) {
  const launcher = String(process.env.BOSS_COMPUTER_USE_OPEN_APP_COMMAND || "").trim() || "open";
  const leadingArgs = resolveOpenAppPrefixArgs(launcher);
  const launcherName = path.basename(launcher || "").toLowerCase();

  const passesAppName = launcherName === "open" || leadingArgs.includes("-a");
  const trailingArgs = passesAppName ? [targetApp, targetUrl] : [targetUrl];

  await runCommand(launcher, [...leadingArgs, ...trailingArgs]);
  return `open url in ${targetApp}`;
}
// Script entry point: read one JSON request from stdin, run the dialog guard,
// perform the requested desktop action (unless dryRun), write an optional
// artifact, and print exactly one JSON result line on stdout.
const raw = await readStdin();
const normalized = normalizePayload(raw);
if (!normalized.ok) {
  writeJson({
    status: "failed",
    error: normalized.error,
  });
  process.exit(0);
}
const payload = normalized.payload;
// Only a string requestId is echoed back in results.
const requestId = typeof payload.requestId === "string" ? payload.requestId : undefined;
const objective =
  typeof payload.objective === "string" && payload.objective.trim()
    ? payload.objective.trim()
    : "桌面控制 smoke 链路正常";
const targetApp = detectTargetApp(objective);
const automationTargetApp = resolvePlatformAppName(targetApp);
const targetUrl = extractTargetUrl(objective);
const typedText = extractQuotedText(objective);
const desktopAction = detectDesktopAction(objective);
const riskLevel =
  typeof payload.context?.riskLevel === "string" && payload.context.riskLevel.trim()
    ? payload.context.riskLevel.trim()
    : "unknown";
const dryRun = payload.context?.dryRun === true;

let dialogGuardState = {};
try {
  dialogGuardState = await runDialogGuardPreflight(payload);
} catch (error) {
  writeJson({
    status: "failed",
    requestId,
    error: error?.message || "DIALOG_GUARD_FAILED",
  });
  process.exit(0);
}
if (dialogGuardState.pausedResult) {
  // The guard needs the user to act; report that instead of automating.
  writeJson(dialogGuardState.pausedResult);
  process.exit(0);
}

let action = targetApp ? desktopAction : "computer_use_smoke";
const configuredMode = getDesktopAutomationMode();
// "auto" resolves to osascript on macOS and to the launcher command elsewhere.
const automationMode =
  configuredMode === "auto" ? (process.platform === "darwin" ? "osascript" : "open") : configuredMode;

let artifacts = [];
try {
  if (targetApp && !dryRun) {
    // NOTE(review): the URL branch runs even when the mode is "off" — confirm this is intended.
    if (targetUrl && isBrowserApp(automationTargetApp)) {
      await runOpenBrowserUrl(automationTargetApp, targetUrl);
      action = `${desktopAction}_url_executed`;
    } else if (automationMode === "osascript") {
      await runAppleScript(automationTargetApp, objective);
      action = `${desktopAction}_executed`;
    } else if (automationMode !== "off") {
      await runOpenApp(automationTargetApp);
      action = `${desktopAction}_executed`;
    }
  }
  artifacts = await writeArtifact({
    requestKind: payload.requestKind,
    requestId: payload.requestId,
    action,
    objective,
    targetApp,
    automationTargetApp,
    targetUrl,
    typedText,
    dryRun,
    riskLevel,
    mode: automationMode,
    dialogGuard: dialogGuardState.auditEntry,
    capturedAt: new Date().toISOString(),
  });
} catch (error) {
  // A failed command or artifact write used to surface as an uncaught
  // rejection with no JSON on stdout. Report it like the other failure
  // paths, but keep the non-zero exit status that case already produced.
  writeJson({
    status: "failed",
    requestId,
    error: error?.message || "COMPUTER_USE_AUTOMATION_FAILED",
  });
  process.exit(1);
}

writeJson({
  status: "completed",
  requestId,
  replyBody: `桌面控制已完成:${objective}`,
  executionSummary: `${action} completed (risk=${riskLevel}, mode=${automationMode}${
    dialogGuardState.decision?.disposition ? `, dialogGuard=${dialogGuardState.decision.disposition}` : ""
  })`,
  targetApp,
  automationTargetApp,
  targetUrl,
  typedText,
  dialogGuard: dialogGuardState.decision
    ? {
        disposition: dialogGuardState.decision.disposition,
        kind: dialogGuardState.decision.kind,
        risk: dialogGuardState.decision.risk,
        action: dialogGuardState.decision.action,
        button: dialogGuardState.decision.button,
        actionApplied: dialogGuardState.actionApplied,
      }
    : undefined,
  artifacts,
});