Files
boss/tests/master-agent-task-reliability.test.ts
2026-05-31 03:25:30 +08:00

183 lines
6.8 KiB
TypeScript

import test from "node:test";
import assert from "node:assert/strict";
import os from "node:os";
import path from "node:path";
import { mkdtemp, rm } from "node:fs/promises";
/** Temporary directory used as the runtime root for this test file; empty until setup() has run. */
let runtimeRoot = "";
/** The boss-data module, imported dynamically by setup() after the environment variables are set. */
let data: typeof import("../src/lib/boss-data");

/**
 * One-time initialisation shared by every test in this file.
 *
 * Creates a fresh temp directory, points BOSS_RUNTIME_ROOT and BOSS_STATE_FILE
 * at it, and only then imports boss-data (presumably so the module picks up
 * those variables — confirm against boss-data). Later calls are no-ops.
 */
async function setup() {
  if (!runtimeRoot) {
    const tempPrefix = path.join(os.tmpdir(), "boss-master-task-reliability-");
    runtimeRoot = await mkdtemp(tempPrefix);

    process.env.BOSS_RUNTIME_ROOT = runtimeRoot;
    process.env.BOSS_STATE_FILE = path.join(runtimeRoot, "boss-state.json");

    data = await import("../src/lib/boss-data.ts");
  }
}
// Final cleanup: delete the temp runtime directory, if setup() ever created one.
test.after(async () => {
  if (!runtimeRoot) return;
  await rm(runtimeRoot, { recursive: true, force: true });
});
// Before every test: make sure setup() has run, then delete the runtime directory
// (which contains the state file path) so nothing written by a previous test remains.
// The directory itself is not recreated here.
test.beforeEach(async () => {
  await setup();
  const wipeOptions = { recursive: true, force: true };
  await rm(runtimeRoot, wipeOptions);
});
/**
 * Queues a desktop_control task for device "mac-studio" in the "master-agent" project.
 *
 * @param taskId Identifier for the new task; also used to derive the request message id.
 * @returns Whatever data.queueMasterAgentTask resolves to for the queued task.
 */
async function queueDesktopTask(taskId: string) {
  // The same instruction text is used for both the user-visible request and the execution prompt.
  const instruction = "打开 Chrome";
  const account = "krisolo";

  const queued = await data.queueMasterAgentTask({
    taskId,
    projectId: "master-agent",
    taskType: "desktop_control",
    requestMessageId: `${taskId}-message`,
    requestText: instruction,
    executionPrompt: instruction,
    requestedBy: account,
    requestedByAccount: account,
    deviceId: "mac-studio",
    runtimeKind: "computer-use-runtime",
    controlPlatform: "macos",
  });
  return queued;
}
// A first claim should move the task to "running", set attemptCount to 1,
// and stamp a lease expiry that lies after the claim time.
test("claiming a desktop control task records attempt count and a server lease", async () => {
  await queueDesktopTask("lease-task");

  const leased = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(leased?.taskId, "lease-task");
  assert.equal(leased?.status, "running");
  assert.equal(leased?.attemptCount, 1);
  assert.ok(leased?.leaseExpiresAt);

  // A missing claimedAt parses to NaN, which makes the comparison below fail.
  const leaseEndsAtMs = Date.parse(leased.leaseExpiresAt);
  const claimedAtMs = Date.parse(leased.claimedAt ?? "");
  assert.ok(leaseEndsAtMs > claimedAtMs);
});
// Once a running task's lease has expired, the next claim should hand the same
// task out again with attemptCount bumped to 2.
test("an expired running desktop control task can be reclaimed with a new attempt", async () => {
  await queueDesktopTask("reclaim-task");

  const initialClaim = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(initialClaim?.attemptCount, 1);

  // Back-date the stored claim and lease so the lease is long expired.
  const snapshot = await data.readState();
  const storedTask = snapshot.masterAgentTasks.find((candidate) => candidate.taskId === "reclaim-task");
  assert.ok(storedTask);
  storedTask.claimedAt = "2000-01-01T00:00:00.000Z";
  storedTask.leaseExpiresAt = "2000-01-01T00:01:00.000Z";
  await data.writeState(snapshot);

  const secondClaim = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(secondClaim?.taskId, "reclaim-task");
  assert.equal(secondClaim?.status, "running");
  assert.equal(secondClaim?.attemptCount, 2);
  // NOTE(review): this compares against the claimedAt returned by the first claim,
  // not the back-dated value written above — confirm that is the intended check.
  assert.notEqual(secondClaim?.claimedAt, initialClaim?.claimedAt);
});
// When an expired running task has already used all of its attempts, claiming
// should return null and the task should be marked "timed_out" with an error message.
test("an expired running task is timed out after max attempts instead of being claimed forever", async () => {
  await queueDesktopTask("timeout-task");
  await data.claimNextMasterAgentTask("mac-studio");

  // Force the stored task into "attempts exhausted, lease expired".
  const snapshot = await data.readState();
  const storedTask = snapshot.masterAgentTasks.find((candidate) => candidate.taskId === "timeout-task");
  assert.ok(storedTask);
  storedTask.status = "running";
  storedTask.attemptCount = 3;
  storedTask.maxAttempts = 3;
  storedTask.leaseExpiresAt = "2000-01-01T00:01:00.000Z";
  await data.writeState(snapshot);

  const nextClaim = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(nextClaim, null);

  // Re-read persisted state to check what the failed claim left behind.
  const afterClaim = await data.readState();
  const timedOutTask = afterClaim.masterAgentTasks.find((candidate) => candidate.taskId === "timeout-task");
  assert.equal(timedOutTask?.status, "timed_out");
  assert.match(timedOutTask?.errorMessage ?? "", /timed out/i);
});
// A completion report that arrives after the task was canceled must leave the
// task "canceled" and must not attach the late reply body — both in the returned
// value and in persisted state.
test("canceling a running task prevents late success completion from overwriting the terminal state", async () => {
  await queueDesktopTask("cancel-task");

  const running = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(running?.status, "running");

  const cancelResult = await data.cancelMasterAgentTask({
    taskId: "cancel-task",
    actorAccount: "krisolo",
    reason: "用户取消演示任务",
  });
  assert.equal(cancelResult.status, "canceled");
  assert.ok(cancelResult.canceledAt);

  // The device reports success only after the cancellation has been recorded.
  const lateResult = await data.completeMasterAgentTask({
    taskId: "cancel-task",
    deviceId: "mac-studio",
    status: "completed",
    replyBody: "迟到的成功结果",
  });
  assert.equal(lateResult.status, "canceled");
  assert.equal(lateResult.replyBody, undefined);

  const persisted = await data.readState();
  const storedTask = persisted.masterAgentTasks.find((candidate) => candidate.taskId === "cancel-task");
  assert.equal(storedTask?.status, "canceled");
  assert.equal(storedTask?.replyBody, undefined);
});
// A progress update on a running task should keep the task running (no completedAt)
// and be reflected in the matching progress message of the "master-agent" project.
test("streaming task progress updates mutate the progress card without completing the task", async () => {
  await queueDesktopTask("live-progress-task");

  const running = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(running?.status, "running");

  const afterUpdate = await data.updateMasterAgentTaskProgress({
    taskId: "live-progress-task",
    deviceId: "mac-studio",
    status: "running",
    executionProgress: {
      steps: [
        { text: "读取 app-server 事件流", status: "done" },
        { text: "等待目标线程回复", status: "running" },
      ],
      artifacts: [{ label: "codex_app_server_protocol.schemas.json", kind: "file" }],
    },
  });
  assert.equal(afterUpdate.status, "running");
  assert.equal(afterUpdate.completedAt, undefined);

  // Locate the message whose execution progress belongs to this task.
  const persisted = await data.readState();
  const masterProject = persisted.projects.find((candidate) => candidate.id === "master-agent");
  const progressCard = masterProject?.messages.find(
    (entry) => entry.executionProgress?.taskId === "live-progress-task",
  );
  const progress = progressCard?.executionProgress;

  assert.equal(progress?.status, "running");
  assert.equal(progress?.steps[0]?.text, "读取 app-server 事件流");
  assert.equal(progress?.steps[1]?.status, "running");
  assert.equal(progress?.artifacts?.[0]?.label, "codex_app_server_protocol.schemas.json");
});
// Thread-collaboration metadata supplied at queue time should be present on the
// queued task, and the source/target Codex thread refs should still be there
// when the task is claimed.
test("queued thread collaboration tasks retain source and target thread references", async () => {
  // The same text is used for both the request and the execution prompt.
  const collaborationPrompt = "让源线程和目标线程对一下方案";

  const queued = await data.queueMasterAgentTask({
    taskId: "thread-collaboration-task",
    projectId: "master-agent",
    taskType: "conversation_reply",
    requestMessageId: "msg-thread-collaboration",
    requestText: collaborationPrompt,
    executionPrompt: collaborationPrompt,
    requestedBy: "krisolo",
    requestedByAccount: "krisolo",
    deviceId: "mac-studio",
    intentCategory: "thread_collaboration",
    sourceThreadId: "source-thread-id",
    sourceThreadDisplayName: "源线程",
    sourceCodexThreadRef: "019d-source-codex",
    targetThreadId: "target-thread-id",
    targetThreadDisplayName: "目标线程",
    targetCodexThreadRef: "019d-target-codex",
  });
  assert.equal(queued.intentCategory, "thread_collaboration");
  assert.equal(queued.sourceThreadId, "source-thread-id");
  assert.equal(queued.sourceThreadDisplayName, "源线程");
  assert.equal(queued.sourceCodexThreadRef, "019d-source-codex");

  const picked = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(picked?.sourceCodexThreadRef, "019d-source-codex");
  assert.equal(picked?.targetCodexThreadRef, "019d-target-codex");
});