feat: add master agent task recovery endpoint
This commit is contained in:
@@ -273,6 +273,27 @@ test.beforeEach(async () => {
|
||||
createdAt: now,
|
||||
},
|
||||
];
|
||||
state.masterAgentTasks = [
|
||||
{
|
||||
taskId: "task-stale",
|
||||
projectId: "project-acme",
|
||||
taskType: "conversation_reply",
|
||||
requestMessageId: "message-stale-request",
|
||||
requestText: "请继续处理 Acme 生产项目的等待回复。",
|
||||
executionPrompt: "继续 Acme 生产项目的 conversation_reply,并回写安全摘要。",
|
||||
requestedBy: "开发同事",
|
||||
requestedByAccount: "dev@acme.com",
|
||||
deviceId: "win-1",
|
||||
status: "running",
|
||||
phase: "awaiting_reply",
|
||||
requestedAt: "2026-04-30T08:00:00+08:00",
|
||||
claimedAt: "2026-04-30T08:01:00+08:00",
|
||||
lastProgressAt: "2026-04-30T08:01:00+08:00",
|
||||
leaseExpiresAt: "2026-04-30T08:02:00+08:00",
|
||||
attemptCount: 1,
|
||||
maxAttempts: 2,
|
||||
},
|
||||
];
|
||||
await data.writeState(state);
|
||||
});
|
||||
|
||||
@@ -370,6 +391,13 @@ test("backoffice bff exposes yudao style management contract without secrets", a
|
||||
["Boss API", "OTA", "Codex Provider", "Computer Use", "Skill Hub"],
|
||||
);
|
||||
assert.equal(payload.insights.riskAggregates.some((item: { label: string }) => item.label === "设备离线"), true);
|
||||
assert.equal(payload.insights.dataSafetySummary.restorePointCount >= 0, true);
|
||||
assert.match(payload.insights.dataSafetySummary.rpoLabel, /文件 MVP|企业标准/);
|
||||
assert.equal(Array.isArray(payload.insights.taskRiskSummary.rows), true);
|
||||
assert.equal(typeof payload.insights.taskRiskSummary.counts.stale, "number");
|
||||
const staleTask = payload.insights.taskRiskSummary.rows.find((row: { taskId: string }) => row.taskId === "task-stale");
|
||||
assert.equal(staleTask?.stale, true);
|
||||
assert.equal(staleTask?.phase, "awaiting_reply");
|
||||
assert.equal(payload.yudaoMapping.tenant, "adminCompanies");
|
||||
assert.equal(payload.yudaoMapping.user, "authAccounts");
|
||||
assert.equal(payload.yudaoMapping.role, "BOSS_PERMISSION_TEMPLATES");
|
||||
|
||||
122
tests/master-agent-task-recovery-route.test.ts
Normal file
122
tests/master-agent-task-recovery-route.test.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { mkdtemp, rm } from "node:fs/promises";
|
||||
import { NextRequest } from "next/server";
|
||||
import type { MasterAgentTask } from "../src/lib/boss-data";
|
||||
|
||||
/** Type of the lazily imported data module under test. */
type BossDataModule = typeof import("../src/lib/boss-data.ts");
/** Type of the lazily imported recovery route module under test. */
type RecoveryRouteModule = typeof import("../src/app/api/v1/master-agent/tasks/[taskId]/recovery/route.ts");

// Temporary runtime directory; stays "" until `setup()` has created it.
let runtimeRoot = "";
// Data module, assigned by `setup()` after the environment variables are set.
let data: BossDataModule;
// Session cookie name, copied from the auth module by `setup()`.
let authCookie = "";
// Route handlers exercised by the tests below.
let getRecovery: RecoveryRouteModule["GET"];
let postRecovery: RecoveryRouteModule["POST"];
// State as read once during `setup()`; each test starts from a clone of it.
let baseState: Awaited<ReturnType<BossDataModule["readState"]>>;
|
||||
|
||||
/**
 * Lazily initialises the shared fixtures for this test file.
 *
 * On the first call it creates a temporary directory, points
 * `BOSS_RUNTIME_ROOT` / `BOSS_STATE_FILE` at it, dynamically imports the data,
 * auth and route modules, and snapshots the initial state. Later calls are
 * no-ops because `runtimeRoot` is already set.
 */
async function setup() {
  if (runtimeRoot) {
    return;
  }

  runtimeRoot = await mkdtemp(path.join(os.tmpdir(), "boss-task-recovery-"));

  // The environment is configured before the dynamic imports below.
  // NOTE(review): presumably the modules read these variables — confirm.
  process.env.BOSS_RUNTIME_ROOT = runtimeRoot;
  process.env.BOSS_STATE_FILE = path.join(runtimeRoot, "boss-state.json");

  const loaded = await Promise.all([
    import("../src/lib/boss-data.ts"),
    import("../src/lib/boss-auth.ts"),
    import("../src/app/api/v1/master-agent/tasks/[taskId]/recovery/route.ts"),
  ]);
  const bossData = loaded[0];
  const bossAuth = loaded[1];
  const recoveryRoute = loaded[2];

  data = bossData;
  authCookie = bossAuth.AUTH_SESSION_COOKIE;
  getRecovery = recoveryRoute.GET;
  postRecovery = recoveryRoute.POST;

  // Keep a detached copy so individual tests cannot mutate the baseline.
  const initialState = await data.readState();
  baseState = structuredClone(initialState);
}
|
||||
|
||||
// Delete the temporary runtime directory once the tests in this file are done.
test.after(async () => {
  if (!runtimeRoot) {
    return; // setup() never ran, so there is nothing to clean up
  }
  await rm(runtimeRoot, { recursive: true, force: true });
});
|
||||
|
||||
/**
 * Builds a `MasterAgentTask` fixture.
 *
 * The defaults describe a running `conversation_reply` task in phase
 * `executor_starting`, flagged `recoverable` with error code
 * `EXECUTOR_START_FAILED`, on its first of two attempts.
 *
 * @param overrides Fields that replace the defaults; later keys win.
 * @returns A new task object combining the defaults and the overrides.
 */
function task(overrides: Partial<MasterAgentTask>): MasterAgentTask {
  const defaults: MasterAgentTask = {
    taskId: "task-recoverable",
    projectId: "project-1",
    taskType: "conversation_reply",
    requestMessageId: "msg-1",
    requestText: "继续执行",
    executionPrompt: "继续执行",
    requestedBy: "Boss",
    requestedByAccount: "owner@boss.com",
    deviceId: "mac-1",
    status: "running",
    phase: "executor_starting",
    requestedAt: "2026-06-06T08:00:00.000Z",
    lastProgressAt: "2026-06-06T08:01:00.000Z",
    attemptCount: 1,
    maxAttempts: 2,
    recoverable: true,
    lastErrorCode: "EXECUTOR_START_FAILED",
  };

  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Creates an authenticated `NextRequest` aimed at the recovery endpoint of
 * the `task-recoverable` task.
 *
 * A fresh session for `owner@boss.com` (role `highest_admin`) is created via
 * `data.createAuthSession`, and its token is sent in the auth session cookie.
 *
 * @param method HTTP method of the request; defaults to `"GET"`.
 * @param body Optional payload. Any value other than `undefined` is
 *   serialised with `JSON.stringify`, so falsy JSON values such as `0`,
 *   `false`, `""` or `null` are sent rather than silently dropped.
 * @returns The request carrying a JSON content type and the session cookie.
 */
async function authedRequest(method = "GET", body?: unknown) {
  const session = await data.createAuthSession({
    account: "owner@boss.com",
    role: "highest_admin",
    displayName: "Owner",
    loginMethod: "password",
  });

  // Only omit the body when the caller did not supply one; a truthiness check
  // would also discard valid falsy payloads.
  const serializedBody = body === undefined ? undefined : JSON.stringify(body);

  return new NextRequest("http://127.0.0.1:3000/api/v1/master-agent/tasks/task-recoverable/recovery", {
    method,
    headers: {
      "content-type": "application/json",
      cookie: `${authCookie}=${session.sessionToken}`,
    },
    body: serializedBody,
  });
}
|
||||
|
||||
// Before every test: write a state cloned from the baseline that contains
// exactly one owner account and one default recoverable task.
test.beforeEach(async () => {
  await setup();

  const freshState = structuredClone(baseState);

  // Single highest_admin account matching the session created in authedRequest().
  freshState.authAccounts = [
    {
      id: "account-owner",
      account: "owner@boss.com",
      passwordHash: "secret",
      displayName: "Owner",
      role: "highest_admin",
      createdAt: "2026-06-06T08:00:00.000Z",
      updatedAt: "2026-06-06T08:00:00.000Z",
    },
  ];

  // Only the default fixture task ("task-recoverable") is present.
  freshState.masterAgentTasks = [task({})];

  await data.writeState(freshState);
});
|
||||
|
||||
// GET on the recovery route should report the seeded task as retryable and
// mention its current phase in the diagnosis.
test("task recovery GET returns safe diagnosis", async () => {
  const request = await authedRequest();
  const routeContext = { params: Promise.resolve({ taskId: "task-recoverable" }) };

  const response = await getRecovery(request, routeContext);
  assert.equal(response.status, 200);

  const { ok, recovery } = await response.json();
  assert.equal(ok, true);
  assert.equal(recovery.taskId, "task-recoverable");
  assert.equal(recovery.canRetry, true);
  assert.equal(recovery.safeNextAction, "retry");

  const mentionsPhase = recovery.diagnosis.includes("executor_starting");
  assert.equal(mentionsPhase, true);
});
|
||||
|
||||
// POST with action "retry" should put the seeded task back into the queue and
// leave a "master_agent.task_retried" entry in the permission audit log.
test("task recovery POST retry requeues only recoverable pre-turn task", async () => {
  const request = await authedRequest("POST", { action: "retry", reason: "executor recovered" });
  const routeContext = { params: Promise.resolve({ taskId: "task-recoverable" }) };

  const response = await postRecovery(request, routeContext);
  assert.equal(response.status, 200);

  const payload = await response.json();
  assert.equal(payload.ok, true);
  assert.equal(payload.task.status, "queued");
  assert.equal(payload.task.phase, "queued");

  // The retry must also be recorded in the persisted state.
  const { permissionAuditLogs } = await data.readState();
  const retryLogged = permissionAuditLogs.some((entry) => entry.action === "master_agent.task_retried");
  assert.equal(retryLogged, true);
});
|
||||
@@ -3,6 +3,7 @@ import assert from "node:assert/strict";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { mkdtemp, rm } from "node:fs/promises";
|
||||
import type { Device, MasterAgentTask } from "../src/lib/boss-data";
|
||||
|
||||
let runtimeRoot = "";
|
||||
let data: typeof import("../src/lib/boss-data");
|
||||
@@ -19,164 +20,181 @@ test.after(async () => {
|
||||
if (runtimeRoot) await rm(runtimeRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test.beforeEach(async () => {
|
||||
await setup();
|
||||
await rm(runtimeRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
async function queueDesktopTask(taskId: string) {
|
||||
return data.queueMasterAgentTask({
|
||||
function makeQueuedTask(taskId: string, overrides: Partial<MasterAgentTask> = {}): MasterAgentTask {
|
||||
return {
|
||||
taskId,
|
||||
projectId: "master-agent",
|
||||
taskType: "desktop_control",
|
||||
requestMessageId: `${taskId}-message`,
|
||||
requestText: "打开 Chrome",
|
||||
executionPrompt: "打开 Chrome",
|
||||
requestedBy: "krisolo",
|
||||
taskType: "conversation_reply",
|
||||
requestMessageId: `${taskId}-request`,
|
||||
requestText: "回复一句收到",
|
||||
executionPrompt: "回复一句收到",
|
||||
requestedBy: "Boss 测试",
|
||||
requestedByAccount: "krisolo",
|
||||
deviceId: "mac-studio",
|
||||
runtimeKind: "computer-use-runtime",
|
||||
controlPlatform: "macos",
|
||||
});
|
||||
status: "queued",
|
||||
requestedAt: new Date().toISOString(),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
test("claiming a desktop control task records attempt count and a server lease", async () => {
|
||||
await queueDesktopTask("lease-task");
|
||||
|
||||
const claimed = await data.claimNextMasterAgentTask("mac-studio");
|
||||
|
||||
assert.equal(claimed?.taskId, "lease-task");
|
||||
assert.equal(claimed?.status, "running");
|
||||
assert.equal(claimed?.attemptCount, 1);
|
||||
assert.ok(claimed?.leaseExpiresAt);
|
||||
assert.ok(Date.parse(claimed.leaseExpiresAt) > Date.parse(claimed.claimedAt ?? ""));
|
||||
});
|
||||
|
||||
test("an expired running desktop control task can be reclaimed with a new attempt", async () => {
|
||||
await queueDesktopTask("reclaim-task");
|
||||
const firstClaim = await data.claimNextMasterAgentTask("mac-studio");
|
||||
assert.equal(firstClaim?.attemptCount, 1);
|
||||
|
||||
test("master agent task claim, progress, and complete maintain reliability phase", async () => {
|
||||
await setup();
|
||||
const state = await data.readState();
|
||||
const task = state.masterAgentTasks.find((item) => item.taskId === "reclaim-task");
|
||||
assert.ok(task);
|
||||
task.claimedAt = "2000-01-01T00:00:00.000Z";
|
||||
task.leaseExpiresAt = "2000-01-01T00:01:00.000Z";
|
||||
await data.writeState(state);
|
||||
|
||||
const reclaimed = await data.claimNextMasterAgentTask("mac-studio");
|
||||
|
||||
assert.equal(reclaimed?.taskId, "reclaim-task");
|
||||
assert.equal(reclaimed?.status, "running");
|
||||
assert.equal(reclaimed?.attemptCount, 2);
|
||||
assert.notEqual(reclaimed?.claimedAt, firstClaim?.claimedAt);
|
||||
});
|
||||
|
||||
test("an expired running task is timed out after max attempts instead of being claimed forever", async () => {
|
||||
await queueDesktopTask("timeout-task");
|
||||
await data.claimNextMasterAgentTask("mac-studio");
|
||||
|
||||
const state = await data.readState();
|
||||
const task = state.masterAgentTasks.find((item) => item.taskId === "timeout-task");
|
||||
assert.ok(task);
|
||||
task.status = "running";
|
||||
task.attemptCount = 3;
|
||||
task.maxAttempts = 3;
|
||||
task.leaseExpiresAt = "2000-01-01T00:01:00.000Z";
|
||||
state.masterAgentTasks.unshift(makeQueuedTask("task-phase-normal"));
|
||||
await data.writeState(state);
|
||||
|
||||
const claimed = await data.claimNextMasterAgentTask("mac-studio");
|
||||
|
||||
assert.equal(claimed, null);
|
||||
const nextTask = (await data.readState()).masterAgentTasks.find((item) => item.taskId === "timeout-task");
|
||||
assert.equal(nextTask?.status, "timed_out");
|
||||
assert.match(nextTask?.errorMessage ?? "", /timed out/i);
|
||||
});
|
||||
|
||||
test("canceling a running task prevents late success completion from overwriting the terminal state", async () => {
|
||||
await queueDesktopTask("cancel-task");
|
||||
const claimed = await data.claimNextMasterAgentTask("mac-studio");
|
||||
assert.equal(claimed?.taskId, "task-phase-normal");
|
||||
assert.equal(claimed?.status, "running");
|
||||
assert.equal(claimed?.phase, "claimed");
|
||||
assert.ok(claimed?.lastProgressAt);
|
||||
|
||||
const canceled = await data.cancelMasterAgentTask({
|
||||
taskId: "cancel-task",
|
||||
actorAccount: "krisolo",
|
||||
reason: "用户取消演示任务",
|
||||
});
|
||||
assert.equal(canceled.status, "canceled");
|
||||
assert.ok(canceled.canceledAt);
|
||||
|
||||
const late = await data.completeMasterAgentTask({
|
||||
taskId: "cancel-task",
|
||||
deviceId: "mac-studio",
|
||||
status: "completed",
|
||||
replyBody: "迟到的成功结果",
|
||||
});
|
||||
|
||||
assert.equal(late.status, "canceled");
|
||||
assert.equal(late.replyBody, undefined);
|
||||
const finalTask = (await data.readState()).masterAgentTasks.find((item) => item.taskId === "cancel-task");
|
||||
assert.equal(finalTask?.status, "canceled");
|
||||
assert.equal(finalTask?.replyBody, undefined);
|
||||
});
|
||||
|
||||
test("streaming task progress updates mutate the progress card without completing the task", async () => {
|
||||
await queueDesktopTask("live-progress-task");
|
||||
const claimed = await data.claimNextMasterAgentTask("mac-studio");
|
||||
assert.equal(claimed?.status, "running");
|
||||
|
||||
const updated = await data.updateMasterAgentTaskProgress({
|
||||
taskId: "live-progress-task",
|
||||
const progressed = await data.updateMasterAgentTaskProgress({
|
||||
taskId: "task-phase-normal",
|
||||
deviceId: "mac-studio",
|
||||
status: "running",
|
||||
executionProgress: {
|
||||
steps: [
|
||||
{ text: "读取 app-server 事件流", status: "done" },
|
||||
{ text: "等待目标线程回复", status: "running" },
|
||||
],
|
||||
artifacts: [{ label: "codex_app_server_protocol.schemas.json", kind: "file" }],
|
||||
},
|
||||
phase: "awaiting_reply",
|
||||
});
|
||||
assert.equal(progressed.status, "running");
|
||||
assert.equal(progressed.phase, "awaiting_reply");
|
||||
assert.ok(progressed.leaseExpiresAt);
|
||||
|
||||
assert.equal(updated.status, "running");
|
||||
assert.equal(updated.completedAt, undefined);
|
||||
|
||||
const state = await data.readState();
|
||||
const progressMessage = state.projects
|
||||
.find((project) => project.id === "master-agent")
|
||||
?.messages.find((message) => message.executionProgress?.taskId === "live-progress-task");
|
||||
assert.equal(progressMessage?.executionProgress?.status, "running");
|
||||
assert.equal(progressMessage?.executionProgress?.steps[0]?.text, "读取 app-server 事件流");
|
||||
assert.equal(progressMessage?.executionProgress?.steps[1]?.status, "running");
|
||||
assert.equal(progressMessage?.executionProgress?.artifacts?.[0]?.label, "codex_app_server_protocol.schemas.json");
|
||||
const completed = await data.completeMasterAgentTask({
|
||||
taskId: "task-phase-normal",
|
||||
deviceId: "mac-studio",
|
||||
status: "completed",
|
||||
replyBody: "收到。",
|
||||
});
|
||||
assert.equal(completed.status, "completed");
|
||||
assert.equal(completed.phase, "completed");
|
||||
assert.equal(completed.recoverable, false);
|
||||
});
|
||||
|
||||
test("queued thread collaboration tasks retain source and target thread references", async () => {
|
||||
const task = await data.queueMasterAgentTask({
|
||||
taskId: "thread-collaboration-task",
|
||||
projectId: "master-agent",
|
||||
taskType: "conversation_reply",
|
||||
requestMessageId: "msg-thread-collaboration",
|
||||
requestText: "让源线程和目标线程对一下方案",
|
||||
executionPrompt: "让源线程和目标线程对一下方案",
|
||||
requestedBy: "krisolo",
|
||||
requestedByAccount: "krisolo",
|
||||
deviceId: "mac-studio",
|
||||
intentCategory: "thread_collaboration",
|
||||
sourceThreadId: "source-thread-id",
|
||||
sourceThreadDisplayName: "源线程",
|
||||
sourceCodexThreadRef: "019d-source-codex",
|
||||
targetThreadId: "target-thread-id",
|
||||
targetThreadDisplayName: "目标线程",
|
||||
targetCodexThreadRef: "019d-target-codex",
|
||||
});
|
||||
|
||||
assert.equal(task.intentCategory, "thread_collaboration");
|
||||
assert.equal(task.sourceThreadId, "source-thread-id");
|
||||
assert.equal(task.sourceThreadDisplayName, "源线程");
|
||||
assert.equal(task.sourceCodexThreadRef, "019d-source-codex");
|
||||
test("expired pre-turn task is safely requeued and claimed again", async () => {
|
||||
await setup();
|
||||
const state = await data.readState();
|
||||
state.masterAgentTasks.unshift(
|
||||
makeQueuedTask("task-phase-retry", {
|
||||
status: "running",
|
||||
phase: "executor_starting",
|
||||
claimedAt: "2020-01-01T08:00:00.000Z",
|
||||
lastProgressAt: "2020-01-01T08:00:00.000Z",
|
||||
leaseExpiresAt: "2020-01-01T08:01:00.000Z",
|
||||
attemptCount: 1,
|
||||
maxAttempts: 2,
|
||||
}),
|
||||
);
|
||||
await data.writeState(state);
|
||||
|
||||
const claimed = await data.claimNextMasterAgentTask("mac-studio");
|
||||
assert.equal(claimed?.sourceCodexThreadRef, "019d-source-codex");
|
||||
assert.equal(claimed?.targetCodexThreadRef, "019d-target-codex");
|
||||
assert.equal(claimed?.taskId, "task-phase-retry");
|
||||
assert.equal(claimed?.status, "running");
|
||||
assert.equal(claimed?.phase, "claimed");
|
||||
assert.equal(claimed?.attemptCount, 2);
|
||||
assert.equal(claimed?.recoverable, false);
|
||||
});
|
||||
|
||||
// A running task in phase "turn_started" whose lease expired long ago must not
// be handed out again; after the claim attempt it is stored as timed out and
// not recoverable.
test("expired task after turn start is timed out instead of duplicated", async () => {
  await setup();

  const staleTaskId = "task-phase-no-duplicate";

  const seededState = await data.readState();
  seededState.masterAgentTasks.unshift(
    makeQueuedTask(staleTaskId, {
      status: "running",
      phase: "turn_started",
      claimedAt: "2020-01-01T08:00:00.000Z",
      lastProgressAt: "2020-01-01T08:00:00.000Z",
      leaseExpiresAt: "2020-01-01T08:01:00.000Z",
      attemptCount: 1,
      maxAttempts: 2,
    }),
  );
  await data.writeState(seededState);

  // Whatever is claimed (possibly nothing), it must not be the stale task.
  const claimed = await data.claimNextMasterAgentTask("mac-studio");
  assert.notEqual(claimed?.taskId, staleTaskId);

  const { masterAgentTasks } = await data.readState();
  const staleTask = masterAgentTasks.find((item) => item.taskId === staleTaskId);
  assert.equal(staleTask?.status, "timed_out");
  assert.equal(staleTask?.phase, "timed_out");
  assert.equal(staleTask?.recoverable, false);
});
|
||||
|
||||
// resolveCodexAppServerHealth is checked against four inputs: no device, an
// offline and disconnected device, a connected device that reports errors in
// its metadata, and a connected device with empty metadata.
test("codex app server health distinguishes available, degraded, and unavailable", async () => {
  await setup();

  // No device at all.
  assert.equal(data.resolveCodexAppServerHealth(undefined), "unavailable");

  // Offline device whose app server is not connected.
  const offlineDevice = {
    id: "device-offline",
    name: "离线设备",
    avatar: "D",
    account: "krisolo",
    source: "production",
    status: "offline",
    projects: [],
    quota5h: 0,
    quota7d: 0,
    lastSeenAt: "2026-06-06T08:00:00.000Z",
    endpoint: "",
    token: "",
    note: "",
    capabilities: {
      codexAppServer: {
        connected: false,
        lastSeenAt: "2026-06-06T08:00:00.000Z",
      },
    },
  } satisfies Device;
  assert.equal(data.resolveCodexAppServerHealth(offlineDevice), "unavailable");

  // Online, connected device that reports an error entry in its metadata.
  const degradedDevice = {
    id: "device-degraded",
    name: "降级设备",
    avatar: "D",
    account: "krisolo",
    source: "production",
    status: "online",
    projects: [],
    quota5h: 0,
    quota7d: 0,
    lastSeenAt: new Date().toISOString(),
    endpoint: "",
    token: "",
    note: "",
    capabilities: {
      codexAppServer: {
        connected: true,
        lastSeenAt: new Date().toISOString(),
        metadata: { errors: ["thread/turns/list:STDIN_CLOSED"] },
      },
    },
  } satisfies Device;
  assert.equal(data.resolveCodexAppServerHealth(degradedDevice), "degraded");

  // Online, connected device with empty metadata.
  const availableDevice = {
    id: "device-available",
    name: "可用设备",
    avatar: "D",
    account: "krisolo",
    source: "production",
    status: "online",
    projects: [],
    quota5h: 0,
    quota7d: 0,
    lastSeenAt: new Date().toISOString(),
    endpoint: "",
    token: "",
    note: "",
    capabilities: {
      codexAppServer: {
        connected: true,
        lastSeenAt: new Date().toISOString(),
        metadata: {},
      },
    },
  } satisfies Device;
  assert.equal(data.resolveCodexAppServerHealth(availableDevice), "available");
});
|
||||
|
||||
Reference in New Issue
Block a user