feat: add master agent task recovery endpoint

This commit is contained in:
AI Bot
2026-06-06 19:05:42 +08:00
parent 755e30612c
commit 643da5b738
6 changed files with 945 additions and 168 deletions

View File

@@ -273,6 +273,27 @@ test.beforeEach(async () => {
createdAt: now,
},
];
state.masterAgentTasks = [
{
taskId: "task-stale",
projectId: "project-acme",
taskType: "conversation_reply",
requestMessageId: "message-stale-request",
requestText: "请继续处理 Acme 生产项目的等待回复。",
executionPrompt: "继续 Acme 生产项目的 conversation_reply并回写安全摘要。",
requestedBy: "开发同事",
requestedByAccount: "dev@acme.com",
deviceId: "win-1",
status: "running",
phase: "awaiting_reply",
requestedAt: "2026-04-30T08:00:00+08:00",
claimedAt: "2026-04-30T08:01:00+08:00",
lastProgressAt: "2026-04-30T08:01:00+08:00",
leaseExpiresAt: "2026-04-30T08:02:00+08:00",
attemptCount: 1,
maxAttempts: 2,
},
];
await data.writeState(state);
});
@@ -370,6 +391,13 @@ test("backoffice bff exposes yudao style management contract without secrets", a
["Boss API", "OTA", "Codex Provider", "Computer Use", "Skill Hub"],
);
assert.equal(payload.insights.riskAggregates.some((item: { label: string }) => item.label === "设备离线"), true);
assert.equal(payload.insights.dataSafetySummary.restorePointCount >= 0, true);
assert.match(payload.insights.dataSafetySummary.rpoLabel, /文件 MVP|企业标准/);
assert.equal(Array.isArray(payload.insights.taskRiskSummary.rows), true);
assert.equal(typeof payload.insights.taskRiskSummary.counts.stale, "number");
const staleTask = payload.insights.taskRiskSummary.rows.find((row: { taskId: string }) => row.taskId === "task-stale");
assert.equal(staleTask?.stale, true);
assert.equal(staleTask?.phase, "awaiting_reply");
assert.equal(payload.yudaoMapping.tenant, "adminCompanies");
assert.equal(payload.yudaoMapping.user, "authAccounts");
assert.equal(payload.yudaoMapping.role, "BOSS_PERMISSION_TEMPLATES");

View File

@@ -0,0 +1,122 @@
import test from "node:test";
import assert from "node:assert/strict";
import os from "node:os";
import path from "node:path";
import { mkdtemp, rm } from "node:fs/promises";
import { NextRequest } from "next/server";
import type { MasterAgentTask } from "../src/lib/boss-data";
let runtimeRoot = "";
let data: typeof import("../src/lib/boss-data.ts");
let authCookie = "";
let getRecovery: (typeof import("../src/app/api/v1/master-agent/tasks/[taskId]/recovery/route.ts"))["GET"];
let postRecovery: (typeof import("../src/app/api/v1/master-agent/tasks/[taskId]/recovery/route.ts"))["POST"];
let baseState: Awaited<ReturnType<typeof import("../src/lib/boss-data.ts")["readState"]>>;
/**
 * Lazy, one-time bootstrap for this test file.
 *
 * Creates a throwaway runtime directory under the OS temp dir, points
 * BOSS_RUNTIME_ROOT and BOSS_STATE_FILE at it, then dynamically loads the
 * data, auth, and recovery-route modules. The imports are issued only after
 * the env assignments (presumably because those modules read the variables
 * when they are evaluated — confirm against boss-data). A deep copy of the
 * initial state is stored in `baseState` so each test can start from it.
 *
 * Subsequent calls return immediately because `runtimeRoot` is already set.
 */
async function setup() {
  if (runtimeRoot !== "") {
    return;
  }

  const tempPrefix = path.join(os.tmpdir(), "boss-task-recovery-");
  runtimeRoot = await mkdtemp(tempPrefix);
  process.env.BOSS_RUNTIME_ROOT = runtimeRoot;
  process.env.BOSS_STATE_FILE = path.join(runtimeRoot, "boss-state.json");

  // Kick off all three imports together, in the same order as before.
  const pendingData = import("../src/lib/boss-data.ts");
  const pendingAuth = import("../src/lib/boss-auth.ts");
  const pendingRoute = import("../src/app/api/v1/master-agent/tasks/[taskId]/recovery/route.ts");
  const [loadedData, loadedAuth, loadedRoute] = await Promise.all([pendingData, pendingAuth, pendingRoute]);

  data = loadedData;
  authCookie = loadedAuth.AUTH_SESSION_COOKIE;
  ({ GET: getRecovery, POST: postRecovery } = loadedRoute);

  const initialState = await data.readState();
  baseState = structuredClone(initialState);
}
// File-level teardown: remove the temp runtime directory, if setup() ever created one.
test.after(async () => {
  if (!runtimeRoot) {
    return;
  }
  await rm(runtimeRoot, { recursive: true, force: true });
});
/**
 * Builds a MasterAgentTask fixture for the recovery tests.
 *
 * The defaults describe a running task in the "executor_starting" phase that
 * is flagged recoverable and carries the EXECUTOR_START_FAILED error code.
 *
 * @param overrides Fields that replace (or extend) the defaults.
 * @returns A new task object: the defaults with `overrides` applied on top.
 */
function task(overrides: Partial<MasterAgentTask>): MasterAgentTask {
  const recoverableDefaults: MasterAgentTask = {
    taskId: "task-recoverable",
    projectId: "project-1",
    taskType: "conversation_reply",
    requestMessageId: "msg-1",
    requestText: "继续执行",
    executionPrompt: "继续执行",
    requestedBy: "Boss",
    requestedByAccount: "owner@boss.com",
    deviceId: "mac-1",
    status: "running",
    phase: "executor_starting",
    requestedAt: "2026-06-06T08:00:00.000Z",
    lastProgressAt: "2026-06-06T08:01:00.000Z",
    attemptCount: 1,
    maxAttempts: 2,
    recoverable: true,
    lastErrorCode: "EXECUTOR_START_FAILED",
  };
  // Later spread wins, so any field present in overrides replaces its default.
  return { ...recoverableDefaults, ...overrides };
}
/**
 * Creates a NextRequest for the task recovery endpoint, authenticated as the
 * highest_admin owner account via a freshly created session cookie.
 *
 * @param method HTTP method for the request (defaults to "GET").
 * @param body   Optional JSON payload. Any value other than `undefined` is
 *               serialized, so falsy payloads (`0`, `false`, `""`, `null`)
 *               are sent rather than silently dropped.
 * @param taskId Task id placed in the request URL (defaults to the fixture
 *               id "task-recoverable", matching the previous hard-coded URL).
 * @returns The constructed request carrying the session cookie header.
 */
async function authedRequest(method = "GET", body?: unknown, taskId = "task-recoverable") {
  const session = await data.createAuthSession({
    account: "owner@boss.com",
    role: "highest_admin",
    displayName: "Owner",
    loginMethod: "password",
  });
  const url = `http://127.0.0.1:3000/api/v1/master-agent/tasks/${encodeURIComponent(taskId)}/recovery`;
  return new NextRequest(url, {
    method,
    headers: {
      "content-type": "application/json",
      cookie: `${authCookie}=${session.sessionToken}`,
    },
    // Compare against undefined explicitly: a truthiness check would discard
    // legitimate falsy JSON bodies.
    body: body === undefined ? undefined : JSON.stringify(body),
  });
}
// Before every test: start from a deep copy of the pristine state, install a
// single highest_admin owner account and one default recoverable task, and
// persist the result.
test.beforeEach(async () => {
  await setup();

  const seededAt = "2026-06-06T08:00:00.000Z";
  const freshState = structuredClone(baseState);
  freshState.authAccounts = [
    {
      id: "account-owner",
      account: "owner@boss.com",
      passwordHash: "secret",
      displayName: "Owner",
      role: "highest_admin",
      createdAt: seededAt,
      updatedAt: seededAt,
    },
  ];
  freshState.masterAgentTasks = [task({})];

  await data.writeState(freshState);
});
// GET on the recovery route for the seeded task should report it as
// retryable and mention the phase it is in.
test("task recovery GET returns safe diagnosis", async () => {
  const request = await authedRequest();
  const routeContext = { params: Promise.resolve({ taskId: "task-recoverable" }) };

  const response = await getRecovery(request, routeContext);
  assert.equal(response.status, 200);

  const { ok, recovery } = await response.json();
  assert.equal(ok, true);
  assert.equal(recovery.taskId, "task-recoverable");
  assert.equal(recovery.canRetry, true);
  assert.equal(recovery.safeNextAction, "retry");

  const mentionsPhase = recovery.diagnosis.includes("executor_starting");
  assert.equal(mentionsPhase, true);
});
// POST { action: "retry" } should put the seeded task back into the queue
// and leave a "master_agent.task_retried" entry in the permission audit log.
test("task recovery POST retry requeues only recoverable pre-turn task", async () => {
  const retryRequest = await authedRequest("POST", { action: "retry", reason: "executor recovered" });
  const routeContext = { params: Promise.resolve({ taskId: "task-recoverable" }) };

  const response = await postRecovery(retryRequest, routeContext);
  assert.equal(response.status, 200);

  const { ok, task: requeued } = await response.json();
  assert.equal(ok, true);
  assert.equal(requeued.status, "queued");
  assert.equal(requeued.phase, "queued");

  const { permissionAuditLogs } = await data.readState();
  const retryAudited = permissionAuditLogs.some((entry) => entry.action === "master_agent.task_retried");
  assert.equal(retryAudited, true);
});

View File

@@ -3,6 +3,7 @@ import assert from "node:assert/strict";
import os from "node:os";
import path from "node:path";
import { mkdtemp, rm } from "node:fs/promises";
import type { Device, MasterAgentTask } from "../src/lib/boss-data";
let runtimeRoot = "";
let data: typeof import("../src/lib/boss-data");
@@ -19,164 +20,181 @@ test.after(async () => {
if (runtimeRoot) await rm(runtimeRoot, { recursive: true, force: true });
});
// Per-test reset: run setup() (defined outside this view), then delete the
// entire runtime directory recursively; `force` means a missing directory is
// not an error.
// NOTE(review): presumably boss-data recreates its state under runtimeRoot on
// the next read/write — confirm, since nothing here re-creates the directory.
test.beforeEach(async () => {
await setup();
await rm(runtimeRoot, { recursive: true, force: true });
});
async function queueDesktopTask(taskId: string) {
return data.queueMasterAgentTask({
function makeQueuedTask(taskId: string, overrides: Partial<MasterAgentTask> = {}): MasterAgentTask {
return {
taskId,
projectId: "master-agent",
taskType: "desktop_control",
requestMessageId: `${taskId}-message`,
requestText: "打开 Chrome",
executionPrompt: "打开 Chrome",
requestedBy: "krisolo",
taskType: "conversation_reply",
requestMessageId: `${taskId}-request`,
requestText: "回复一句收到",
executionPrompt: "回复一句收到",
requestedBy: "Boss 测试",
requestedByAccount: "krisolo",
deviceId: "mac-studio",
runtimeKind: "computer-use-runtime",
controlPlatform: "macos",
});
status: "queued",
requestedAt: new Date().toISOString(),
...overrides,
};
}
// Claiming a freshly queued task should mark it running, count the first
// attempt, and stamp a lease that expires after the claim time.
test("claiming a desktop control task records attempt count and a server lease", async () => {
  await queueDesktopTask("lease-task");

  const claimed = await data.claimNextMasterAgentTask("mac-studio");
  assert.equal(claimed?.taskId, "lease-task");
  assert.equal(claimed?.status, "running");
  assert.equal(claimed?.attemptCount, 1);

  assert.ok(claimed?.leaseExpiresAt);
  const leaseEndsAt = Date.parse(claimed.leaseExpiresAt);
  // A missing claimedAt parses to NaN, which makes the comparison below fail.
  const claimStartedAt = Date.parse(claimed.claimedAt ?? "");
  assert.ok(leaseEndsAt > claimStartedAt);
});
test("an expired running desktop control task can be reclaimed with a new attempt", async () => {
await queueDesktopTask("reclaim-task");
const firstClaim = await data.claimNextMasterAgentTask("mac-studio");
assert.equal(firstClaim?.attemptCount, 1);
test("master agent task claim, progress, and complete maintain reliability phase", async () => {
await setup();
const state = await data.readState();
const task = state.masterAgentTasks.find((item) => item.taskId === "reclaim-task");
assert.ok(task);
task.claimedAt = "2000-01-01T00:00:00.000Z";
task.leaseExpiresAt = "2000-01-01T00:01:00.000Z";
await data.writeState(state);
const reclaimed = await data.claimNextMasterAgentTask("mac-studio");
assert.equal(reclaimed?.taskId, "reclaim-task");
assert.equal(reclaimed?.status, "running");
assert.equal(reclaimed?.attemptCount, 2);
assert.notEqual(reclaimed?.claimedAt, firstClaim?.claimedAt);
});
test("an expired running task is timed out after max attempts instead of being claimed forever", async () => {
await queueDesktopTask("timeout-task");
await data.claimNextMasterAgentTask("mac-studio");
const state = await data.readState();
const task = state.masterAgentTasks.find((item) => item.taskId === "timeout-task");
assert.ok(task);
task.status = "running";
task.attemptCount = 3;
task.maxAttempts = 3;
task.leaseExpiresAt = "2000-01-01T00:01:00.000Z";
state.masterAgentTasks.unshift(makeQueuedTask("task-phase-normal"));
await data.writeState(state);
const claimed = await data.claimNextMasterAgentTask("mac-studio");
assert.equal(claimed, null);
const nextTask = (await data.readState()).masterAgentTasks.find((item) => item.taskId === "timeout-task");
assert.equal(nextTask?.status, "timed_out");
assert.match(nextTask?.errorMessage ?? "", /timed out/i);
});
test("canceling a running task prevents late success completion from overwriting the terminal state", async () => {
await queueDesktopTask("cancel-task");
const claimed = await data.claimNextMasterAgentTask("mac-studio");
assert.equal(claimed?.taskId, "task-phase-normal");
assert.equal(claimed?.status, "running");
assert.equal(claimed?.phase, "claimed");
assert.ok(claimed?.lastProgressAt);
const canceled = await data.cancelMasterAgentTask({
taskId: "cancel-task",
actorAccount: "krisolo",
reason: "用户取消演示任务",
});
assert.equal(canceled.status, "canceled");
assert.ok(canceled.canceledAt);
const late = await data.completeMasterAgentTask({
taskId: "cancel-task",
deviceId: "mac-studio",
status: "completed",
replyBody: "迟到的成功结果",
});
assert.equal(late.status, "canceled");
assert.equal(late.replyBody, undefined);
const finalTask = (await data.readState()).masterAgentTasks.find((item) => item.taskId === "cancel-task");
assert.equal(finalTask?.status, "canceled");
assert.equal(finalTask?.replyBody, undefined);
});
test("streaming task progress updates mutate the progress card without completing the task", async () => {
await queueDesktopTask("live-progress-task");
const claimed = await data.claimNextMasterAgentTask("mac-studio");
assert.equal(claimed?.status, "running");
const updated = await data.updateMasterAgentTaskProgress({
taskId: "live-progress-task",
const progressed = await data.updateMasterAgentTaskProgress({
taskId: "task-phase-normal",
deviceId: "mac-studio",
status: "running",
executionProgress: {
steps: [
{ text: "读取 app-server 事件流", status: "done" },
{ text: "等待目标线程回复", status: "running" },
],
artifacts: [{ label: "codex_app_server_protocol.schemas.json", kind: "file" }],
},
phase: "awaiting_reply",
});
assert.equal(progressed.status, "running");
assert.equal(progressed.phase, "awaiting_reply");
assert.ok(progressed.leaseExpiresAt);
assert.equal(updated.status, "running");
assert.equal(updated.completedAt, undefined);
const state = await data.readState();
const progressMessage = state.projects
.find((project) => project.id === "master-agent")
?.messages.find((message) => message.executionProgress?.taskId === "live-progress-task");
assert.equal(progressMessage?.executionProgress?.status, "running");
assert.equal(progressMessage?.executionProgress?.steps[0]?.text, "读取 app-server 事件流");
assert.equal(progressMessage?.executionProgress?.steps[1]?.status, "running");
assert.equal(progressMessage?.executionProgress?.artifacts?.[0]?.label, "codex_app_server_protocol.schemas.json");
const completed = await data.completeMasterAgentTask({
taskId: "task-phase-normal",
deviceId: "mac-studio",
status: "completed",
replyBody: "收到。",
});
assert.equal(completed.status, "completed");
assert.equal(completed.phase, "completed");
assert.equal(completed.recoverable, false);
});
test("queued thread collaboration tasks retain source and target thread references", async () => {
const task = await data.queueMasterAgentTask({
taskId: "thread-collaboration-task",
projectId: "master-agent",
taskType: "conversation_reply",
requestMessageId: "msg-thread-collaboration",
requestText: "让源线程和目标线程对一下方案",
executionPrompt: "让源线程和目标线程对一下方案",
requestedBy: "krisolo",
requestedByAccount: "krisolo",
deviceId: "mac-studio",
intentCategory: "thread_collaboration",
sourceThreadId: "source-thread-id",
sourceThreadDisplayName: "源线程",
sourceCodexThreadRef: "019d-source-codex",
targetThreadId: "target-thread-id",
targetThreadDisplayName: "目标线程",
targetCodexThreadRef: "019d-target-codex",
});
assert.equal(task.intentCategory, "thread_collaboration");
assert.equal(task.sourceThreadId, "source-thread-id");
assert.equal(task.sourceThreadDisplayName, "源线程");
assert.equal(task.sourceCodexThreadRef, "019d-source-codex");
test("expired pre-turn task is safely requeued and claimed again", async () => {
await setup();
const state = await data.readState();
state.masterAgentTasks.unshift(
makeQueuedTask("task-phase-retry", {
status: "running",
phase: "executor_starting",
claimedAt: "2020-01-01T08:00:00.000Z",
lastProgressAt: "2020-01-01T08:00:00.000Z",
leaseExpiresAt: "2020-01-01T08:01:00.000Z",
attemptCount: 1,
maxAttempts: 2,
}),
);
await data.writeState(state);
const claimed = await data.claimNextMasterAgentTask("mac-studio");
assert.equal(claimed?.sourceCodexThreadRef, "019d-source-codex");
assert.equal(claimed?.targetCodexThreadRef, "019d-target-codex");
assert.equal(claimed?.taskId, "task-phase-retry");
assert.equal(claimed?.status, "running");
assert.equal(claimed?.phase, "claimed");
assert.equal(claimed?.attemptCount, 2);
assert.equal(claimed?.recoverable, false);
});
// A running task whose lease expired while in the "turn_started" phase must
// not be handed out again; the claim pass should leave it timed out and
// non-recoverable.
test("expired task after turn start is timed out instead of duplicated", async () => {
  await setup();

  const seeded = await data.readState();
  const expiredAfterTurnStart = makeQueuedTask("task-phase-no-duplicate", {
    status: "running",
    phase: "turn_started",
    claimedAt: "2020-01-01T08:00:00.000Z",
    lastProgressAt: "2020-01-01T08:00:00.000Z",
    leaseExpiresAt: "2020-01-01T08:01:00.000Z",
    attemptCount: 1,
    maxAttempts: 2,
  });
  seeded.masterAgentTasks.unshift(expiredAfterTurnStart);
  await data.writeState(seeded);

  const claimed = await data.claimNextMasterAgentTask("mac-studio");
  assert.notEqual(claimed?.taskId, "task-phase-no-duplicate");

  const { masterAgentTasks } = await data.readState();
  const settled = masterAgentTasks.find(({ taskId }) => taskId === "task-phase-no-duplicate");
  assert.equal(settled?.status, "timed_out");
  assert.equal(settled?.phase, "timed_out");
  assert.equal(settled?.recoverable, false);
});
// resolveCodexAppServerHealth should map:
//   - no device at all                                   -> "unavailable"
//   - an offline device with a disconnected capability   -> "unavailable"
//   - an online, connected device reporting errors       -> "degraded"
//   - an online, connected device with empty metadata    -> "available"
test("codex app server health distinguishes available, degraded, and unavailable", async () => {
  await setup();

  assert.equal(data.resolveCodexAppServerHealth(undefined), "unavailable");

  const offlineDevice = {
    id: "device-offline",
    name: "离线设备",
    avatar: "D",
    account: "krisolo",
    source: "production",
    status: "offline",
    projects: [],
    quota5h: 0,
    quota7d: 0,
    lastSeenAt: "2026-06-06T08:00:00.000Z",
    endpoint: "",
    token: "",
    note: "",
    capabilities: {
      codexAppServer: {
        connected: false,
        lastSeenAt: "2026-06-06T08:00:00.000Z",
      },
    },
  } satisfies Device;
  assert.equal(data.resolveCodexAppServerHealth(offlineDevice), "unavailable");

  const degradedDevice = {
    id: "device-degraded",
    name: "降级设备",
    avatar: "D",
    account: "krisolo",
    source: "production",
    status: "online",
    projects: [],
    quota5h: 0,
    quota7d: 0,
    lastSeenAt: new Date().toISOString(),
    endpoint: "",
    token: "",
    note: "",
    capabilities: {
      codexAppServer: {
        connected: true,
        lastSeenAt: new Date().toISOString(),
        metadata: { errors: ["thread/turns/list:STDIN_CLOSED"] },
      },
    },
  } satisfies Device;
  assert.equal(data.resolveCodexAppServerHealth(degradedDevice), "degraded");

  const availableDevice = {
    id: "device-available",
    name: "可用设备",
    avatar: "D",
    account: "krisolo",
    source: "production",
    status: "online",
    projects: [],
    quota5h: 0,
    quota7d: 0,
    lastSeenAt: new Date().toISOString(),
    endpoint: "",
    token: "",
    note: "",
    capabilities: {
      codexAppServer: {
        connected: true,
        lastSeenAt: new Date().toISOString(),
        metadata: {},
      },
    },
  } satisfies Device;
  assert.equal(data.resolveCodexAppServerHealth(availableDevice), "available");
});