chore: checkpoint Boss app v2.5.11

This commit is contained in:
AI Bot
2026-06-08 12:22:50 +08:00
parent bddbe8b5ba
commit 3b51641d99
78 changed files with 5706 additions and 954 deletions

View File

@@ -0,0 +1,27 @@
function trimToDefined(value) {
const text = typeof value === "string" ? value.trim() : "";
return text || undefined;
}
function isActiveMasterTask(runtime = {}) {
const active = runtime.activeMasterTask;
return (
runtime.masterTaskBusy === true ||
active?.status === "running" ||
active?.status === "active"
);
}
export function shouldSkipCodexAppServerDiscovery({ config = {}, runtime = {} } = {}) {
if (config.codexAppServerDiscoveryWhileMasterTaskBusy === true) {
return { skip: false };
}
if (!isActiveMasterTask(runtime)) {
return { skip: false };
}
return {
skip: true,
reason: "master_task_running",
activeTaskId: trimToDefined(runtime.activeMasterTask?.taskId),
};
}

View File

@@ -0,0 +1,53 @@
import test from "node:test";
import assert from "node:assert/strict";
import {
shouldSkipCodexAppServerDiscovery,
} from "./codex-app-server-discovery-guard.mjs";
test("codex app-server discovery is skipped while a master task is running", () => {
const decision = shouldSkipCodexAppServerDiscovery({
runtime: {
masterTaskBusy: true,
activeMasterTask: {
taskId: "mastertask-running",
status: "running",
startedAt: "2026-06-07T07:35:33.368Z",
},
},
});
assert.deepEqual(decision, {
skip: true,
reason: "master_task_running",
activeTaskId: "mastertask-running",
});
});
test("codex app-server discovery is allowed when explicit busy discovery is enabled", () => {
const decision = shouldSkipCodexAppServerDiscovery({
config: {
codexAppServerDiscoveryWhileMasterTaskBusy: true,
},
runtime: {
masterTaskBusy: true,
activeMasterTask: {
taskId: "mastertask-running",
status: "running",
},
},
});
assert.deepEqual(decision, { skip: false });
});
test("codex app-server discovery is allowed when no master task is active", () => {
const decision = shouldSkipCodexAppServerDiscovery({
runtime: {
masterTaskBusy: false,
activeMasterTask: null,
},
});
assert.deepEqual(decision, { skip: false });
});

View File

@@ -46,6 +46,39 @@ function resolveTaskTurnRef(task) {
return trimToDefined(task?.targetCodexTurnId || task?.targetTurnId);
}
function isActiveTurnStatus(status) {
const normalized = String(status ?? "").trim().toLowerCase().replace(/[\s_-]+/g, "");
return (
normalized === "active" ||
normalized === "running" ||
normalized === "streaming" ||
normalized === "inprogress"
);
}
function resolveActiveTurnRefFromThreadResult(threadResult) {
const activeTurns = asArray(threadResult?.thread?.turns)
.map((turn, index) => {
const id = trimToDefined(turn?.id ?? turn?.turnId);
if (!id) {
return null;
}
const status = extractDiscoveryTurnStatus(turn);
const completedAt = trimToDefined(turn?.completedAt);
if (completedAt || !isActiveTurnStatus(status)) {
return null;
}
const startedAt = Number(turn?.startedAt ?? turn?.createdAt ?? turn?.updatedAt ?? 0);
return {
id,
order: Number.isFinite(startedAt) && startedAt > 0 ? startedAt : index,
};
})
.filter(Boolean)
.sort((left, right) => right.order - left.order);
return activeTurns[0]?.id;
}
function resolveSourceThreadRef(task) {
return trimToDefined(task?.sourceCodexThreadRef || task?.sourceThreadId);
}
@@ -177,7 +210,7 @@ function waitForCompactNotificationSettle() {
function normalizeTimeoutMs(value) {
const numeric = Number(value);
return Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 120_000;
return Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 600_000;
}
function normalizePositiveInteger(value, fallback) {
@@ -471,6 +504,7 @@ function openStdioCodexAppServerTransport(runnerConfig, cwd, handlers) {
stdio: ["pipe", "pipe", "pipe"],
});
let stderr = "";
let closed = false;
const rl = readline.createInterface({ input: child.stdout });
rl.on("line", handlers.onLine);
child.stderr.on("data", (chunk) => {
@@ -478,18 +512,33 @@ function openStdioCodexAppServerTransport(runnerConfig, cwd, handlers) {
});
child.on("error", handlers.onError);
child.on("close", (code) => {
closed = true;
handlers.onClose({
code,
message: stderr.trim() || `CODEX_APP_SERVER_EXITED:${code ?? "unknown"}`,
});
});
child.stdin.on("error", (error) => {
closed = true;
handlers.onError(error);
});
return {
transport: "stdio",
send(line, callback) {
child.stdin.write(`${line}\n`, callback);
if (closed || child.stdin.destroyed || !child.stdin.writable) {
callback?.(new Error("CODEX_APP_SERVER_STDIN_CLOSED"));
return;
}
try {
child.stdin.write(`${line}\n`, callback);
} catch (error) {
callback?.(error);
handlers.onError(error);
}
},
close(signal = "SIGTERM") {
closed = true;
rl.close();
if (!child.killed) {
child.kill(signal);
@@ -3502,6 +3551,7 @@ export async function executeCodexAppServerTask(runnerConfig, task) {
if (!threadId) {
throw new Error("CODEX_APP_SERVER_THREAD_ID_MISSING");
}
const effectiveTurnRef = targetTurnRef || resolveActiveTurnRefFromThreadResult(threadResult);
if (isThreadRollbackTask(task)) {
const numTurns = resolveRollbackNumTurns(task);
@@ -3553,15 +3603,15 @@ export async function executeCodexAppServerTask(runnerConfig, task) {
request,
task,
targetThreadId: threadId,
targetTurnId: targetTurnRef,
targetTurnId: effectiveTurnRef,
hasExistingThreadRef: Boolean(targetThreadRef),
});
const turnControl = targetTurnRef ? "steer" : "start";
const turnResult = targetTurnRef
const turnControl = effectiveTurnRef ? "steer" : "start";
const turnResult = effectiveTurnRef
? await request("turn/steer", {
threadId,
expectedTurnId: targetTurnRef,
expectedTurnId: effectiveTurnRef,
input: [{ type: "text", text: prompt }],
})
: await request("turn/start", {
@@ -3571,7 +3621,7 @@ export async function executeCodexAppServerTask(runnerConfig, task) {
model: runnerConfig.model,
});
activeTurnStarted = true;
const activeTurnId = trimToDefined(turnResult?.turn?.id) || targetTurnRef;
const activeTurnId = trimToDefined(turnResult?.turn?.id) || effectiveTurnRef;
startActiveTurnInterruptPolling({ threadId, turnId: activeTurnId });
await turnCompleted;
if (progressEmits.length > 0) {

View File

@@ -590,22 +590,58 @@ export async function discoverCodexProjectCandidatesInWorker(options = {}) {
options,
},
});
const timeoutMs = Number(options.timeoutMs);
const effectiveTimeoutMs = Number.isFinite(timeoutMs) && timeoutMs > 0 ? timeoutMs : 0;
let settled = false;
let timeout;
const cleanup = () => {
if (timeout) {
clearTimeout(timeout);
}
};
const resolveOnce = (value) => {
if (settled) {
return;
}
settled = true;
cleanup();
resolvePromise(value);
};
const rejectOnce = (error) => {
if (settled) {
return;
}
settled = true;
cleanup();
rejectPromise(error);
};
if (effectiveTimeoutMs > 0) {
timeout = setTimeout(() => {
rejectOnce(new Error("DISCOVERY_WORKER_TIMEOUT"));
worker.terminate().catch(() => null);
}, effectiveTimeoutMs);
}
worker.once("message", (payload) => {
if (payload?.ok) {
resolvePromise(payload.result);
resolveOnce(payload.result);
return;
}
rejectPromise(new Error(payload?.error ?? "DISCOVERY_WORKER_FAILED"));
rejectOnce(new Error(payload?.error ?? "DISCOVERY_WORKER_FAILED"));
});
worker.once("error", rejectPromise);
worker.once("error", rejectOnce);
worker.once("exit", (code) => {
if (code === 0) {
cleanup();
return;
}
rejectPromise(new Error(`DISCOVERY_WORKER_EXIT_${code}`));
rejectOnce(new Error(`DISCOVERY_WORKER_EXIT_${code}`));
});
});
}

View File

@@ -2,6 +2,20 @@
"bindHost": "127.0.0.1",
"port": 4317,
"heartbeatIntervalMs": 15000,
"heartbeatTimeoutMs": 12000,
"heartbeatOutboxReplayLimit": 5,
"heartbeatOutboxRequestTimeoutMs": 1000,
"heartbeatOutboxReplayBudgetMs": 2500,
"heartbeatPostTimeoutMs": 4000,
"threadContextPostTimeoutMs": 1000,
"skillsPostTimeoutMs": 1000,
"reliableOutboxRequestTimeoutMs": 5000,
"codexSessionDiscoveryTimeoutMs": 3500,
"codexSessionDiscoveryWhileMasterTaskBusy": false,
"masterAgentClaimTimeoutPaddingMs": 5000,
"masterAgentControlStateTimeoutMs": 3000,
"skillLifecycleClaimTimeoutMs": 5000,
"skillLifecycleCompleteTimeoutMs": 5000,
"masterAgentPollIntervalMs": 1000,
"skillLifecycleEnabled": true,
"skillLifecyclePollIntervalMs": 5000,
@@ -30,11 +44,14 @@
"app-server"
],
"codexAppServerWorkdir": "/Users/kris/code/boss",
"codexAppServerTimeoutMs": 120000,
"codexAppServerTimeoutMs": 600000,
"codexAppServerDiscoveryEnabled": true,
"codexAppServerDiscoveryInlineInHeartbeat": false,
"codexAppServerDiscoveryWhileMasterTaskBusy": false,
"codexAppServerDiscoveryTtlMs": 300000,
"codexAppServerDiscoveryLimit": 20,
"codexAppServerFallbackToCli": true,
"masterAgentLongTaskProgressIntervalMs": 20000,
"codexRemoteControlEnabled": true,
"codexRemoteControlCommand": "codex",
"codexRemoteControlArgs": [

View File

@@ -2,6 +2,20 @@
"bindHost": "127.0.0.1",
"port": 4317,
"heartbeatIntervalMs": 15000,
"heartbeatTimeoutMs": 12000,
"heartbeatOutboxReplayLimit": 5,
"heartbeatOutboxRequestTimeoutMs": 1000,
"heartbeatOutboxReplayBudgetMs": 2500,
"heartbeatPostTimeoutMs": 4000,
"threadContextPostTimeoutMs": 1000,
"skillsPostTimeoutMs": 1000,
"reliableOutboxRequestTimeoutMs": 5000,
"codexSessionDiscoveryTimeoutMs": 3500,
"codexSessionDiscoveryWhileMasterTaskBusy": false,
"masterAgentClaimTimeoutPaddingMs": 5000,
"masterAgentControlStateTimeoutMs": 3000,
"skillLifecycleClaimTimeoutMs": 5000,
"skillLifecycleCompleteTimeoutMs": 5000,
"masterAgentPollIntervalMs": 1000,
"skillLifecycleEnabled": true,
"skillLifecyclePollIntervalMs": 5000,
@@ -34,9 +48,12 @@
"codexAppServerWorkdir": "/Users/kris/code/boss",
"codexAppServerTimeoutMs": 120000,
"codexAppServerDiscoveryEnabled": true,
"codexAppServerDiscoveryInlineInHeartbeat": false,
"codexAppServerDiscoveryWhileMasterTaskBusy": false,
"codexAppServerDiscoveryTtlMs": 300000,
"codexAppServerDiscoveryLimit": 20,
"codexAppServerFallbackToCli": true,
"masterAgentLongTaskProgressIntervalMs": 20000,
"codexRemoteControlEnabled": true,
"codexRemoteControlCommand": "codex",
"codexRemoteControlArgs": [

View File

@@ -0,0 +1,30 @@
export function normalizeFetchTimeoutMs(value, fallback = 5_000) {
const numeric = Number(value);
if (!Number.isFinite(numeric) || numeric <= 0) {
return fallback;
}
return Math.max(50, Math.min(60_000, Math.round(numeric)));
}
export async function fetchWithTimeout(url, init = {}, options = {}) {
const timeoutMs = normalizeFetchTimeoutMs(options.timeoutMs);
const timeoutMessage = String(options.timeoutMessage || "LOCAL_AGENT_FETCH_TIMEOUT");
const controller = new AbortController();
const timeout = setTimeout(() => {
controller.abort(new Error(timeoutMessage));
}, timeoutMs);
try {
return await fetch(url, {
...init,
signal: init.signal ?? controller.signal,
});
} catch (error) {
if (controller.signal.aborted) {
throw new Error(timeoutMessage);
}
throw error;
} finally {
clearTimeout(timeout);
}
}

View File

@@ -0,0 +1,41 @@
import test from "node:test";
import assert from "node:assert/strict";
import { createServer } from "node:http";
import { fetchWithTimeout } from "./fetch-timeout.mjs";
async function withServer(handler, run) {
const server = createServer(handler);
await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve));
const address = server.address();
const baseUrl = `http://127.0.0.1:${address.port}`;
try {
return await run(baseUrl);
} finally {
await new Promise((resolve) => server.close(resolve));
}
}
test("fetchWithTimeout aborts stalled requests with a stable error message", async () => {
await withServer(() => {
// Keep the request open to simulate a stalled control-plane request.
}, async (baseUrl) => {
const started = Date.now();
await assert.rejects(
() => fetchWithTimeout(`${baseUrl}/stall`, {}, { timeoutMs: 20, timeoutMessage: "TEST_FETCH_TIMEOUT" }),
/TEST_FETCH_TIMEOUT/,
);
assert.ok(Date.now() - started < 1_000);
});
});
test("fetchWithTimeout returns normal responses before the timeout", async () => {
await withServer((_request, response) => {
response.writeHead(200, { "Content-Type": "text/plain" });
response.end("ok");
}, async (baseUrl) => {
const response = await fetchWithTimeout(`${baseUrl}/ok`, {}, { timeoutMs: 1_000 });
assert.equal(response.ok, true);
assert.equal(await response.text(), "ok");
});
});

View File

@@ -0,0 +1,97 @@
function trimText(value, maxLength = 160) {
const text = typeof value === "string" ? value.trim() : "";
if (!text) {
return undefined;
}
return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
}
function summarizePoll(poll) {
if (!poll || typeof poll !== "object") {
return null;
}
return {
at: trimText(poll.at, 80),
ok: poll.ok === true,
status: Number.isFinite(Number(poll.status)) ? Number(poll.status) : undefined,
};
}
function summarizeActiveMasterTask(activeMasterTask) {
if (!activeMasterTask || typeof activeMasterTask !== "object") {
return null;
}
return {
taskId: trimText(activeMasterTask.taskId, 80),
status: trimText(activeMasterTask.status, 80),
startedAt: trimText(activeMasterTask.startedAt, 80),
completedAt: trimText(activeMasterTask.completedAt, 80),
};
}
function summarizeReliableOutbox(runtime) {
const replay = runtime?.lastReliableOutboxReplay;
return {
busy: runtime?.reliableOutboxReplayBusy === true,
startedAt: trimText(runtime?.lastReliableOutboxReplayStartedAt, 80),
replayedAt: trimText(runtime?.lastReliableOutboxReplayAt, 80),
attempted: Number.isFinite(Number(replay?.attempted)) ? Number(replay.attempted) : 0,
sent: Number.isFinite(Number(replay?.sent)) ? Number(replay.sent) : 0,
retained: Number.isFinite(Number(replay?.retained)) ? Number(replay.retained) : 0,
stoppedByBudget: replay?.stoppedByBudget === true,
error: trimText(replay?.error),
};
}
function summarizeCodexAppServer(runtime) {
return {
metadataAtMs: Number.isFinite(Number(runtime?.codexAppServerCapabilityMetadataAtMs))
? Number(runtime.codexAppServerCapabilityMetadataAtMs)
: undefined,
refreshBusy: runtime?.codexAppServerCapabilityMetadataRefreshBusy === true,
lastError: trimText(runtime?.codexAppServerCapabilityMetadataError),
skippedAt: trimText(runtime?.codexAppServerCapabilityMetadataSkippedAt, 80),
skipReason: trimText(runtime?.codexAppServerCapabilityMetadataSkipReason, 120),
};
}
export function buildLocalAgentHealthSummary(config = {}, runtime = {}) {
return {
ok: runtime.lastHeartbeatOk === true,
service: "boss-local-agent",
deviceId: trimText(config.deviceId, 120),
now: new Date().toISOString(),
heartbeat: {
at: trimText(runtime.lastHeartbeatAt, 80),
ok: runtime.lastHeartbeatOk === true,
status: Number.isFinite(Number(runtime.lastHeartbeatStatus))
? Number(runtime.lastHeartbeatStatus)
: undefined,
},
masterTask: {
busy: runtime.masterTaskBusy === true,
active: summarizeActiveMasterTask(runtime.activeMasterTask),
lastPoll: summarizePoll(runtime.lastMasterTaskPoll),
},
outbox: summarizeReliableOutbox(runtime),
skills: {
syncBusy: runtime.skillSyncBusy === true,
syncAt: trimText(runtime.lastSkillSyncAt, 80),
syncOk: runtime.lastSkillSyncOk === true,
syncStatus: Number.isFinite(Number(runtime.lastSkillSyncStatus))
? Number(runtime.lastSkillSyncStatus)
: undefined,
count: Array.isArray(runtime.lastSkills) ? runtime.lastSkills.length : 0,
lifecycleBusy: runtime.skillLifecycleBusy === true,
lastLifecyclePoll: summarizePoll(runtime.lastSkillLifecyclePoll),
},
projectDiscovery: {
at: trimText(runtime.lastProjectDiscoveryAt, 80),
ok: runtime.lastProjectDiscoveryOk === true,
summary: trimText(runtime.lastProjectDiscoverySummary, 160),
skippedAt: trimText(runtime.lastProjectDiscoverySkippedAt, 80),
skipReason: trimText(runtime.lastProjectDiscoverySkipReason, 120),
},
codexAppServer: summarizeCodexAppServer(runtime),
};
}

View File

@@ -0,0 +1,57 @@
import test from "node:test";
import assert from "node:assert/strict";
import { buildLocalAgentHealthSummary } from "./health-summary.mjs";
test("buildLocalAgentHealthSummary excludes heavy runtime bodies and secrets", () => {
const summary = buildLocalAgentHealthSummary(
{
deviceId: "mac-studio",
token: "secret-token",
},
{
lastHeartbeatAt: "2026-06-07T01:00:00.000Z",
lastHeartbeatOk: true,
lastHeartbeatStatus: 200,
lastHeartbeatBody: JSON.stringify({ large: "body", token: "secret-token" }),
masterTaskBusy: true,
activeMasterTask: {
taskId: "task-1",
status: "running",
detail: "very sensitive long detail",
},
lastMasterTaskPoll: {
at: "2026-06-07T01:00:01.000Z",
ok: true,
status: 200,
body: JSON.stringify({ task: { prompt: "internal prompt" } }),
},
lastSkillSyncBody: "skill body",
lastReliableOutboxReplay: {
attempted: 3,
sent: 2,
retained: 1,
stoppedByBudget: true,
},
lastSkills: [{ name: "a" }, { name: "b" }],
codexAppServerCapabilityMetadata: {
huge: "metadata",
},
codexAppServerCapabilityMetadataError: "temporary error",
},
);
const encoded = JSON.stringify(summary);
assert.equal(summary.ok, true);
assert.equal(summary.deviceId, "mac-studio");
assert.equal(summary.masterTask.busy, true);
assert.equal(summary.masterTask.active.taskId, "task-1");
assert.equal(summary.outbox.retained, 1);
assert.equal(summary.skills.count, 2);
assert.equal(encoded.includes("secret-token"), false);
assert.equal(encoded.includes("internal prompt"), false);
assert.equal(encoded.includes("very sensitive long detail"), false);
assert.equal(encoded.includes("lastHeartbeatBody"), false);
assert.equal(encoded.includes("lastMasterTaskPoll"), false);
assert.equal(encoded.includes("codexAppServerCapabilityMetadata"), false);
});

View File

@@ -0,0 +1,15 @@
export function recordHeartbeatRunnerError(runtime, error) {
const body = error instanceof Error ? error.message : String(error || "LOCAL_AGENT_HEARTBEAT_FAILED");
const result = {
ok: false,
status: 0,
body,
};
if (runtime && typeof runtime === "object") {
runtime.lastHeartbeatAt = new Date().toISOString();
runtime.lastHeartbeatOk = false;
runtime.lastHeartbeatStatus = 0;
runtime.lastHeartbeatBody = body;
}
return result;
}

View File

@@ -0,0 +1,17 @@
import test from "node:test";
import assert from "node:assert/strict";
import { recordHeartbeatRunnerError } from "./heartbeat-error-state.mjs";
test("heartbeat runner error is recorded as a visible runtime failure", () => {
const runtime = {};
const result = recordHeartbeatRunnerError(runtime, new Error("LOCAL_AGENT_HEARTBEAT_TIMEOUT"));
assert.equal(result.ok, false);
assert.equal(result.status, 0);
assert.equal(result.body, "LOCAL_AGENT_HEARTBEAT_TIMEOUT");
assert.equal(runtime.lastHeartbeatOk, false);
assert.equal(runtime.lastHeartbeatStatus, 0);
assert.equal(runtime.lastHeartbeatBody, "LOCAL_AGENT_HEARTBEAT_TIMEOUT");
assert.match(runtime.lastHeartbeatAt, /^\d{4}-\d{2}-\d{2}T/);
});

View File

@@ -0,0 +1,117 @@
function asArray(value) {
return Array.isArray(value) ? value : [];
}
function normalizeProjectCandidate(candidate) {
if (!candidate || typeof candidate !== "object") {
return null;
}
return { ...candidate };
}
function normalizeProjects(value) {
return asArray(value)
.map((item) => String(item ?? "").trim())
.filter(Boolean);
}
function normalizeProjectCandidates(value) {
return asArray(value)
.map(normalizeProjectCandidate)
.filter(Boolean);
}
function normalizeTimeoutMs(value, fallback = 3_500) {
const numeric = Number(value);
if (!Number.isFinite(numeric) || numeric <= 0) {
return fallback;
}
return Math.max(500, Math.min(30_000, Math.round(numeric)));
}
function normalizeHeartbeatProjects(value = {}) {
return {
projects: normalizeProjects(value.projects),
projectCandidates: normalizeProjectCandidates(value.projectCandidates),
guiConnected: value.guiConnected === true,
};
}
function shouldUseSnapshot(config = {}, runtime = {}) {
if (config.codexSessionDiscoveryWhileMasterTaskBusy === true) {
return false;
}
return runtime.masterTaskBusy === true || runtime.activeMasterTask?.status === "running";
}
export function resolveHeartbeatProjectsFromSnapshot({ config = {}, runtime = {} } = {}) {
if (!shouldUseSnapshot(config, runtime)) {
return { shouldUseSnapshot: false };
}
const snapshot = runtime.lastHeartbeatProjectsSnapshot;
if (snapshot && typeof snapshot === "object") {
return {
shouldUseSnapshot: true,
projects: normalizeProjects(snapshot.projects),
projectCandidates: normalizeProjectCandidates(snapshot.projectCandidates),
guiConnected: snapshot.guiConnected === true,
};
}
return {
shouldUseSnapshot: true,
projects: normalizeProjects(config.projects),
projectCandidates: normalizeProjectCandidates(config.projectCandidates),
guiConnected: false,
};
}
export async function runHeartbeatProjectDiscoveryWithTimeout({
timeoutMs,
fallback = {},
discover,
} = {}) {
if (typeof discover !== "function") {
throw new TypeError("discover must be a function");
}
const effectiveTimeoutMs = normalizeTimeoutMs(timeoutMs);
let timeout;
let timedOut = false;
try {
const value = await Promise.race([
Promise.resolve().then(discover),
new Promise((_, reject) => {
timeout = setTimeout(() => {
timedOut = true;
reject(new Error("CODEX_SESSION_DISCOVERY_TIMEOUT"));
}, effectiveTimeoutMs);
}),
]);
return {
timedOut: false,
value: normalizeHeartbeatProjects(value),
};
} catch (error) {
return {
timedOut,
error,
value: normalizeHeartbeatProjects(fallback),
};
} finally {
if (timeout) {
clearTimeout(timeout);
}
}
}
export function storeHeartbeatProjectsSnapshot(runtime, heartbeatProjects = {}) {
if (!runtime || typeof runtime !== "object") {
return;
}
const snapshot = normalizeHeartbeatProjects(heartbeatProjects);
runtime.lastHeartbeatProjectsSnapshot = {
...snapshot,
capturedAt: new Date().toISOString(),
};
}

View File

@@ -0,0 +1,104 @@
import test from "node:test";
import assert from "node:assert/strict";
import {
resolveHeartbeatProjectsFromSnapshot,
runHeartbeatProjectDiscoveryWithTimeout,
storeHeartbeatProjectsSnapshot,
} from "./heartbeat-project-snapshot.mjs";
test("active master task heartbeat reuses the last discovered project snapshot", () => {
const runtime = {
masterTaskBusy: true,
lastHeartbeatProjectsSnapshot: {
projects: ["test"],
projectCandidates: [{ threadId: "thread-test", folderName: "test" }],
guiConnected: true,
capturedAt: "2026-06-07T10:39:00.000Z",
},
};
const result = resolveHeartbeatProjectsFromSnapshot({
config: {
projects: ["static"],
projectCandidates: [{ threadId: "static-thread", folderName: "static" }],
},
runtime,
});
assert.deepEqual(result, {
shouldUseSnapshot: true,
projects: ["test"],
projectCandidates: [{ threadId: "thread-test", folderName: "test" }],
guiConnected: true,
});
});
test("active master task heartbeat falls back to static projects without a snapshot", () => {
const result = resolveHeartbeatProjectsFromSnapshot({
config: {
projects: ["static"],
projectCandidates: [{ threadId: "static-thread", folderName: "static" }],
},
runtime: {
masterTaskBusy: true,
},
});
assert.deepEqual(result, {
shouldUseSnapshot: true,
projects: ["static"],
projectCandidates: [{ threadId: "static-thread", folderName: "static" }],
guiConnected: false,
});
});
test("idle heartbeat does not use the cached project snapshot", () => {
const result = resolveHeartbeatProjectsFromSnapshot({
config: {
projects: ["static"],
},
runtime: {
masterTaskBusy: false,
lastHeartbeatProjectsSnapshot: {
projects: ["cached"],
projectCandidates: [],
},
},
});
assert.equal(result.shouldUseSnapshot, false);
});
test("project snapshot stores only lightweight fields", () => {
const runtime = {};
storeHeartbeatProjectsSnapshot(runtime, {
projects: ["test"],
projectCandidates: [{ threadId: "thread-test", folderName: "test" }],
guiConnected: true,
privateField: "should-not-store",
});
assert.deepEqual(Object.keys(runtime.lastHeartbeatProjectsSnapshot).sort(), [
"capturedAt",
"guiConnected",
"projectCandidates",
"projects",
]);
assert.equal(runtime.lastHeartbeatProjectsSnapshot.privateField, undefined);
});
test("project discovery timeout falls back instead of blocking heartbeat", async () => {
const result = await runHeartbeatProjectDiscoveryWithTimeout({
timeoutMs: 10,
fallback: { projects: ["cached"], projectCandidates: [], guiConnected: false },
discover: () => new Promise(() => {}),
});
assert.equal(result.timedOut, true);
assert.deepEqual(result.value, {
projects: ["cached"],
projectCandidates: [],
guiConnected: false,
});
});

View File

@@ -0,0 +1,102 @@
function normalizeNumber(value, fallback) {
const numeric = Number(value);
return Number.isFinite(numeric) ? numeric : fallback;
}
function clamp(value, min, max) {
return Math.max(min, Math.min(max, value));
}
function formatElapsedSeconds(seconds) {
const safeSeconds = Math.max(0, Math.floor(seconds));
if (safeSeconds < 60) {
return `${safeSeconds}`;
}
const minutes = Math.floor(safeSeconds / 60);
const remainingSeconds = safeSeconds % 60;
return remainingSeconds > 0 ? `${minutes}${remainingSeconds}` : `${minutes} 分钟`;
}
function normalizeStepStatus(value, fallback = "pending") {
return value === "done" || value === "running" || value === "failed" || value === "pending"
? value
: fallback;
}
function normalizeSteps(steps) {
if (!Array.isArray(steps)) {
return [];
}
return steps
.map((step, index) => {
const text = typeof step?.text === "string" ? step.text.trim() : "";
if (!text) {
return null;
}
return {
id: typeof step?.id === "string" && step.id.trim() ? step.id.trim() : `step-${index + 1}`,
text,
status: normalizeStepStatus(step?.status),
};
})
.filter(Boolean)
.slice(0, 10);
}
function buildDefaultLongRunningSteps(elapsedSeconds) {
const elapsedText = formatElapsedSeconds(elapsedSeconds);
return [
{ id: "receive-task", text: "接收对话任务", status: "done" },
{ id: "locate-thread", text: "定位目标 Codex 线程", status: "done" },
{ id: "write-desktop-thread", text: "写入 Codex 桌面线程记录", status: "done" },
{ id: "await-thread-reply", text: `等待目标线程回复,已等待 ${elapsedText}`, status: "running" },
{ id: "write-back-boss", text: "回写 Boss 对话窗口", status: "pending" },
];
}
export function normalizeLongRunningProgressIntervalMs(value) {
const numeric = normalizeNumber(value, 20_000);
if (numeric <= 0) {
return 0;
}
return clamp(Math.floor(numeric), 5_000, 60_000);
}
export function buildLongRunningCodexProgressSnapshot({
task = {},
startedAtMs,
nowMs = Date.now(),
phase = "awaiting_reply",
baseProgress,
heartbeatCount = 0,
} = {}) {
const started = normalizeNumber(startedAtMs, nowMs);
const elapsedSeconds = Math.max(0, Math.round((nowMs - started) / 1000));
const liveSteps = normalizeSteps(baseProgress?.steps);
const steps = liveSteps.length > 0 ? liveSteps : buildDefaultLongRunningSteps(elapsedSeconds);
const warnings = Array.isArray(baseProgress?.warnings)
? baseProgress.warnings.filter(Boolean).slice(0, 8)
: [];
if (!warnings.some((warning) => warning?.id === "codex-turn-long-running")) {
warnings.unshift({
id: "codex-turn-long-running",
severity: "info",
message: `Codex 桌面线程仍在执行,已等待 ${formatElapsedSeconds(elapsedSeconds)}`,
});
}
return {
...(baseProgress && typeof baseProgress === "object" ? baseProgress : {}),
phase,
status: "running",
steps,
warnings,
longRunning: {
taskId: typeof task?.taskId === "string" ? task.taskId : undefined,
targetThreadDisplayName:
typeof task?.targetThreadDisplayName === "string" ? task.targetThreadDisplayName : undefined,
elapsedSeconds,
heartbeatCount: Math.max(0, Math.floor(normalizeNumber(heartbeatCount, 0))),
},
};
}

View File

@@ -0,0 +1,70 @@
import test from "node:test";
import assert from "node:assert/strict";
import {
buildLongRunningCodexProgressSnapshot,
normalizeLongRunningProgressIntervalMs,
} from "./master-task-progress-heartbeat.mjs";
test("long-running codex progress snapshot exposes visible waiting state", () => {
const snapshot = buildLongRunningCodexProgressSnapshot({
task: {
taskId: "mastertask-slow",
targetThreadDisplayName: "juyuwan",
},
startedAtMs: Date.parse("2026-06-07T07:35:33.000Z"),
nowMs: Date.parse("2026-06-07T07:37:03.000Z"),
phase: "awaiting_reply",
heartbeatCount: 3,
});
assert.equal(snapshot.phase, "awaiting_reply");
assert.equal(snapshot.status, "running");
assert.equal(snapshot.longRunning.elapsedSeconds, 90);
assert.equal(snapshot.longRunning.heartbeatCount, 3);
assert.equal(snapshot.steps.length, 5);
assert.deepEqual(snapshot.steps.map((step) => step.status), [
"done",
"done",
"done",
"running",
"pending",
]);
assert.equal(snapshot.steps[3].text, "等待目标线程回复,已等待 1 分 30 秒");
assert.equal(snapshot.warnings[0].id, "codex-turn-long-running");
});
test("long-running codex progress snapshot preserves live app-server steps when available", () => {
const snapshot = buildLongRunningCodexProgressSnapshot({
task: {
taskId: "mastertask-streaming",
targetThreadDisplayName: "boss",
},
startedAtMs: 1_000,
nowMs: 21_000,
baseProgress: {
steps: [
{ id: "plan-1", text: "读取项目文档", status: "done" },
{ id: "plan-2", text: "运行验证命令", status: "running" },
],
streamEvents: {
status: "streaming",
agentDeltaCount: 2,
},
},
});
assert.deepEqual(snapshot.steps, [
{ id: "plan-1", text: "读取项目文档", status: "done" },
{ id: "plan-2", text: "运行验证命令", status: "running" },
]);
assert.equal(snapshot.streamEvents.agentDeltaCount, 2);
assert.equal(snapshot.longRunning.elapsedSeconds, 20);
});
test("long-running progress interval defaults to fast but bounded updates", () => {
assert.equal(normalizeLongRunningProgressIntervalMs(undefined), 20_000);
assert.equal(normalizeLongRunningProgressIntervalMs(1_000), 5_000);
assert.equal(normalizeLongRunningProgressIntervalMs(120_000), 60_000);
assert.equal(normalizeLongRunningProgressIntervalMs(0), 0);
});

View File

@@ -0,0 +1,378 @@
import { mkdir, readFile, rename, writeFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import os from "node:os";
const MAX_OUTBOX_RECORDS = 500;
const MAX_APP_LOG_RECORDS = 120;
const RETRYABLE_STATUS_CODES = new Set([408, 409, 425, 429, 500, 502, 503, 504]);
const outboxWriteQueues = new Map();
function reliableOutboxPriority(record) {
switch (record?.kind) {
case "task.complete":
return 0;
case "task.progress":
return 10;
case "app.log":
return 30;
default:
return 20;
}
}
function recordCreatedMs(record) {
const value = Date.parse(record?.createdAt || "");
return Number.isFinite(value) ? value : 0;
}
function parseRecordBody(record) {
if (!record || record.body == null) {
return {};
}
if (typeof record.body === "object") {
return record.body;
}
if (typeof record.body !== "string") {
return {};
}
try {
const parsed = JSON.parse(record.body);
return parsed && typeof parsed === "object" ? parsed : {};
} catch {
return {};
}
}
function taskProgressCoalescingKey(record) {
if (record?.kind !== "task.progress") {
return "";
}
const body = parseRecordBody(record);
const taskId = typeof body.taskId === "string" ? body.taskId.trim() : "";
if (taskId) {
return taskId;
}
const url = typeof record.url === "string" ? record.url : "";
const match = url.match(/\/master-agent\/tasks\/([^/]+)\/progress(?:\?|$)/);
return match ? decodeURIComponent(match[1]) : "";
}
function taskCompletionCoalescingKey(record) {
if (record?.kind !== "task.complete") {
return "";
}
const body = parseRecordBody(record);
const taskId = typeof body.taskId === "string" ? body.taskId.trim() : "";
if (taskId) {
return taskId;
}
const url = typeof record.url === "string" ? record.url : "";
const match = url.match(/\/master-agent\/tasks\/([^/]+)\/complete(?:\?|$)/);
return match ? decodeURIComponent(match[1]) : "";
}
function appLogCoalescingKey(record) {
if (record?.kind !== "app.log") {
return "";
}
const body = parseRecordBody(record);
const category = typeof body.category === "string" ? body.category.trim() : "";
const message = typeof body.message === "string" ? body.message.trim() : "";
if (!category && !message) {
return "";
}
const projectId = typeof body.projectId === "string" ? body.projectId.trim() : "";
return [record.url || "", projectId, category, message].join("|");
}
function orderReliableOutboxRecordsForReplay(records) {
return [...records].sort((left, right) => {
const priorityDiff = reliableOutboxPriority(left) - reliableOutboxPriority(right);
if (priorityDiff !== 0) return priorityDiff;
return recordCreatedMs(left) - recordCreatedMs(right);
});
}
function compactReliableOutboxRecords(records) {
const active = records.filter((record) => record && record.id && record.status !== "sent");
const completionTaskKeys = new Set(
active.map(taskCompletionCoalescingKey).filter(Boolean),
);
const progressByTask = new Map();
const retained = [];
for (const record of active) {
const progressKey = taskProgressCoalescingKey(record);
if (!progressKey) {
retained.push(record);
continue;
}
if (completionTaskKeys.has(progressKey)) {
continue;
}
const previous = progressByTask.get(progressKey);
if (!previous || recordCreatedMs(record) >= recordCreatedMs(previous)) {
progressByTask.set(progressKey, record);
}
}
const keyedAppLogs = new Map();
const unkeyedAppLogs = [];
for (const record of retained.filter((item) => item.kind === "app.log")) {
const appLogKey = appLogCoalescingKey(record);
if (!appLogKey) {
unkeyedAppLogs.push(record);
continue;
}
const previous = keyedAppLogs.get(appLogKey);
if (!previous || recordCreatedMs(record) >= recordCreatedMs(previous)) {
keyedAppLogs.set(appLogKey, record);
}
}
const appLogs = [...keyedAppLogs.values(), ...unkeyedAppLogs]
.sort((left, right) => recordCreatedMs(right) - recordCreatedMs(left))
.slice(0, MAX_APP_LOG_RECORDS);
const compacted = [
...retained.filter((record) => record.kind !== "app.log"),
...progressByTask.values(),
...appLogs,
];
if (compacted.length <= MAX_OUTBOX_RECORDS) {
return compacted.sort((left, right) => recordCreatedMs(left) - recordCreatedMs(right));
}
const taskCompletions = compacted.filter((record) => record.kind === "task.complete");
const remainingBudget = Math.max(0, MAX_OUTBOX_RECORDS - taskCompletions.length);
const otherRecords = compacted
.filter((record) => record.kind !== "task.complete")
.sort((left, right) => {
const priorityDiff = reliableOutboxPriority(left) - reliableOutboxPriority(right);
if (priorityDiff !== 0) return priorityDiff;
return recordCreatedMs(right) - recordCreatedMs(left);
})
.slice(0, remainingBudget);
return [...taskCompletions, ...otherRecords].sort(
(left, right) => recordCreatedMs(left) - recordCreatedMs(right),
);
}
function normalizeTimeoutMs(value, fallback = 5_000) {
const numeric = Number(value);
if (!Number.isFinite(numeric) || numeric <= 0) {
return fallback;
}
return Math.max(50, Math.min(60_000, Math.round(numeric)));
}
function normalizeDurationBudgetMs(value) {
const numeric = Number(value);
if (!Number.isFinite(numeric) || numeric <= 0) {
return 0;
}
return Math.max(50, Math.min(60_000, Math.round(numeric)));
}
function nowIso() {
return new Date().toISOString();
}
function randomId(prefix) {
return `${prefix}-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
}
export function resolveReliableOutboxPath(config = {}) {
if (config.reliableOutboxPath) {
return String(config.reliableOutboxPath);
}
return join(os.homedir(), ".boss-agent", `${config.deviceId || "device"}-outbox.json`);
}
async function readOutboxRecords(outboxPath) {
try {
const parsed = JSON.parse(await readFile(outboxPath, "utf8"));
return Array.isArray(parsed?.records) ? parsed.records : [];
} catch {
return [];
}
}
async function writeOutboxRecords(outboxPath, records) {
await mkdir(dirname(outboxPath), { recursive: true });
const compacted = compactReliableOutboxRecords(records);
const tmpPath = `${outboxPath}.${process.pid}.${Date.now()}.tmp`;
await writeFile(
tmpPath,
JSON.stringify({ version: 1, updatedAt: nowIso(), records: compacted }, null, 2),
);
await rename(tmpPath, outboxPath);
}
async function mutateOutboxRecords(outboxPath, mutator) {
const run = async () => {
const records = await readOutboxRecords(outboxPath);
const result = await mutator(records);
await writeOutboxRecords(outboxPath, result.records);
return result.value;
};
const previous = outboxWriteQueues.get(outboxPath) || Promise.resolve();
const next = previous.then(run, run);
outboxWriteQueues.set(outboxPath, next.catch(() => null));
return await next;
}
export async function appendReliableOutboxRecord(outboxPath, input) {
const record = {
id: input.id || randomId(input.kind || "outbox"),
kind: input.kind,
url: input.url,
method: input.method || "POST",
headers: input.headers || {},
body: input.body,
requestTimeoutMs: input.requestTimeoutMs,
status: "pending",
attemptCount: 0,
createdAt: nowIso(),
lastAttemptAt: undefined,
lastError: undefined,
};
return await mutateOutboxRecords(outboxPath, async (records) => ({
records: [...records, record],
value: record,
}));
}
export async function markReliableOutboxRecordSent(outboxPath, recordId) {
await mutateOutboxRecords(outboxPath, async (records) => ({
records: records.filter((record) => record.id !== recordId),
value: undefined,
}));
}
function shouldRetry(status) {
if (!status) return true;
return RETRYABLE_STATUS_CODES.has(status);
}
async function updateReliableOutboxRecordFailure(outboxPath, recordId, detail) {
await mutateOutboxRecords(outboxPath, async (records) => ({
records: records.map((record) => {
if (record.id !== recordId) return record;
return {
...record,
attemptCount: Number(record.attemptCount || 0) + 1,
lastAttemptAt: nowIso(),
lastError: String(detail || "OUTBOX_SEND_FAILED").slice(0, 240),
};
}),
value: undefined,
}));
}
export async function sendReliableOutboxRecord(record, options = {}) {
const timeoutMs = normalizeTimeoutMs(options.requestTimeoutMs ?? record.requestTimeoutMs);
const controller = new AbortController();
const timeout = setTimeout(() => {
controller.abort(new Error("RELIABLE_OUTBOX_SEND_TIMEOUT"));
}, timeoutMs);
try {
const response = await fetch(record.url, {
method: record.method || "POST",
headers: record.headers || {},
body: typeof record.body === "string" ? record.body : JSON.stringify(record.body ?? {}),
signal: controller.signal,
});
const body = await response.text();
return {
ok: response.ok,
retryable: !response.ok && shouldRetry(response.status),
status: response.status,
body,
};
} catch (error) {
if (controller.signal.aborted) {
throw new Error("RELIABLE_OUTBOX_SEND_TIMEOUT");
}
throw error;
} finally {
clearTimeout(timeout);
}
}
export async function postThroughReliableOutbox(config, recordInput) {
if (config.reliableOutboxEnabled === false) {
return await sendReliableOutboxRecord({
...recordInput,
id: recordInput.id || randomId(recordInput.kind || "direct"),
requestTimeoutMs: recordInput.requestTimeoutMs ?? config.reliableOutboxRequestTimeoutMs,
});
}
const outboxPath = resolveReliableOutboxPath(config);
const record = await appendReliableOutboxRecord(outboxPath, {
...recordInput,
requestTimeoutMs: recordInput.requestTimeoutMs ?? config.reliableOutboxRequestTimeoutMs,
});
try {
const result = await sendReliableOutboxRecord(record, {
requestTimeoutMs: record.requestTimeoutMs ?? config.reliableOutboxRequestTimeoutMs,
});
if (result.ok || !result.retryable) {
await markReliableOutboxRecordSent(outboxPath, record.id);
} else {
await updateReliableOutboxRecordFailure(outboxPath, record.id, result.body);
}
return result;
} catch (error) {
await updateReliableOutboxRecordFailure(
outboxPath,
record.id,
error instanceof Error ? error.message : String(error),
);
return {
ok: false,
retryable: true,
status: 0,
body: error instanceof Error ? error.message : String(error),
};
}
}
export async function replayReliableOutbox(config, options = {}) {
if (config.reliableOutboxEnabled === false) {
return { attempted: 0, sent: 0, retained: 0 };
}
const outboxPath = resolveReliableOutboxPath(config);
const records = (await readOutboxRecords(outboxPath)).filter(
(record) => record?.status !== "sent",
);
const limit = Math.max(1, Math.min(Number(options.limit || 50), 100));
const startedAt = Date.now();
const maxDurationMs = normalizeDurationBudgetMs(options.maxDurationMs ?? config.reliableOutboxReplayBudgetMs);
const requestTimeoutMs = options.requestTimeoutMs ?? config.reliableOutboxRequestTimeoutMs;
let attempted = 0;
let sent = 0;
let stoppedByBudget = false;
for (const record of orderReliableOutboxRecordsForReplay(records).slice(0, limit)) {
if (maxDurationMs > 0 && Date.now() - startedAt >= maxDurationMs) {
stoppedByBudget = true;
break;
}
attempted += 1;
try {
const result = await sendReliableOutboxRecord(record, {
requestTimeoutMs: record.requestTimeoutMs ?? requestTimeoutMs,
});
if (result.ok || !result.retryable) {
await markReliableOutboxRecordSent(outboxPath, record.id);
sent += 1;
} else {
await updateReliableOutboxRecordFailure(outboxPath, record.id, result.body);
}
} catch (error) {
await updateReliableOutboxRecordFailure(
outboxPath,
record.id,
error instanceof Error ? error.message : String(error),
);
}
}
const retained = (await readOutboxRecords(outboxPath)).length;
return { attempted, sent, retained, stoppedByBudget };
}

View File

@@ -0,0 +1,330 @@
import test from "node:test";
import assert from "node:assert/strict";
import { createServer } from "node:http";
import { mkdtemp, readFile, rm } from "node:fs/promises";
import os from "node:os";
import { join } from "node:path";
import {
appendReliableOutboxRecord,
postThroughReliableOutbox,
replayReliableOutbox,
resolveReliableOutboxPath,
} from "./reliable-outbox.mjs";
async function withServer(handler, run) {
const server = createServer(handler);
await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve));
const address = server.address();
const baseUrl = `http://127.0.0.1:${address.port}`;
try {
return await run(baseUrl);
} finally {
await new Promise((resolve) => server.close(resolve));
}
}
async function readRecords(outboxPath) {
const parsed = JSON.parse(await readFile(outboxPath, "utf8"));
return parsed.records;
}
test("postThroughReliableOutbox removes a record after successful send", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-success-"));
const config = { deviceId: "test-device", reliableOutboxPath: join(root, "outbox.json") };
let received = 0;
await withServer((request, response) => {
received += 1;
response.writeHead(200, { "Content-Type": "application/json" });
response.end(JSON.stringify({ ok: true }));
}, async (baseUrl) => {
const result = await postThroughReliableOutbox(config, {
kind: "task.progress",
url: `${baseUrl}/progress`,
headers: { "Content-Type": "application/json" },
body: { ok: true },
});
assert.equal(result.ok, true);
assert.equal(received, 1);
assert.deepEqual(await readRecords(resolveReliableOutboxPath(config)), []);
});
await rm(root, { recursive: true, force: true });
});
test("postThroughReliableOutbox times out stalled requests and keeps them retryable", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-timeout-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
reliableOutboxRequestTimeoutMs: 20,
};
await withServer(() => {
// Intentionally leave the request open to simulate a stalled network write.
}, async (baseUrl) => {
const started = Date.now();
const result = await postThroughReliableOutbox(config, {
kind: "task.progress",
url: `${baseUrl}/stall`,
headers: { "Content-Type": "application/json" },
body: { ok: true },
});
assert.equal(result.ok, false);
assert.equal(result.retryable, true);
assert.equal(result.status, 0);
assert.match(result.body, /RELIABLE_OUTBOX_SEND_TIMEOUT|aborted/i);
assert.ok(Date.now() - started < 1_000);
assert.equal((await readRecords(resolveReliableOutboxPath(config))).length, 1);
});
await rm(root, { recursive: true, force: true });
});
test("replayReliableOutbox respects a replay duration budget", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-budget-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
};
await withServer(() => {
// Keep every replay request pending; the replay budget must stop the loop.
}, async (baseUrl) => {
const outboxPath = resolveReliableOutboxPath(config);
for (let index = 0; index < 3; index += 1) {
await appendReliableOutboxRecord(outboxPath, {
kind: "task.progress",
url: `${baseUrl}/stall-${index}`,
body: { index },
});
}
const started = Date.now();
const replay = await replayReliableOutbox(config, {
limit: 3,
requestTimeoutMs: 30,
maxDurationMs: 50,
});
assert.ok(Date.now() - started < 1_000);
assert.ok(replay.attempted >= 1);
assert.ok(replay.attempted < 3);
assert.equal(replay.sent, 0);
assert.equal(replay.retained, 3);
assert.equal(replay.stoppedByBudget, true);
});
await rm(root, { recursive: true, force: true });
});
test("postThroughReliableOutbox retains retryable failures and replay clears them", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-retry-"));
const config = { deviceId: "test-device", reliableOutboxPath: join(root, "outbox.json") };
let fail = true;
let received = 0;
await withServer((request, response) => {
received += 1;
if (fail) {
response.writeHead(503, { "Content-Type": "text/plain" });
response.end("temporary failure");
return;
}
response.writeHead(200, { "Content-Type": "application/json" });
response.end(JSON.stringify({ ok: true }));
}, async (baseUrl) => {
const first = await postThroughReliableOutbox(config, {
kind: "task.complete",
url: `${baseUrl}/complete`,
headers: { "Content-Type": "application/json" },
body: { taskId: "task-1" },
});
assert.equal(first.ok, false);
assert.equal(first.retryable, true);
assert.equal((await readRecords(resolveReliableOutboxPath(config))).length, 1);
fail = false;
const replay = await replayReliableOutbox(config);
assert.equal(replay.attempted, 1);
assert.equal(replay.sent, 1);
assert.equal(replay.retained, 0);
assert.equal(received, 2);
});
await rm(root, { recursive: true, force: true });
});
test("replayReliableOutbox prioritizes task completion over progress and app logs", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-priority-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
};
const received = [];
await withServer((request, response) => {
received.push(request.url);
response.writeHead(200, { "Content-Type": "application/json" });
response.end(JSON.stringify({ ok: true }));
}, async (baseUrl) => {
const outboxPath = resolveReliableOutboxPath(config);
await appendReliableOutboxRecord(outboxPath, {
kind: "task.progress",
url: `${baseUrl}/progress`,
body: { taskId: "task-2" },
});
await appendReliableOutboxRecord(outboxPath, {
kind: "app.log",
url: `${baseUrl}/app-log`,
body: { category: "noise" },
});
await appendReliableOutboxRecord(outboxPath, {
kind: "task.complete",
url: `${baseUrl}/complete`,
body: { taskId: "task-1" },
});
const replay = await replayReliableOutbox(config, { limit: 1 });
assert.equal(replay.attempted, 1);
assert.deepEqual(received, ["/complete"]);
const retained = await readRecords(outboxPath);
assert.equal(retained.some((record) => record.kind === "task.complete"), false);
assert.equal(retained.some((record) => record.kind === "task.progress"), true);
assert.equal(retained.some((record) => record.kind === "app.log"), true);
});
await rm(root, { recursive: true, force: true });
});
test("reliable outbox compaction preserves pending task completion records before low priority logs", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-compact-priority-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
};
const outboxPath = resolveReliableOutboxPath(config);
await appendReliableOutboxRecord(outboxPath, {
kind: "task.complete",
url: "http://127.0.0.1/complete",
body: { taskId: "task-1" },
});
for (let index = 0; index < 510; index += 1) {
await appendReliableOutboxRecord(outboxPath, {
kind: "app.log",
url: `http://127.0.0.1/app-log-${index}`,
body: { index },
});
}
const records = await readRecords(outboxPath);
assert.equal(records.length, 121);
assert.equal(records.some((record) => record.kind === "task.complete"), true);
assert.equal(records.filter((record) => record.kind === "app.log").length, 120);
await rm(root, { recursive: true, force: true });
});
test("reliable outbox coalesces repeated task progress records for the same task", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-progress-coalesce-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
};
const outboxPath = resolveReliableOutboxPath(config);
for (let index = 0; index < 12; index += 1) {
await appendReliableOutboxRecord(outboxPath, {
kind: "task.progress",
url: "http://127.0.0.1/api/v1/master-agent/tasks/task-1/progress",
body: { taskId: "task-1", index },
});
}
const records = await readRecords(outboxPath);
assert.equal(records.length, 1);
assert.equal(records[0].kind, "task.progress");
assert.equal(records[0].body.index, 11);
await rm(root, { recursive: true, force: true });
});
test("reliable outbox caps noisy app logs while retaining completion and latest progress records", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-app-log-cap-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
};
const outboxPath = resolveReliableOutboxPath(config);
await appendReliableOutboxRecord(outboxPath, {
kind: "task.complete",
url: "http://127.0.0.1/complete",
body: { taskId: "task-1" },
});
for (let index = 0; index < 150; index += 1) {
await appendReliableOutboxRecord(outboxPath, {
kind: "app.log",
url: "http://127.0.0.1/app-log",
body: { index },
});
}
await appendReliableOutboxRecord(outboxPath, {
kind: "task.progress",
url: "http://127.0.0.1/api/v1/master-agent/tasks/task-2/progress",
body: { taskId: "task-2", index: 2 },
});
const records = await readRecords(outboxPath);
const appLogs = records.filter((record) => record.kind === "app.log");
assert.equal(records.some((record) => record.kind === "task.complete"), true);
assert.equal(records.some((record) => record.kind === "task.progress"), true);
assert.equal(appLogs.length, 120);
assert.equal(appLogs.some((record) => record.body.index === 149), true);
assert.equal(appLogs.some((record) => record.body.index === 0), false);
await rm(root, { recursive: true, force: true });
});
test("reliable outbox drops stale progress once a completion for the same task is pending", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-progress-after-complete-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
};
const outboxPath = resolveReliableOutboxPath(config);
await appendReliableOutboxRecord(outboxPath, {
kind: "task.progress",
url: "http://127.0.0.1/api/v1/master-agent/tasks/task-1/progress",
body: { taskId: "task-1", phase: "awaiting_reply" },
});
await appendReliableOutboxRecord(outboxPath, {
kind: "task.complete",
url: "http://127.0.0.1/api/v1/master-agent/tasks/task-1/complete",
body: { taskId: "task-1", status: "completed" },
});
const records = await readRecords(outboxPath);
assert.equal(records.length, 1);
assert.equal(records[0].kind, "task.complete");
await rm(root, { recursive: true, force: true });
});
test("reliable outbox coalesces duplicate app logs by category and message", async () => {
const root = await mkdtemp(join(os.tmpdir(), "boss-outbox-app-log-coalesce-"));
const config = {
deviceId: "test-device",
reliableOutboxPath: join(root, "outbox.json"),
};
const outboxPath = resolveReliableOutboxPath(config);
for (let index = 0; index < 20; index += 1) {
await appendReliableOutboxRecord(outboxPath, {
kind: "app.log",
url: "http://127.0.0.1/app-log",
body: {
projectId: "project-1",
category: "local_agent.codex_app_server_progress_failed",
message: "Codex App Server 进度实时回写失败,完成回写仍会携带最终进度。",
detail: `attempt-${index}`,
},
});
}
const records = await readRecords(outboxPath);
assert.equal(records.length, 1);
assert.equal(records[0].kind, "app.log");
assert.equal(records[0].body.detail, "attempt-19");
await rm(root, { recursive: true, force: true });
});

View File

@@ -0,0 +1,19 @@
import test from "node:test";
import assert from "node:assert/strict";
import { createSerializedRunner } from "./serialized-runner.mjs";
test("serialized runner releases active task after timeout", async () => {
let calls = 0;
const runner = createSerializedRunner(
() =>
new Promise(() => {
calls += 1;
}),
{ timeoutMs: 10, timeoutErrorMessage: "HEARTBEAT_TIMEOUT" },
);
await assert.rejects(() => runner(), /HEARTBEAT_TIMEOUT/);
await assert.rejects(() => runner(), /HEARTBEAT_TIMEOUT/);
assert.equal(calls, 2);
});

View File

@@ -1,4 +1,4 @@
export function createSerializedRunner(task) {
export function createSerializedRunner(task, options = {}) {
let activePromise = null;
return function runSerialized(...args) {
@@ -6,8 +6,25 @@ export function createSerializedRunner(task) {
return activePromise;
}
activePromise = Promise.resolve(task(...args))
const timeoutMs = Number(options.timeoutMs);
let timeout;
const taskPromise = Promise.resolve(task(...args));
const nextPromise = Number.isFinite(timeoutMs) && timeoutMs > 0
? Promise.race([
taskPromise,
new Promise((_, reject) => {
timeout = setTimeout(() => {
reject(new Error(options.timeoutErrorMessage || "SERIALIZED_RUNNER_TIMEOUT"));
}, timeoutMs);
}),
])
: taskPromise;
activePromise = nextPromise
.finally(() => {
if (timeout) {
clearTimeout(timeout);
}
activePromise = null;
});

View File

@@ -13,6 +13,21 @@ import {
getCodexAppServerRunnerConfig,
shouldUseCodexAppServerTaskRunner,
} from "./codex-app-server-runner.mjs";
import {
shouldSkipCodexAppServerDiscovery,
} from "./codex-app-server-discovery-guard.mjs";
import {
buildLongRunningCodexProgressSnapshot,
normalizeLongRunningProgressIntervalMs,
} from "./master-task-progress-heartbeat.mjs";
import {
resolveHeartbeatProjectsFromSnapshot,
runHeartbeatProjectDiscoveryWithTimeout,
storeHeartbeatProjectsSnapshot,
} from "./heartbeat-project-snapshot.mjs";
import {
recordHeartbeatRunnerError,
} from "./heartbeat-error-state.mjs";
import { appendBossUserMessageToCodexThreadRollout } from "./codex-thread-rollout-writer.mjs";
import {
executeOmxTeamTask,
@@ -65,7 +80,15 @@ import {
buildMasterAgentTaskCompletionRequestBody,
buildRemoteExecutionCompletionPayload,
} from "./master-task-completion.mjs";
import {
postThroughReliableOutbox,
replayReliableOutbox,
} from "./reliable-outbox.mjs";
import {
buildLocalAgentHealthSummary,
} from "./health-summary.mjs";
import { createSerializedRunner } from "./serialized-runner.mjs";
import { fetchWithTimeout } from "./fetch-timeout.mjs";
async function loadConfig(configPath) {
const raw = await readFile(resolve(configPath), "utf8");
@@ -75,6 +98,23 @@ async function loadConfig(configPath) {
async function resolveHeartbeatProjects(config, runtime) {
const staticProjects = Array.isArray(config.projects) ? config.projects : [];
const staticCandidates = Array.isArray(config.projectCandidates) ? config.projectCandidates : [];
const snapshotFallback = runtime.lastHeartbeatProjectsSnapshot && typeof runtime.lastHeartbeatProjectsSnapshot === "object"
? runtime.lastHeartbeatProjectsSnapshot
: {
projects: staticProjects,
projectCandidates: staticCandidates,
guiConnected: runtime.lastCodexGuiConnected === true,
};
const snapshotDecision = resolveHeartbeatProjectsFromSnapshot({ config, runtime });
if (snapshotDecision.shouldUseSnapshot) {
runtime.lastProjectDiscoverySkippedAt = new Date().toISOString();
runtime.lastProjectDiscoverySkipReason = "master_task_running";
return {
projects: snapshotDecision.projects,
projectCandidates: snapshotDecision.projectCandidates,
guiConnected: snapshotDecision.guiConnected,
};
}
if (config.codexSessionDiscoveryEnabled === false) {
return {
projects: staticProjects,
@@ -83,14 +123,38 @@ async function resolveHeartbeatProjects(config, runtime) {
}
try {
const discovered = await discoverCodexProjectCandidatesInWorker({
stateDbPath: config.codexStateDbPath,
logsDbPath: config.codexLogsDbPath,
sessionIndexPath: config.codexSessionIndexPath,
globalStatePath: config.codexGlobalStatePath,
sessionsDir: config.codexSessionsDir,
lookbackHours: config.codexSessionLookbackHours,
const discoveryTimeoutMs = config.codexSessionDiscoveryTimeoutMs ?? 3_500;
const discoveryResult = await runHeartbeatProjectDiscoveryWithTimeout({
timeoutMs: discoveryTimeoutMs,
fallback: snapshotFallback,
discover: () => discoverCodexProjectCandidatesInWorker({
stateDbPath: config.codexStateDbPath,
logsDbPath: config.codexLogsDbPath,
sessionIndexPath: config.codexSessionIndexPath,
globalStatePath: config.codexGlobalStatePath,
sessionsDir: config.codexSessionsDir,
lookbackHours: config.codexSessionLookbackHours,
timeoutMs: discoveryTimeoutMs,
}),
});
if (discoveryResult.error) {
runtime.lastProjectDiscoveryAt = new Date().toISOString();
runtime.lastProjectDiscoveryOk = false;
runtime.lastProjectDiscoverySummary = discoveryResult.error instanceof Error
? discoveryResult.error.message
: String(discoveryResult.error);
runtime.lastCodexGuiConnected = discoveryResult.value.guiConnected === true;
postAppLog(config, runtime, {
level: "warning",
category: "local_agent.codex_discovery_degraded",
message: "Codex 线程扫描超时或失败,已使用缓存项目继续心跳。",
detail: runtime.lastProjectDiscoverySummary,
mirrorToMaster: false,
}).catch(() => null);
return discoveryResult.value;
}
const discovered = discoveryResult.value;
const candidateMap = new Map();
for (const candidate of [...staticCandidates, ...discovered.projectCandidates]) {
candidateMap.set(candidate.codexThreadRef ?? candidate.threadId, candidate);
@@ -101,22 +165,24 @@ async function resolveHeartbeatProjects(config, runtime) {
runtime.lastProjectDiscoveryOk = true;
runtime.lastProjectDiscoverySummary = `${mergedCandidates.length} threads / ${mergedProjects.length} folders`;
runtime.lastCodexGuiConnected = discovered.guiConnected === true;
return {
const heartbeatProjects = {
projects: mergedProjects,
projectCandidates: mergedCandidates,
guiConnected: discovered.guiConnected === true,
};
storeHeartbeatProjectsSnapshot(runtime, heartbeatProjects);
return heartbeatProjects;
} catch (error) {
runtime.lastProjectDiscoveryAt = new Date().toISOString();
runtime.lastProjectDiscoveryOk = false;
runtime.lastProjectDiscoverySummary = error instanceof Error ? error.message : String(error);
await postAppLog(config, runtime, {
postAppLog(config, runtime, {
level: "error",
category: "local_agent.codex_discovery_failed",
message: "Codex 线程扫描失败,已退回静态项目配置。",
detail: runtime.lastProjectDiscoverySummary,
mirrorToMaster: true,
});
}).catch(() => null);
return {
projects: staticProjects,
projectCandidates: staticCandidates,
@@ -252,53 +318,60 @@ async function postHeartbeat(config, runtime, heartbeatProjects) {
...mergedProjectCandidates.map((candidate) => candidate.folderName).filter(Boolean),
]),
];
const response = await fetch(`${config.controlPlaneUrl.replace(/\/$/, "")}/api/device-heartbeat`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
deviceId: config.deviceId,
token: runtime.issuedToken ?? config.token,
pairingCode: runtime.issuedToken ? undefined : config.pairingCode,
name: config.name,
avatar: config.avatar,
account: config.account,
status: config.status,
quota5h: config.quota5h,
quota7d: config.quota7d,
capabilities: {
gui: {
connected: guiConnected,
lastSeenAt: now,
lastActiveProjectId: "",
const response = await fetchWithTimeout(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/device-heartbeat`,
{
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
deviceId: config.deviceId,
token: runtime.issuedToken ?? config.token,
pairingCode: runtime.issuedToken ? undefined : config.pairingCode,
name: config.name,
avatar: config.avatar,
account: config.account,
status: config.status,
quota5h: config.quota5h,
quota7d: config.quota7d,
capabilities: {
gui: {
connected: guiConnected,
lastSeenAt: now,
lastActiveProjectId: "",
},
cli: {
connected: config.cliConnected !== false,
lastSeenAt: now,
lastActiveProjectId: "",
},
browserAutomation: {
connected: config.browserAutomationConnected !== false || Boolean(browserControlRuntime.enabled && browserControlRuntime.command),
lastSeenAt: now,
lastActiveProjectId: "",
},
computerUse: {
connected: computerUseConnected,
lastSeenAt: now,
lastActiveProjectId: "",
},
codexAppServer: {
connected: codexAppServerConnected,
lastSeenAt: now,
lastActiveProjectId: "",
metadata: codexAppServerMetadata,
},
},
cli: {
connected: config.cliConnected !== false,
lastSeenAt: now,
lastActiveProjectId: "",
},
browserAutomation: {
connected: config.browserAutomationConnected !== false || Boolean(browserControlRuntime.enabled && browserControlRuntime.command),
lastSeenAt: now,
lastActiveProjectId: "",
},
computerUse: {
connected: computerUseConnected,
lastSeenAt: now,
lastActiveProjectId: "",
},
codexAppServer: {
connected: codexAppServerConnected,
lastSeenAt: now,
lastActiveProjectId: "",
metadata: codexAppServerMetadata,
},
},
preferredExecutionMode,
projects: mergedProjects,
projectCandidates: mergedProjectCandidates,
endpoint: config.endpoint,
}),
});
preferredExecutionMode,
projects: mergedProjects,
projectCandidates: mergedProjectCandidates,
endpoint: config.endpoint,
}),
},
{
timeoutMs: config.heartbeatPostTimeoutMs ?? 4_000,
timeoutMessage: "LOCAL_AGENT_HEARTBEAT_POST_TIMEOUT",
},
);
const text = await response.text();
let json = null;
@@ -379,11 +452,48 @@ async function resolveCodexAppServerCapabilityConnected(codexAppServerRuntime) {
return canExecuteCommand(codexAppServerRuntime.command, codexAppServerRuntime.cwd || process.cwd());
}
function refreshCodexAppServerCapabilityMetadataInBackground(config, runtime, codexAppServerRuntime, now) {
if (runtime.codexAppServerCapabilityMetadataRefreshBusy) {
return;
}
runtime.codexAppServerCapabilityMetadataRefreshBusy = true;
runtime.codexAppServerCapabilityMetadataRefreshStartedAt = new Date(now).toISOString();
void (async () => {
try {
const metadata = await discoverCodexAppServerCapabilities(codexAppServerRuntime);
runtime.codexAppServerCapabilityMetadata = metadata;
runtime.codexAppServerCapabilityMetadataAtMs = Date.now();
runtime.codexAppServerCapabilityMetadataError = "";
runtime.codexAppServerCapabilityMetadataRefreshCompletedAt = new Date().toISOString();
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
runtime.codexAppServerCapabilityMetadataError = message;
runtime.codexAppServerCapabilityMetadataRefreshFailedAt = new Date().toISOString();
await postAppLog(config, runtime, {
level: "warn",
category: "local_agent.codex_app_server_capability_discovery_failed",
message: "Codex App Server 能力清单发现失败,设备心跳继续上报连接状态。",
detail: message,
mirrorToMaster: false,
});
} finally {
runtime.codexAppServerCapabilityMetadataRefreshBusy = false;
}
})();
}
async function resolveCodexAppServerCapabilityMetadata(config, runtime, codexAppServerRuntime, connected) {
if (!connected || !codexAppServerRuntime?.enabled || codexAppServerRuntime.discoveryEnabled === false) {
return undefined;
}
const now = Date.now();
const discoveryGuard = shouldSkipCodexAppServerDiscovery({ config, runtime });
if (discoveryGuard.skip) {
runtime.codexAppServerCapabilityMetadataSkippedAt = new Date(now).toISOString();
runtime.codexAppServerCapabilityMetadataSkipReason = discoveryGuard.reason;
runtime.codexAppServerCapabilityMetadataSkipTaskId = discoveryGuard.activeTaskId;
return runtime.codexAppServerCapabilityMetadata;
}
const ttlMs = codexAppServerRuntime.discoveryTtlMs ?? 300_000;
if (
runtime.codexAppServerCapabilityMetadata &&
@@ -393,24 +503,31 @@ async function resolveCodexAppServerCapabilityMetadata(config, runtime, codexApp
return runtime.codexAppServerCapabilityMetadata;
}
try {
const metadata = await discoverCodexAppServerCapabilities(codexAppServerRuntime);
runtime.codexAppServerCapabilityMetadata = metadata;
runtime.codexAppServerCapabilityMetadataAtMs = now;
runtime.codexAppServerCapabilityMetadataError = "";
return metadata;
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
runtime.codexAppServerCapabilityMetadataError = message;
await postAppLog(config, runtime, {
level: "warn",
category: "local_agent.codex_app_server_capability_discovery_failed",
message: "Codex App Server 能力清单发现失败,设备心跳继续上报连接状态。",
detail: message,
mirrorToMaster: false,
});
return runtime.codexAppServerCapabilityMetadata;
if (config.codexAppServerDiscoveryInlineInHeartbeat === true) {
try {
const metadata = await discoverCodexAppServerCapabilities(codexAppServerRuntime);
runtime.codexAppServerCapabilityMetadata = metadata;
runtime.codexAppServerCapabilityMetadataAtMs = now;
runtime.codexAppServerCapabilityMetadataError = "";
return metadata;
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
runtime.codexAppServerCapabilityMetadataError = message;
postAppLog(config, runtime, {
level: "warn",
category: "local_agent.codex_app_server_capability_discovery_failed",
message: "Codex App Server 能力清单发现失败,设备心跳继续上报连接状态。",
detail: message,
mirrorToMaster: false,
}).catch(() => null);
return runtime.codexAppServerCapabilityMetadata;
}
}
refreshCodexAppServerCapabilityMetadataInBackground(config, runtime, codexAppServerRuntime, now);
runtime.codexAppServerCapabilityMetadataSkippedAt = new Date(now).toISOString();
runtime.codexAppServerCapabilityMetadataSkipReason = "background_refresh";
return runtime.codexAppServerCapabilityMetadata;
}
function deviceTokenHeaders(config, runtime) {
@@ -420,7 +537,7 @@ function deviceTokenHeaders(config, runtime) {
async function postThreadContext(config, runtime, snapshot) {
const workerId = snapshot.workerId ?? config.workerId ?? `${config.deviceId}-worker`;
const response = await fetch(
const response = await fetchWithTimeout(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/workers/${workerId}/thread-context`,
{
method: "POST",
@@ -452,6 +569,10 @@ async function postThreadContext(config, runtime, snapshot) {
capturedAt: new Date().toISOString(),
}),
},
{
timeoutMs: config.threadContextPostTimeoutMs ?? 3_000,
timeoutMessage: "LOCAL_AGENT_THREAD_CONTEXT_POST_TIMEOUT",
},
);
return {
@@ -525,7 +646,7 @@ async function discoverSkills(config) {
}
async function postSkills(config, runtime, skills) {
const response = await fetch(
const response = await fetchWithTimeout(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/devices/${config.deviceId}/skills`,
{
method: "POST",
@@ -535,6 +656,10 @@ async function postSkills(config, runtime, skills) {
},
body: JSON.stringify({ skills }),
},
{
timeoutMs: config.skillsPostTimeoutMs ?? 3_000,
timeoutMessage: "LOCAL_AGENT_SKILLS_POST_TIMEOUT",
},
);
return {
@@ -547,17 +672,18 @@ async function postSkills(config, runtime, skills) {
async function postAppLog(config, runtime, payload) {
try {
await fetch(`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/app-logs`, {
method: "POST",
await postThroughReliableOutbox(config, {
kind: "app.log",
url: `${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/app-logs`,
headers: {
"Content-Type": "application/json",
...deviceTokenHeaders(config, runtime),
},
body: JSON.stringify({
body: {
deviceId: config.deviceId,
source: "local_agent",
...payload,
}),
},
});
} catch {
// Ignore log transport failures to avoid blocking the agent loop.
@@ -571,7 +697,7 @@ async function claimMasterAgentTask(config, runtime) {
const waitMs = Number.isFinite(configuredWaitMs)
? Math.max(0, Math.min(30_000, Math.floor(configuredWaitMs)))
: 25_000;
const response = await fetch(
const response = await fetchWithTimeout(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/claim`,
{
method: "POST",
@@ -581,6 +707,10 @@ async function claimMasterAgentTask(config, runtime) {
},
body: JSON.stringify({ deviceId: config.deviceId, waitMs }),
},
{
timeoutMs: waitMs + Number(config.masterAgentClaimTimeoutPaddingMs ?? 5_000),
timeoutMessage: "LOCAL_AGENT_MASTER_TASK_CLAIM_TIMEOUT",
},
);
return {
@@ -591,52 +721,41 @@ async function claimMasterAgentTask(config, runtime) {
}
async function completeMasterAgentTask(config, runtime, payload) {
const response = await fetch(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/${payload.taskId}/complete`,
{
method: "POST",
headers: {
"Content-Type": "application/json",
...deviceTokenHeaders(config, runtime),
},
body: JSON.stringify(buildMasterAgentTaskCompletionRequestBody(config, payload)),
const result = await postThroughReliableOutbox(config, {
kind: "task.complete",
url: `${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/${payload.taskId}/complete`,
headers: {
"Content-Type": "application/json",
...deviceTokenHeaders(config, runtime),
},
);
body: buildMasterAgentTaskCompletionRequestBody(config, payload),
});
return {
ok: response.ok,
status: response.status,
body: await response.text(),
};
return result;
}
async function postMasterAgentTaskProgress(config, runtime, payload) {
const response = await fetch(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/${payload.taskId}/progress`,
{
method: "POST",
headers: {
"Content-Type": "application/json",
...deviceTokenHeaders(config, runtime),
},
body: JSON.stringify({
deviceId: config.deviceId,
status: payload.status || "running",
requestId: payload.requestId,
executionProgress: payload.executionProgress,
}),
const result = await postThroughReliableOutbox(config, {
kind: "task.progress",
url: `${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/${payload.taskId}/progress`,
headers: {
"Content-Type": "application/json",
...deviceTokenHeaders(config, runtime),
},
);
body: {
deviceId: config.deviceId,
status: payload.status || "running",
phase: payload.phase,
requestId: payload.requestId,
executionProgress: payload.executionProgress,
},
});
return {
ok: response.ok,
status: response.status,
body: await response.text(),
};
return result;
}
async function fetchMasterAgentTaskControlState(config, runtime, task) {
const response = await fetch(
const response = await fetchWithTimeout(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/${task.taskId}/control-state`,
{
method: "GET",
@@ -644,6 +763,10 @@ async function fetchMasterAgentTaskControlState(config, runtime, task) {
...deviceTokenHeaders(config, runtime),
},
},
{
timeoutMs: config.masterAgentControlStateTimeoutMs ?? 3_000,
timeoutMessage: "LOCAL_AGENT_MASTER_TASK_CONTROL_STATE_TIMEOUT",
},
);
if (!response.ok) {
return {
@@ -685,7 +808,7 @@ function buildCodexRemoteControlMaintenanceReply(task, result) {
}
async function claimSkillLifecycleRequest(config, runtime) {
const response = await fetch(
const response = await fetchWithTimeout(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/devices/${config.deviceId}/skill-requests/claim`,
{
method: "POST",
@@ -695,6 +818,10 @@ async function claimSkillLifecycleRequest(config, runtime) {
},
body: JSON.stringify({ deviceId: config.deviceId }),
},
{
timeoutMs: config.skillLifecycleClaimTimeoutMs ?? 5_000,
timeoutMessage: "LOCAL_AGENT_SKILL_REQUEST_CLAIM_TIMEOUT",
},
);
return {
@@ -705,7 +832,7 @@ async function claimSkillLifecycleRequest(config, runtime) {
}
async function completeSkillLifecycleRequest(config, runtime, request, result) {
const response = await fetch(
const response = await fetchWithTimeout(
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/devices/${config.deviceId}/skill-requests/${request.requestId}/complete`,
{
method: "POST",
@@ -719,6 +846,10 @@ async function completeSkillLifecycleRequest(config, runtime, request, result) {
error: result.error,
}),
},
{
timeoutMs: config.skillLifecycleCompleteTimeoutMs ?? 5_000,
timeoutMessage: "LOCAL_AGENT_SKILL_REQUEST_COMPLETE_TIMEOUT",
},
);
return {
@@ -897,9 +1028,60 @@ async function runMasterAgentTask(config, runtime, task) {
status: "running",
startedAt: new Date().toISOString(),
};
const emitTaskPhase = async (phase, executionProgress) => {
try {
const result = await postMasterAgentTaskProgress(config, runtime, {
taskId: task.taskId,
status: "running",
phase,
executionProgress: {
...(executionProgress || {}),
phase,
},
});
return result;
} catch (error) {
return {
ok: false,
status: 0,
body: error instanceof Error ? error.message : String(error),
};
}
};
const createLongRunningProgressHeartbeat = ({ phase = "awaiting_reply", getProgress } = {}) => {
const intervalMs = normalizeLongRunningProgressIntervalMs(
config.masterAgentLongTaskProgressIntervalMs ?? config.masterAgentProgressHeartbeatIntervalMs,
);
if (intervalMs <= 0) {
return () => {};
}
const startedAtMs = Date.now();
let heartbeatCount = 0;
const sendHeartbeat = async () => {
heartbeatCount += 1;
await emitTaskPhase(
phase,
buildLongRunningCodexProgressSnapshot({
task,
phase,
startedAtMs,
nowMs: Date.now(),
baseProgress: typeof getProgress === "function" ? getProgress() : undefined,
heartbeatCount,
}),
);
};
const timer = setInterval(() => {
void sendHeartbeat();
}, intervalMs);
return () => {
clearInterval(timer);
};
};
try {
let activeChild = null;
await emitTaskPhase("executor_starting");
const executionResult = await (async () => {
if (canHandleCodexRemoteControlMaintenanceTask(task)) {
const daemonResult = await runCodexRemoteControlDaemonAction(
@@ -966,43 +1148,55 @@ async function runMasterAgentTask(config, runtime, task) {
const codexAppServerRunner = getCodexAppServerRunnerConfig(process.env, config);
if (shouldUseCodexAppServerTaskRunner(codexAppServerRunner, task)) {
const appServerResult = await executeCodexAppServerTask(
{
...codexAppServerRunner,
interruptPollIntervalMs: normalizeInterruptPollIntervalMs(config),
shouldInterruptActiveTurn: async () => {
const controlState = await fetchMasterAgentTaskControlState(config, runtime, task);
if (!controlState.ok) {
let latestCodexExecutionProgress;
const stopLongRunningProgressHeartbeat = createLongRunningProgressHeartbeat({
phase: "awaiting_reply",
getProgress: () => latestCodexExecutionProgress,
});
let appServerResult;
try {
appServerResult = await executeCodexAppServerTask(
{
...codexAppServerRunner,
interruptPollIntervalMs: normalizeInterruptPollIntervalMs(config),
shouldInterruptActiveTurn: async () => {
const controlState = await fetchMasterAgentTaskControlState(config, runtime, task);
if (!controlState.ok) {
return false;
}
if (controlState.body?.canceled === true || controlState.body?.status === "canceled") {
return {
interrupt: true,
reason: controlState.body?.cancelReason || "USER_CANCELED_TASK",
};
}
return false;
}
if (controlState.body?.canceled === true || controlState.body?.status === "canceled") {
return {
interrupt: true,
reason: controlState.body?.cancelReason || "USER_CANCELED_TASK",
};
}
return false;
},
onProgress: async (executionProgress) => {
const progressResult = await postMasterAgentTaskProgress(config, runtime, {
taskId: task.taskId,
status: "running",
executionProgress,
});
if (!progressResult.ok) {
await postAppLog(config, runtime, {
projectId: task.projectId,
level: "warn",
category: "local_agent.codex_app_server_progress_failed",
message: "Codex App Server 进度实时回写失败,完成回写仍会携带最终进度。",
detail: progressResult.body,
mirrorToMaster: false,
},
onProgress: async (executionProgress) => {
latestCodexExecutionProgress = executionProgress;
const progressResult = await postMasterAgentTaskProgress(config, runtime, {
taskId: task.taskId,
status: "running",
phase: "awaiting_reply",
executionProgress,
});
}
if (!progressResult.ok) {
await postAppLog(config, runtime, {
projectId: task.projectId,
level: "warn",
category: "local_agent.codex_app_server_progress_failed",
message: "Codex App Server 进度实时回写失败,完成回写仍会携带最终进度。",
detail: progressResult.body,
mirrorToMaster: false,
});
}
},
},
},
task,
);
task,
);
} finally {
stopLongRunningProgressHeartbeat();
}
if (appServerResult.status === "interrupted") {
return {
interruptedCompletion: {
@@ -1114,6 +1308,7 @@ async function runMasterAgentTask(config, runtime, task) {
},
async () =>
await new Promise((resolveTask, rejectTask) => {
void emitTaskPhase("turn_started");
const child = spawn("codex", codexExecution.args, {
cwd: codexExecution.cwd,
env: process.env,
@@ -1193,6 +1388,7 @@ async function runMasterAgentTask(config, runtime, task) {
return;
}
const { replyBody, dispatchExecutionCompletion, executionProgress } = executionResult;
await emitTaskPhase("completing", executionProgress);
const completion = await completeMasterAgentTask(
config,
@@ -1210,6 +1406,24 @@ async function runMasterAgentTask(config, runtime, task) {
executionProgress,
}),
);
if (!completion.ok) {
await emitTaskPhase("completing", {
...(executionProgress && typeof executionProgress === "object" ? executionProgress : {}),
title: "结果已生成,正在同步",
warnings: [
...(
Array.isArray(executionProgress?.warnings)
? executionProgress.warnings.filter(Boolean).slice(0, 6)
: []
),
{
id: "task-complete-sync-retrying",
severity: "warning",
message: "本机已生成任务结果,正在重试同步到 Boss 对话窗口。",
},
],
});
}
runtime.activeMasterTask = {
taskId: task.taskId,
status: completion.ok ? "completed" : "complete_failed",
@@ -1218,10 +1432,14 @@ async function runMasterAgentTask(config, runtime, task) {
};
await postAppLog(config, runtime, {
projectId: "master-agent",
level: "info",
category: "local_agent.master_agent_task_completed",
message: `Master Codex Node 已完成主 Agent 任务 ${task.taskId}`,
detail: replyBody.slice(0, 280),
level: completion.ok ? "info" : "warn",
category: completion.ok
? "local_agent.master_agent_task_completed"
: "local_agent.master_agent_task_completion_sync_retrying",
message: completion.ok
? `Master Codex Node 已完成主 Agent 任务 ${task.taskId}`
: `Master Codex Node 已生成结果,正在重试同步主 Agent 任务 ${task.taskId}`,
detail: completion.ok ? replyBody.slice(0, 280) : completion.body,
mirrorToMaster: false,
});
} catch (error) {
@@ -1440,10 +1658,124 @@ const runtime = {
lastProjectDiscoveryAt: null,
lastProjectDiscoveryOk: false,
lastProjectDiscoverySummary: null,
lastReliableOutboxReplay: null,
};
function replayReliableOutboxInBackground(config, runtime) {
if (runtime.reliableOutboxReplayBusy) {
return;
}
runtime.reliableOutboxReplayBusy = true;
runtime.lastReliableOutboxReplayStartedAt = new Date().toISOString();
void replayReliableOutbox(config, {
limit: config.heartbeatOutboxReplayLimit ?? 5,
requestTimeoutMs: config.heartbeatOutboxRequestTimeoutMs ?? 1_000,
maxDurationMs: config.heartbeatOutboxReplayBudgetMs ?? 2_500,
})
.then((result) => {
runtime.lastReliableOutboxReplay = result;
runtime.lastReliableOutboxReplayAt = new Date().toISOString();
})
.catch((error) => {
runtime.lastReliableOutboxReplay = {
attempted: 0,
sent: 0,
retained: 0,
stoppedByBudget: false,
error: error instanceof Error ? error.message : String(error),
};
runtime.lastReliableOutboxReplayAt = new Date().toISOString();
})
.finally(() => {
runtime.reliableOutboxReplayBusy = false;
});
}
function syncThreadContextsInBackground(config, runtime, snapshots) {
if (runtime.threadContextSyncBusy || !Array.isArray(snapshots) || snapshots.length === 0) {
return;
}
runtime.threadContextSyncBusy = true;
runtime.lastThreadContextSyncStartedAt = new Date().toISOString();
void (async () => {
const results = [];
for (const snapshot of snapshots) {
let threadResult;
try {
threadResult = await postThreadContext(config, runtime, snapshot);
} catch (error) {
threadResult = {
ok: false,
status: 0,
body: error instanceof Error ? error.message : String(error),
workerId: snapshot.workerId ?? config.workerId ?? `${config.deviceId}-worker`,
threadId: snapshot.threadId,
};
}
results.push(threadResult);
if (!threadResult.ok) {
postAppLog(config, runtime, {
projectId: snapshot.projectId,
level: "error",
category: "local_agent.thread_context_failed",
message: `线程预算上报失败:${snapshot.threadId}`,
detail: threadResult.body,
mirrorToMaster: true,
}).catch(() => null);
}
}
runtime.lastThreadContextResults = results;
runtime.lastThreadContextSyncAt = new Date().toISOString();
})()
.catch((error) => {
runtime.lastThreadContextResults = [{
ok: false,
status: 0,
body: error instanceof Error ? error.message : String(error),
}];
runtime.lastThreadContextSyncAt = new Date().toISOString();
})
.finally(() => {
runtime.threadContextSyncBusy = false;
});
}
function syncSkillsInBackground(config, runtime) {
if (runtime.skillSyncBusy) {
return;
}
runtime.skillSyncBusy = true;
runtime.lastSkillSyncStartedAt = new Date().toISOString();
void (async () => {
const skills = await discoverSkills(config);
runtime.lastSkills = skills;
const skillSyncResult = await postSkills(config, runtime, skills);
runtime.lastSkillSyncAt = new Date().toISOString();
runtime.lastSkillSyncOk = skillSyncResult.ok;
runtime.lastSkillSyncStatus = skillSyncResult.status;
runtime.lastSkillSyncBody = skillSyncResult.body;
})()
.catch((error) => {
runtime.lastSkillSyncAt = new Date().toISOString();
runtime.lastSkillSyncOk = false;
runtime.lastSkillSyncStatus = 0;
runtime.lastSkillSyncBody = error instanceof Error ? error.message : String(error);
postAppLog(config, runtime, {
level: "error",
category: "local_agent.skills_sync_failed",
message: "Skill 扫描或同步失败。",
detail: runtime.lastSkillSyncBody,
mirrorToMaster: true,
}).catch(() => null);
})
.finally(() => {
runtime.skillSyncBusy = false;
});
}
async function performHeartbeat() {
try {
replayReliableOutboxInBackground(config, runtime);
const heartbeatProjects = await resolveHeartbeatProjects(config, runtime);
const result = await postHeartbeat(config, runtime, heartbeatProjects);
runtime.lastHeartbeatAt = new Date().toISOString();
@@ -1464,43 +1796,8 @@ async function performHeartbeat() {
}
const snapshots = Array.isArray(config.threadContexts) ? config.threadContexts : [];
runtime.lastThreadContextResults = [];
for (const snapshot of snapshots) {
const threadResult = await postThreadContext(config, runtime, snapshot);
runtime.lastThreadContextResults.push(threadResult);
if (!threadResult.ok) {
await postAppLog(config, runtime, {
projectId: snapshot.projectId,
level: "error",
category: "local_agent.thread_context_failed",
message: `线程预算上报失败:${snapshot.threadId}`,
detail: threadResult.body,
mirrorToMaster: true,
});
}
}
try {
const skills = await discoverSkills(config);
runtime.lastSkills = skills;
const skillSyncResult = await postSkills(config, runtime, skills);
runtime.lastSkillSyncAt = new Date().toISOString();
runtime.lastSkillSyncOk = skillSyncResult.ok;
runtime.lastSkillSyncStatus = skillSyncResult.status;
runtime.lastSkillSyncBody = skillSyncResult.body;
} catch (error) {
runtime.lastSkillSyncAt = new Date().toISOString();
runtime.lastSkillSyncOk = false;
runtime.lastSkillSyncStatus = 0;
runtime.lastSkillSyncBody = error instanceof Error ? error.message : String(error);
await postAppLog(config, runtime, {
level: "error",
category: "local_agent.skills_sync_failed",
message: "Skill 扫描或同步失败。",
detail: runtime.lastSkillSyncBody,
mirrorToMaster: true,
});
}
syncThreadContextsInBackground(config, runtime, snapshots);
syncSkillsInBackground(config, runtime);
} catch (error) {
runtime.lastHeartbeatAt = new Date().toISOString();
runtime.lastHeartbeatOk = false;
@@ -1516,7 +1813,10 @@ async function performHeartbeat() {
}
}
const heartbeat = createSerializedRunner(performHeartbeat);
const heartbeat = createSerializedRunner(performHeartbeat, {
timeoutMs: config.heartbeatTimeoutMs ?? 12_000,
timeoutErrorMessage: "LOCAL_AGENT_HEARTBEAT_TIMEOUT",
});
const masterTaskPoll = createSerializedRunner(async () => {
await pollMasterAgentTasks(config, runtime);
});
@@ -1646,13 +1946,19 @@ const server = createServer(async (request, response) => {
if (requestUrl.pathname === "/health") {
response.writeHead(200, { "Content-Type": "application/json" });
response.end(
JSON.stringify({
if (requestUrl.searchParams.get("verbose") === "1") {
response.end(
JSON.stringify({
ok: true,
service: "boss-local-agent",
runtime,
}),
);
return;
}
response.end(
JSON.stringify(buildLocalAgentHealthSummary(config, runtime)),
);
return;
}
@@ -1681,9 +1987,11 @@ const server = createServer(async (request, response) => {
}
if (requestUrl.pathname === "/api/v1/heartbeat" && request.method === "POST") {
await heartbeat();
await heartbeat().catch((error) => {
recordHeartbeatRunnerError(runtime, error);
});
response.writeHead(200, { "Content-Type": "application/json" });
response.end(JSON.stringify({ ok: runtime.lastHeartbeatOk, runtime }));
response.end(JSON.stringify(buildLocalAgentHealthSummary(config, runtime)));
return;
}
@@ -1704,14 +2012,18 @@ server.listen(config.port, config.bindHost, () => {
});
void (async () => {
await heartbeat();
await heartbeat().catch((error) => {
recordHeartbeatRunnerError(runtime, error);
});
await masterTaskPoll();
await skillLifecyclePoll();
await bossAgentOtaPoll();
})();
setInterval(() => {
void heartbeat();
void heartbeat().catch((error) => {
recordHeartbeatRunnerError(runtime, error);
});
}, config.heartbeatIntervalMs ?? 15000);
setInterval(() => {