chore: checkpoint Boss app v2.5.11
This commit is contained in:
@@ -13,6 +13,21 @@ import {
|
||||
getCodexAppServerRunnerConfig,
|
||||
shouldUseCodexAppServerTaskRunner,
|
||||
} from "./codex-app-server-runner.mjs";
|
||||
import {
|
||||
shouldSkipCodexAppServerDiscovery,
|
||||
} from "./codex-app-server-discovery-guard.mjs";
|
||||
import {
|
||||
buildLongRunningCodexProgressSnapshot,
|
||||
normalizeLongRunningProgressIntervalMs,
|
||||
} from "./master-task-progress-heartbeat.mjs";
|
||||
import {
|
||||
resolveHeartbeatProjectsFromSnapshot,
|
||||
runHeartbeatProjectDiscoveryWithTimeout,
|
||||
storeHeartbeatProjectsSnapshot,
|
||||
} from "./heartbeat-project-snapshot.mjs";
|
||||
import {
|
||||
recordHeartbeatRunnerError,
|
||||
} from "./heartbeat-error-state.mjs";
|
||||
import { appendBossUserMessageToCodexThreadRollout } from "./codex-thread-rollout-writer.mjs";
|
||||
import {
|
||||
executeOmxTeamTask,
|
||||
@@ -65,7 +80,15 @@ import {
|
||||
buildMasterAgentTaskCompletionRequestBody,
|
||||
buildRemoteExecutionCompletionPayload,
|
||||
} from "./master-task-completion.mjs";
|
||||
import {
|
||||
postThroughReliableOutbox,
|
||||
replayReliableOutbox,
|
||||
} from "./reliable-outbox.mjs";
|
||||
import {
|
||||
buildLocalAgentHealthSummary,
|
||||
} from "./health-summary.mjs";
|
||||
import { createSerializedRunner } from "./serialized-runner.mjs";
|
||||
import { fetchWithTimeout } from "./fetch-timeout.mjs";
|
||||
|
||||
async function loadConfig(configPath) {
|
||||
const raw = await readFile(resolve(configPath), "utf8");
|
||||
@@ -75,6 +98,23 @@ async function loadConfig(configPath) {
|
||||
async function resolveHeartbeatProjects(config, runtime) {
|
||||
const staticProjects = Array.isArray(config.projects) ? config.projects : [];
|
||||
const staticCandidates = Array.isArray(config.projectCandidates) ? config.projectCandidates : [];
|
||||
const snapshotFallback = runtime.lastHeartbeatProjectsSnapshot && typeof runtime.lastHeartbeatProjectsSnapshot === "object"
|
||||
? runtime.lastHeartbeatProjectsSnapshot
|
||||
: {
|
||||
projects: staticProjects,
|
||||
projectCandidates: staticCandidates,
|
||||
guiConnected: runtime.lastCodexGuiConnected === true,
|
||||
};
|
||||
const snapshotDecision = resolveHeartbeatProjectsFromSnapshot({ config, runtime });
|
||||
if (snapshotDecision.shouldUseSnapshot) {
|
||||
runtime.lastProjectDiscoverySkippedAt = new Date().toISOString();
|
||||
runtime.lastProjectDiscoverySkipReason = "master_task_running";
|
||||
return {
|
||||
projects: snapshotDecision.projects,
|
||||
projectCandidates: snapshotDecision.projectCandidates,
|
||||
guiConnected: snapshotDecision.guiConnected,
|
||||
};
|
||||
}
|
||||
if (config.codexSessionDiscoveryEnabled === false) {
|
||||
return {
|
||||
projects: staticProjects,
|
||||
@@ -83,14 +123,38 @@ async function resolveHeartbeatProjects(config, runtime) {
|
||||
}
|
||||
|
||||
try {
|
||||
const discovered = await discoverCodexProjectCandidatesInWorker({
|
||||
stateDbPath: config.codexStateDbPath,
|
||||
logsDbPath: config.codexLogsDbPath,
|
||||
sessionIndexPath: config.codexSessionIndexPath,
|
||||
globalStatePath: config.codexGlobalStatePath,
|
||||
sessionsDir: config.codexSessionsDir,
|
||||
lookbackHours: config.codexSessionLookbackHours,
|
||||
const discoveryTimeoutMs = config.codexSessionDiscoveryTimeoutMs ?? 3_500;
|
||||
const discoveryResult = await runHeartbeatProjectDiscoveryWithTimeout({
|
||||
timeoutMs: discoveryTimeoutMs,
|
||||
fallback: snapshotFallback,
|
||||
discover: () => discoverCodexProjectCandidatesInWorker({
|
||||
stateDbPath: config.codexStateDbPath,
|
||||
logsDbPath: config.codexLogsDbPath,
|
||||
sessionIndexPath: config.codexSessionIndexPath,
|
||||
globalStatePath: config.codexGlobalStatePath,
|
||||
sessionsDir: config.codexSessionsDir,
|
||||
lookbackHours: config.codexSessionLookbackHours,
|
||||
timeoutMs: discoveryTimeoutMs,
|
||||
}),
|
||||
});
|
||||
if (discoveryResult.error) {
|
||||
runtime.lastProjectDiscoveryAt = new Date().toISOString();
|
||||
runtime.lastProjectDiscoveryOk = false;
|
||||
runtime.lastProjectDiscoverySummary = discoveryResult.error instanceof Error
|
||||
? discoveryResult.error.message
|
||||
: String(discoveryResult.error);
|
||||
runtime.lastCodexGuiConnected = discoveryResult.value.guiConnected === true;
|
||||
postAppLog(config, runtime, {
|
||||
level: "warning",
|
||||
category: "local_agent.codex_discovery_degraded",
|
||||
message: "Codex 线程扫描超时或失败,已使用缓存项目继续心跳。",
|
||||
detail: runtime.lastProjectDiscoverySummary,
|
||||
mirrorToMaster: false,
|
||||
}).catch(() => null);
|
||||
return discoveryResult.value;
|
||||
}
|
||||
|
||||
const discovered = discoveryResult.value;
|
||||
const candidateMap = new Map();
|
||||
for (const candidate of [...staticCandidates, ...discovered.projectCandidates]) {
|
||||
candidateMap.set(candidate.codexThreadRef ?? candidate.threadId, candidate);
|
||||
@@ -101,22 +165,24 @@ async function resolveHeartbeatProjects(config, runtime) {
|
||||
runtime.lastProjectDiscoveryOk = true;
|
||||
runtime.lastProjectDiscoverySummary = `${mergedCandidates.length} threads / ${mergedProjects.length} folders`;
|
||||
runtime.lastCodexGuiConnected = discovered.guiConnected === true;
|
||||
return {
|
||||
const heartbeatProjects = {
|
||||
projects: mergedProjects,
|
||||
projectCandidates: mergedCandidates,
|
||||
guiConnected: discovered.guiConnected === true,
|
||||
};
|
||||
storeHeartbeatProjectsSnapshot(runtime, heartbeatProjects);
|
||||
return heartbeatProjects;
|
||||
} catch (error) {
|
||||
runtime.lastProjectDiscoveryAt = new Date().toISOString();
|
||||
runtime.lastProjectDiscoveryOk = false;
|
||||
runtime.lastProjectDiscoverySummary = error instanceof Error ? error.message : String(error);
|
||||
await postAppLog(config, runtime, {
|
||||
postAppLog(config, runtime, {
|
||||
level: "error",
|
||||
category: "local_agent.codex_discovery_failed",
|
||||
message: "Codex 线程扫描失败,已退回静态项目配置。",
|
||||
detail: runtime.lastProjectDiscoverySummary,
|
||||
mirrorToMaster: true,
|
||||
});
|
||||
}).catch(() => null);
|
||||
return {
|
||||
projects: staticProjects,
|
||||
projectCandidates: staticCandidates,
|
||||
@@ -252,53 +318,60 @@ async function postHeartbeat(config, runtime, heartbeatProjects) {
|
||||
...mergedProjectCandidates.map((candidate) => candidate.folderName).filter(Boolean),
|
||||
]),
|
||||
];
|
||||
const response = await fetch(`${config.controlPlaneUrl.replace(/\/$/, "")}/api/device-heartbeat`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
deviceId: config.deviceId,
|
||||
token: runtime.issuedToken ?? config.token,
|
||||
pairingCode: runtime.issuedToken ? undefined : config.pairingCode,
|
||||
name: config.name,
|
||||
avatar: config.avatar,
|
||||
account: config.account,
|
||||
status: config.status,
|
||||
quota5h: config.quota5h,
|
||||
quota7d: config.quota7d,
|
||||
capabilities: {
|
||||
gui: {
|
||||
connected: guiConnected,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
const response = await fetchWithTimeout(
|
||||
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/device-heartbeat`,
|
||||
{
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
deviceId: config.deviceId,
|
||||
token: runtime.issuedToken ?? config.token,
|
||||
pairingCode: runtime.issuedToken ? undefined : config.pairingCode,
|
||||
name: config.name,
|
||||
avatar: config.avatar,
|
||||
account: config.account,
|
||||
status: config.status,
|
||||
quota5h: config.quota5h,
|
||||
quota7d: config.quota7d,
|
||||
capabilities: {
|
||||
gui: {
|
||||
connected: guiConnected,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
},
|
||||
cli: {
|
||||
connected: config.cliConnected !== false,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
},
|
||||
browserAutomation: {
|
||||
connected: config.browserAutomationConnected !== false || Boolean(browserControlRuntime.enabled && browserControlRuntime.command),
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
},
|
||||
computerUse: {
|
||||
connected: computerUseConnected,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
},
|
||||
codexAppServer: {
|
||||
connected: codexAppServerConnected,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
metadata: codexAppServerMetadata,
|
||||
},
|
||||
},
|
||||
cli: {
|
||||
connected: config.cliConnected !== false,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
},
|
||||
browserAutomation: {
|
||||
connected: config.browserAutomationConnected !== false || Boolean(browserControlRuntime.enabled && browserControlRuntime.command),
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
},
|
||||
computerUse: {
|
||||
connected: computerUseConnected,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
},
|
||||
codexAppServer: {
|
||||
connected: codexAppServerConnected,
|
||||
lastSeenAt: now,
|
||||
lastActiveProjectId: "",
|
||||
metadata: codexAppServerMetadata,
|
||||
},
|
||||
},
|
||||
preferredExecutionMode,
|
||||
projects: mergedProjects,
|
||||
projectCandidates: mergedProjectCandidates,
|
||||
endpoint: config.endpoint,
|
||||
}),
|
||||
});
|
||||
preferredExecutionMode,
|
||||
projects: mergedProjects,
|
||||
projectCandidates: mergedProjectCandidates,
|
||||
endpoint: config.endpoint,
|
||||
}),
|
||||
},
|
||||
{
|
||||
timeoutMs: config.heartbeatPostTimeoutMs ?? 4_000,
|
||||
timeoutMessage: "LOCAL_AGENT_HEARTBEAT_POST_TIMEOUT",
|
||||
},
|
||||
);
|
||||
|
||||
const text = await response.text();
|
||||
let json = null;
|
||||
@@ -379,11 +452,48 @@ async function resolveCodexAppServerCapabilityConnected(codexAppServerRuntime) {
|
||||
return canExecuteCommand(codexAppServerRuntime.command, codexAppServerRuntime.cwd || process.cwd());
|
||||
}
|
||||
|
||||
/**
 * Starts a Codex App Server capability-discovery refresh without blocking the caller.
 *
 * Only one refresh runs at a time, guarded by
 * `runtime.codexAppServerCapabilityMetadataRefreshBusy`. On success the
 * discovered metadata, its timestamp and an empty error string are stored on
 * `runtime`. On failure the error message and failure time are stored and a
 * warning is handed to `postAppLog`.
 *
 * @param {object} config - Agent configuration; forwarded to `postAppLog`.
 * @param {object} runtime - Mutable runtime state that receives the refresh bookkeeping.
 * @param {object} codexAppServerRuntime - Passed unchanged to `discoverCodexAppServerCapabilities`.
 * @param {number} [now] - Epoch milliseconds for the "started at" stamp; the
 *   current time is used when this is missing or not a valid date.
 * @returns {void}
 */
function refreshCodexAppServerCapabilityMetadataInBackground(config, runtime, codexAppServerRuntime, now) {
  if (runtime.codexAppServerCapabilityMetadataRefreshBusy) {
    return;
  }

  // Resolve the stamp BEFORE taking the busy flag: toISOString() throws a
  // RangeError on an invalid date, and throwing after the flag is set would
  // block every later refresh.
  const requestedStart = new Date(now);
  const startedAt = Number.isNaN(requestedStart.getTime())
    ? new Date().toISOString()
    : requestedStart.toISOString();

  runtime.codexAppServerCapabilityMetadataRefreshBusy = true;
  runtime.codexAppServerCapabilityMetadataRefreshStartedAt = startedAt;

  void (async () => {
    try {
      const metadata = await discoverCodexAppServerCapabilities(codexAppServerRuntime);
      runtime.codexAppServerCapabilityMetadata = metadata;
      runtime.codexAppServerCapabilityMetadataAtMs = Date.now();
      runtime.codexAppServerCapabilityMetadataError = "";
      runtime.codexAppServerCapabilityMetadataRefreshCompletedAt = new Date().toISOString();
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      runtime.codexAppServerCapabilityMetadataError = message;
      runtime.codexAppServerCapabilityMetadataRefreshFailedAt = new Date().toISOString();
      // Fire and forget, like the other discovery paths in this file: log
      // delivery must not keep the busy flag held.
      postAppLog(config, runtime, {
        level: "warn",
        category: "local_agent.codex_app_server_capability_discovery_failed",
        message: "Codex App Server 能力清单发现失败,设备心跳继续上报连接状态。",
        detail: message,
        mirrorToMaster: false,
      }).catch(() => null);
    } finally {
      runtime.codexAppServerCapabilityMetadataRefreshBusy = false;
    }
  })();
}
|
||||
|
||||
async function resolveCodexAppServerCapabilityMetadata(config, runtime, codexAppServerRuntime, connected) {
|
||||
if (!connected || !codexAppServerRuntime?.enabled || codexAppServerRuntime.discoveryEnabled === false) {
|
||||
return undefined;
|
||||
}
|
||||
const now = Date.now();
|
||||
const discoveryGuard = shouldSkipCodexAppServerDiscovery({ config, runtime });
|
||||
if (discoveryGuard.skip) {
|
||||
runtime.codexAppServerCapabilityMetadataSkippedAt = new Date(now).toISOString();
|
||||
runtime.codexAppServerCapabilityMetadataSkipReason = discoveryGuard.reason;
|
||||
runtime.codexAppServerCapabilityMetadataSkipTaskId = discoveryGuard.activeTaskId;
|
||||
return runtime.codexAppServerCapabilityMetadata;
|
||||
}
|
||||
const ttlMs = codexAppServerRuntime.discoveryTtlMs ?? 300_000;
|
||||
if (
|
||||
runtime.codexAppServerCapabilityMetadata &&
|
||||
@@ -393,24 +503,31 @@ async function resolveCodexAppServerCapabilityMetadata(config, runtime, codexApp
|
||||
return runtime.codexAppServerCapabilityMetadata;
|
||||
}
|
||||
|
||||
try {
|
||||
const metadata = await discoverCodexAppServerCapabilities(codexAppServerRuntime);
|
||||
runtime.codexAppServerCapabilityMetadata = metadata;
|
||||
runtime.codexAppServerCapabilityMetadataAtMs = now;
|
||||
runtime.codexAppServerCapabilityMetadataError = "";
|
||||
return metadata;
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
runtime.codexAppServerCapabilityMetadataError = message;
|
||||
await postAppLog(config, runtime, {
|
||||
level: "warn",
|
||||
category: "local_agent.codex_app_server_capability_discovery_failed",
|
||||
message: "Codex App Server 能力清单发现失败,设备心跳继续上报连接状态。",
|
||||
detail: message,
|
||||
mirrorToMaster: false,
|
||||
});
|
||||
return runtime.codexAppServerCapabilityMetadata;
|
||||
if (config.codexAppServerDiscoveryInlineInHeartbeat === true) {
|
||||
try {
|
||||
const metadata = await discoverCodexAppServerCapabilities(codexAppServerRuntime);
|
||||
runtime.codexAppServerCapabilityMetadata = metadata;
|
||||
runtime.codexAppServerCapabilityMetadataAtMs = now;
|
||||
runtime.codexAppServerCapabilityMetadataError = "";
|
||||
return metadata;
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
runtime.codexAppServerCapabilityMetadataError = message;
|
||||
postAppLog(config, runtime, {
|
||||
level: "warn",
|
||||
category: "local_agent.codex_app_server_capability_discovery_failed",
|
||||
message: "Codex App Server 能力清单发现失败,设备心跳继续上报连接状态。",
|
||||
detail: message,
|
||||
mirrorToMaster: false,
|
||||
}).catch(() => null);
|
||||
return runtime.codexAppServerCapabilityMetadata;
|
||||
}
|
||||
}
|
||||
|
||||
refreshCodexAppServerCapabilityMetadataInBackground(config, runtime, codexAppServerRuntime, now);
|
||||
runtime.codexAppServerCapabilityMetadataSkippedAt = new Date(now).toISOString();
|
||||
runtime.codexAppServerCapabilityMetadataSkipReason = "background_refresh";
|
||||
return runtime.codexAppServerCapabilityMetadata;
|
||||
}
|
||||
|
||||
function deviceTokenHeaders(config, runtime) {
|
||||
@@ -420,7 +537,7 @@ function deviceTokenHeaders(config, runtime) {
|
||||
|
||||
async function postThreadContext(config, runtime, snapshot) {
|
||||
const workerId = snapshot.workerId ?? config.workerId ?? `${config.deviceId}-worker`;
|
||||
const response = await fetch(
|
||||
const response = await fetchWithTimeout(
|
||||
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/workers/${workerId}/thread-context`,
|
||||
{
|
||||
method: "POST",
|
||||
@@ -452,6 +569,10 @@ async function postThreadContext(config, runtime, snapshot) {
|
||||
capturedAt: new Date().toISOString(),
|
||||
}),
|
||||
},
|
||||
{
|
||||
timeoutMs: config.threadContextPostTimeoutMs ?? 3_000,
|
||||
timeoutMessage: "LOCAL_AGENT_THREAD_CONTEXT_POST_TIMEOUT",
|
||||
},
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -525,7 +646,7 @@ async function discoverSkills(config) {
|
||||
}
|
||||
|
||||
async function postSkills(config, runtime, skills) {
|
||||
const response = await fetch(
|
||||
const response = await fetchWithTimeout(
|
||||
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/devices/${config.deviceId}/skills`,
|
||||
{
|
||||
method: "POST",
|
||||
@@ -535,6 +656,10 @@ async function postSkills(config, runtime, skills) {
|
||||
},
|
||||
body: JSON.stringify({ skills }),
|
||||
},
|
||||
{
|
||||
timeoutMs: config.skillsPostTimeoutMs ?? 3_000,
|
||||
timeoutMessage: "LOCAL_AGENT_SKILLS_POST_TIMEOUT",
|
||||
},
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -547,17 +672,18 @@ async function postSkills(config, runtime, skills) {
|
||||
|
||||
async function postAppLog(config, runtime, payload) {
|
||||
try {
|
||||
await fetch(`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/app-logs`, {
|
||||
method: "POST",
|
||||
await postThroughReliableOutbox(config, {
|
||||
kind: "app.log",
|
||||
url: `${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/app-logs`,
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
...deviceTokenHeaders(config, runtime),
|
||||
},
|
||||
body: JSON.stringify({
|
||||
body: {
|
||||
deviceId: config.deviceId,
|
||||
source: "local_agent",
|
||||
...payload,
|
||||
}),
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// Ignore log transport failures to avoid blocking the agent loop.
|
||||
@@ -571,7 +697,7 @@ async function claimMasterAgentTask(config, runtime) {
|
||||
const waitMs = Number.isFinite(configuredWaitMs)
|
||||
? Math.max(0, Math.min(30_000, Math.floor(configuredWaitMs)))
|
||||
: 25_000;
|
||||
const response = await fetch(
|
||||
const response = await fetchWithTimeout(
|
||||
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/claim`,
|
||||
{
|
||||
method: "POST",
|
||||
@@ -581,6 +707,10 @@ async function claimMasterAgentTask(config, runtime) {
|
||||
},
|
||||
body: JSON.stringify({ deviceId: config.deviceId, waitMs }),
|
||||
},
|
||||
{
|
||||
timeoutMs: waitMs + Number(config.masterAgentClaimTimeoutPaddingMs ?? 5_000),
|
||||
timeoutMessage: "LOCAL_AGENT_MASTER_TASK_CLAIM_TIMEOUT",
|
||||
},
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -591,52 +721,41 @@ async function claimMasterAgentTask(config, runtime) {
|
||||
}
|
||||
|
||||
/**
 * Reports the final result of a master-agent task to the control plane.
 *
 * The request is handed to `postThroughReliableOutbox` under the kind
 * `"task.complete"` instead of being sent with a direct `fetch`. The body is
 * passed as an object, not a JSON string.
 *
 * @param {object} config - Agent configuration; `controlPlaneUrl` is required
 *   (a single trailing slash is stripped).
 * @param {object} runtime - Runtime state, used to build the device token headers.
 * @param {object} payload - Completion payload; `payload.taskId` selects the task URL.
 * @returns {Promise<object>} Whatever `postThroughReliableOutbox` resolves to.
 *   Callers in this file read `.ok` and `.body` from it — presumably the same
 *   `{ ok, status, body }` shape as before; confirm against the outbox module.
 */
async function completeMasterAgentTask(config, runtime, payload) {
  const controlPlaneBase = config.controlPlaneUrl.replace(/\/$/, "");
  const result = await postThroughReliableOutbox(config, {
    kind: "task.complete",
    url: `${controlPlaneBase}/api/v1/master-agent/tasks/${payload.taskId}/complete`,
    headers: {
      "Content-Type": "application/json",
      ...deviceTokenHeaders(config, runtime),
    },
    body: buildMasterAgentTaskCompletionRequestBody(config, payload),
  });

  return result;
}
|
||||
|
||||
/**
 * Sends an in-flight progress update for a master-agent task.
 *
 * The request is handed to `postThroughReliableOutbox` under the kind
 * `"task.progress"`. The body is passed as an object and carries the device
 * id, the status (defaulting to `"running"` when `payload.status` is falsy),
 * the phase, the request id and the execution progress.
 *
 * @param {object} config - Agent configuration; `controlPlaneUrl` and `deviceId` are read.
 * @param {object} runtime - Runtime state, used to build the device token headers.
 * @param {object} payload - `{ taskId, status?, phase?, requestId?, executionProgress? }`.
 * @returns {Promise<object>} Whatever `postThroughReliableOutbox` resolves to.
 *   Callers in this file read `.ok` and `.body` from it — confirm the exact
 *   shape against the outbox module.
 */
async function postMasterAgentTaskProgress(config, runtime, payload) {
  const controlPlaneBase = config.controlPlaneUrl.replace(/\/$/, "");
  const result = await postThroughReliableOutbox(config, {
    kind: "task.progress",
    url: `${controlPlaneBase}/api/v1/master-agent/tasks/${payload.taskId}/progress`,
    headers: {
      "Content-Type": "application/json",
      ...deviceTokenHeaders(config, runtime),
    },
    body: {
      deviceId: config.deviceId,
      status: payload.status || "running",
      phase: payload.phase,
      requestId: payload.requestId,
      executionProgress: payload.executionProgress,
    },
  });

  return result;
}
|
||||
|
||||
async function fetchMasterAgentTaskControlState(config, runtime, task) {
|
||||
const response = await fetch(
|
||||
const response = await fetchWithTimeout(
|
||||
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/master-agent/tasks/${task.taskId}/control-state`,
|
||||
{
|
||||
method: "GET",
|
||||
@@ -644,6 +763,10 @@ async function fetchMasterAgentTaskControlState(config, runtime, task) {
|
||||
...deviceTokenHeaders(config, runtime),
|
||||
},
|
||||
},
|
||||
{
|
||||
timeoutMs: config.masterAgentControlStateTimeoutMs ?? 3_000,
|
||||
timeoutMessage: "LOCAL_AGENT_MASTER_TASK_CONTROL_STATE_TIMEOUT",
|
||||
},
|
||||
);
|
||||
if (!response.ok) {
|
||||
return {
|
||||
@@ -685,7 +808,7 @@ function buildCodexRemoteControlMaintenanceReply(task, result) {
|
||||
}
|
||||
|
||||
async function claimSkillLifecycleRequest(config, runtime) {
|
||||
const response = await fetch(
|
||||
const response = await fetchWithTimeout(
|
||||
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/devices/${config.deviceId}/skill-requests/claim`,
|
||||
{
|
||||
method: "POST",
|
||||
@@ -695,6 +818,10 @@ async function claimSkillLifecycleRequest(config, runtime) {
|
||||
},
|
||||
body: JSON.stringify({ deviceId: config.deviceId }),
|
||||
},
|
||||
{
|
||||
timeoutMs: config.skillLifecycleClaimTimeoutMs ?? 5_000,
|
||||
timeoutMessage: "LOCAL_AGENT_SKILL_REQUEST_CLAIM_TIMEOUT",
|
||||
},
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -705,7 +832,7 @@ async function claimSkillLifecycleRequest(config, runtime) {
|
||||
}
|
||||
|
||||
async function completeSkillLifecycleRequest(config, runtime, request, result) {
|
||||
const response = await fetch(
|
||||
const response = await fetchWithTimeout(
|
||||
`${config.controlPlaneUrl.replace(/\/$/, "")}/api/v1/devices/${config.deviceId}/skill-requests/${request.requestId}/complete`,
|
||||
{
|
||||
method: "POST",
|
||||
@@ -719,6 +846,10 @@ async function completeSkillLifecycleRequest(config, runtime, request, result) {
|
||||
error: result.error,
|
||||
}),
|
||||
},
|
||||
{
|
||||
timeoutMs: config.skillLifecycleCompleteTimeoutMs ?? 5_000,
|
||||
timeoutMessage: "LOCAL_AGENT_SKILL_REQUEST_COMPLETE_TIMEOUT",
|
||||
},
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -897,9 +1028,60 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
status: "running",
|
||||
startedAt: new Date().toISOString(),
|
||||
};
|
||||
/**
 * Posts a "running" progress update for the current task, tagged with `phase`.
 *
 * Never throws: a failure of the underlying post is converted into
 * `{ ok: false, status: 0, body: <error message> }`.
 *
 * @param {string} phase - Phase label; sent both at the top level and inside
 *   `executionProgress.phase` (overriding any phase already present there).
 * @param {object} [executionProgress] - Progress fields to merge. Anything
 *   that is not a plain (non-array) object is ignored.
 * @returns {Promise<object>} The post result, or the synthetic failure object.
 */
const emitTaskPhase = async (phase, executionProgress) => {
  try {
    // Merge plain objects only. Spreading a string or array would scatter
    // index keys ("0", "1", ...) into the progress payload.
    const baseProgress =
      executionProgress && typeof executionProgress === "object" && !Array.isArray(executionProgress)
        ? executionProgress
        : {};
    return await postMasterAgentTaskProgress(config, runtime, {
      taskId: task.taskId,
      status: "running",
      phase,
      executionProgress: {
        ...baseProgress,
        phase,
      },
    });
  } catch (error) {
    return {
      ok: false,
      status: 0,
      body: error instanceof Error ? error.message : String(error),
    };
  }
};
|
||||
/**
 * Starts a periodic progress heartbeat for a long-running task step.
 *
 * The interval comes from `config.masterAgentLongTaskProgressIntervalMs`
 * (falling back to `config.masterAgentProgressHeartbeatIntervalMs`), passed
 * through `normalizeLongRunningProgressIntervalMs`. When the normalized
 * interval is not positive, no timer is created.
 *
 * @param {object} [options]
 * @param {string} [options.phase="awaiting_reply"] - Phase reported on every heartbeat.
 * @param {Function} [options.getProgress] - Optional callback returning the
 *   latest known progress, used as `baseProgress` for the snapshot.
 * @returns {Function} A stop function that clears the timer (a no-op when the
 *   heartbeat is disabled).
 */
const createLongRunningProgressHeartbeat = ({ phase = "awaiting_reply", getProgress } = {}) => {
  const intervalMs = normalizeLongRunningProgressIntervalMs(
    config.masterAgentLongTaskProgressIntervalMs ?? config.masterAgentProgressHeartbeatIntervalMs,
  );
  if (intervalMs <= 0) {
    return () => {};
  }

  const startedAtMs = Date.now();
  let heartbeatCount = 0;

  const sendHeartbeat = async () => {
    heartbeatCount += 1;
    await emitTaskPhase(
      phase,
      buildLongRunningCodexProgressSnapshot({
        task,
        phase,
        startedAtMs,
        nowMs: Date.now(),
        baseProgress: typeof getProgress === "function" ? getProgress() : undefined,
        heartbeatCount,
      }),
    );
  };

  const timer = setInterval(() => {
    // Heartbeats are best-effort. If getProgress() or the snapshot builder
    // throws, swallow it here so the rejection is not left unhandled (which
    // terminates Node by default) and the next tick can try again.
    sendHeartbeat().catch(() => null);
  }, intervalMs);

  return () => {
    clearInterval(timer);
  };
};
|
||||
|
||||
try {
|
||||
let activeChild = null;
|
||||
await emitTaskPhase("executor_starting");
|
||||
const executionResult = await (async () => {
|
||||
if (canHandleCodexRemoteControlMaintenanceTask(task)) {
|
||||
const daemonResult = await runCodexRemoteControlDaemonAction(
|
||||
@@ -966,43 +1148,55 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
|
||||
const codexAppServerRunner = getCodexAppServerRunnerConfig(process.env, config);
|
||||
if (shouldUseCodexAppServerTaskRunner(codexAppServerRunner, task)) {
|
||||
const appServerResult = await executeCodexAppServerTask(
|
||||
{
|
||||
...codexAppServerRunner,
|
||||
interruptPollIntervalMs: normalizeInterruptPollIntervalMs(config),
|
||||
shouldInterruptActiveTurn: async () => {
|
||||
const controlState = await fetchMasterAgentTaskControlState(config, runtime, task);
|
||||
if (!controlState.ok) {
|
||||
let latestCodexExecutionProgress;
|
||||
const stopLongRunningProgressHeartbeat = createLongRunningProgressHeartbeat({
|
||||
phase: "awaiting_reply",
|
||||
getProgress: () => latestCodexExecutionProgress,
|
||||
});
|
||||
let appServerResult;
|
||||
try {
|
||||
appServerResult = await executeCodexAppServerTask(
|
||||
{
|
||||
...codexAppServerRunner,
|
||||
interruptPollIntervalMs: normalizeInterruptPollIntervalMs(config),
|
||||
shouldInterruptActiveTurn: async () => {
|
||||
const controlState = await fetchMasterAgentTaskControlState(config, runtime, task);
|
||||
if (!controlState.ok) {
|
||||
return false;
|
||||
}
|
||||
if (controlState.body?.canceled === true || controlState.body?.status === "canceled") {
|
||||
return {
|
||||
interrupt: true,
|
||||
reason: controlState.body?.cancelReason || "USER_CANCELED_TASK",
|
||||
};
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (controlState.body?.canceled === true || controlState.body?.status === "canceled") {
|
||||
return {
|
||||
interrupt: true,
|
||||
reason: controlState.body?.cancelReason || "USER_CANCELED_TASK",
|
||||
};
|
||||
}
|
||||
return false;
|
||||
},
|
||||
onProgress: async (executionProgress) => {
|
||||
const progressResult = await postMasterAgentTaskProgress(config, runtime, {
|
||||
taskId: task.taskId,
|
||||
status: "running",
|
||||
executionProgress,
|
||||
});
|
||||
if (!progressResult.ok) {
|
||||
await postAppLog(config, runtime, {
|
||||
projectId: task.projectId,
|
||||
level: "warn",
|
||||
category: "local_agent.codex_app_server_progress_failed",
|
||||
message: "Codex App Server 进度实时回写失败,完成回写仍会携带最终进度。",
|
||||
detail: progressResult.body,
|
||||
mirrorToMaster: false,
|
||||
},
|
||||
onProgress: async (executionProgress) => {
|
||||
latestCodexExecutionProgress = executionProgress;
|
||||
const progressResult = await postMasterAgentTaskProgress(config, runtime, {
|
||||
taskId: task.taskId,
|
||||
status: "running",
|
||||
phase: "awaiting_reply",
|
||||
executionProgress,
|
||||
});
|
||||
}
|
||||
if (!progressResult.ok) {
|
||||
await postAppLog(config, runtime, {
|
||||
projectId: task.projectId,
|
||||
level: "warn",
|
||||
category: "local_agent.codex_app_server_progress_failed",
|
||||
message: "Codex App Server 进度实时回写失败,完成回写仍会携带最终进度。",
|
||||
detail: progressResult.body,
|
||||
mirrorToMaster: false,
|
||||
});
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
task,
|
||||
);
|
||||
task,
|
||||
);
|
||||
} finally {
|
||||
stopLongRunningProgressHeartbeat();
|
||||
}
|
||||
if (appServerResult.status === "interrupted") {
|
||||
return {
|
||||
interruptedCompletion: {
|
||||
@@ -1114,6 +1308,7 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
},
|
||||
async () =>
|
||||
await new Promise((resolveTask, rejectTask) => {
|
||||
void emitTaskPhase("turn_started");
|
||||
const child = spawn("codex", codexExecution.args, {
|
||||
cwd: codexExecution.cwd,
|
||||
env: process.env,
|
||||
@@ -1193,6 +1388,7 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
return;
|
||||
}
|
||||
const { replyBody, dispatchExecutionCompletion, executionProgress } = executionResult;
|
||||
await emitTaskPhase("completing", executionProgress);
|
||||
|
||||
const completion = await completeMasterAgentTask(
|
||||
config,
|
||||
@@ -1210,6 +1406,24 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
executionProgress,
|
||||
}),
|
||||
);
|
||||
if (!completion.ok) {
|
||||
await emitTaskPhase("completing", {
|
||||
...(executionProgress && typeof executionProgress === "object" ? executionProgress : {}),
|
||||
title: "结果已生成,正在同步",
|
||||
warnings: [
|
||||
...(
|
||||
Array.isArray(executionProgress?.warnings)
|
||||
? executionProgress.warnings.filter(Boolean).slice(0, 6)
|
||||
: []
|
||||
),
|
||||
{
|
||||
id: "task-complete-sync-retrying",
|
||||
severity: "warning",
|
||||
message: "本机已生成任务结果,正在重试同步到 Boss 对话窗口。",
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
runtime.activeMasterTask = {
|
||||
taskId: task.taskId,
|
||||
status: completion.ok ? "completed" : "complete_failed",
|
||||
@@ -1218,10 +1432,14 @@ async function runMasterAgentTask(config, runtime, task) {
|
||||
};
|
||||
await postAppLog(config, runtime, {
|
||||
projectId: "master-agent",
|
||||
level: "info",
|
||||
category: "local_agent.master_agent_task_completed",
|
||||
message: `Master Codex Node 已完成主 Agent 任务 ${task.taskId}。`,
|
||||
detail: replyBody.slice(0, 280),
|
||||
level: completion.ok ? "info" : "warn",
|
||||
category: completion.ok
|
||||
? "local_agent.master_agent_task_completed"
|
||||
: "local_agent.master_agent_task_completion_sync_retrying",
|
||||
message: completion.ok
|
||||
? `Master Codex Node 已完成主 Agent 任务 ${task.taskId}。`
|
||||
: `Master Codex Node 已生成结果,正在重试同步主 Agent 任务 ${task.taskId}。`,
|
||||
detail: completion.ok ? replyBody.slice(0, 280) : completion.body,
|
||||
mirrorToMaster: false,
|
||||
});
|
||||
} catch (error) {
|
||||
@@ -1440,10 +1658,124 @@ const runtime = {
|
||||
lastProjectDiscoveryAt: null,
|
||||
lastProjectDiscoveryOk: false,
|
||||
lastProjectDiscoverySummary: null,
|
||||
lastReliableOutboxReplay: null,
|
||||
};
|
||||
|
||||
/**
 * Replays queued reliable-outbox entries without blocking the caller.
 *
 * Only one replay runs at a time, guarded by `runtime.reliableOutboxReplayBusy`.
 * The replay is bounded by three optional config values:
 * `heartbeatOutboxReplayLimit` (default 5), `heartbeatOutboxRequestTimeoutMs`
 * (default 1000) and `heartbeatOutboxReplayBudgetMs` (default 2500).
 *
 * The outcome is stored in `runtime.lastReliableOutboxReplay`: either the
 * replay result, or a zeroed summary carrying the error message. The finish
 * time is stored in `runtime.lastReliableOutboxReplayAt`.
 *
 * @param {object} config - Agent configuration, forwarded to `replayReliableOutbox`.
 * @param {object} runtime - Mutable runtime state that receives the replay bookkeeping.
 * @returns {void}
 */
function replayReliableOutboxInBackground(config, runtime) {
  if (runtime.reliableOutboxReplayBusy) {
    return;
  }
  runtime.reliableOutboxReplayBusy = true;
  runtime.lastReliableOutboxReplayStartedAt = new Date().toISOString();

  void (async () => {
    try {
      // The call itself sits inside the try, so a synchronous throw (from
      // replayReliableOutbox or from reading config) cannot escape with the
      // busy flag still set.
      runtime.lastReliableOutboxReplay = await replayReliableOutbox(config, {
        limit: config.heartbeatOutboxReplayLimit ?? 5,
        requestTimeoutMs: config.heartbeatOutboxRequestTimeoutMs ?? 1_000,
        maxDurationMs: config.heartbeatOutboxReplayBudgetMs ?? 2_500,
      });
    } catch (error) {
      runtime.lastReliableOutboxReplay = {
        attempted: 0,
        sent: 0,
        retained: 0,
        stoppedByBudget: false,
        error: error instanceof Error ? error.message : String(error),
      };
    } finally {
      runtime.lastReliableOutboxReplayAt = new Date().toISOString();
      runtime.reliableOutboxReplayBusy = false;
    }
  })();
}
|
||||
|
||||
/**
 * Uploads thread-context snapshots one after another without blocking the caller.
 *
 * Only one sync runs at a time, guarded by `runtime.threadContextSyncBusy`;
 * an empty or non-array `snapshots` is ignored. Each snapshot is posted with
 * `postThreadContext`. A failed post (thrown error or `ok: false`) is recorded
 * and reported through `postAppLog`, and the loop moves on to the next
 * snapshot. Per-snapshot results end up in `runtime.lastThreadContextResults`.
 *
 * @param {object} config - Agent configuration; `workerId` and `deviceId` are
 *   used to label failures.
 * @param {object} runtime - Mutable runtime state that receives the sync bookkeeping.
 * @param {Array<object>} snapshots - Thread-context snapshots to upload.
 * @returns {void}
 */
function syncThreadContextsInBackground(config, runtime, snapshots) {
  if (runtime.threadContextSyncBusy || !Array.isArray(snapshots) || snapshots.length === 0) {
    return;
  }
  runtime.threadContextSyncBusy = true;
  runtime.lastThreadContextSyncStartedAt = new Date().toISOString();

  // Work on a copy so the caller can reuse or mutate its array while the
  // uploads are still awaiting.
  const pendingSnapshots = [...snapshots];

  void (async () => {
    const results = [];
    for (const snapshot of pendingSnapshots) {
      let threadResult;
      try {
        threadResult = await postThreadContext(config, runtime, snapshot);
      } catch (error) {
        // Optional chaining: a null/invalid snapshot is the likeliest reason
        // we are here, and dereferencing it again would abort the whole
        // batch and discard the results gathered so far.
        threadResult = {
          ok: false,
          status: 0,
          body: error instanceof Error ? error.message : String(error),
          workerId: snapshot?.workerId ?? config.workerId ?? `${config.deviceId}-worker`,
          threadId: snapshot?.threadId,
        };
      }
      results.push(threadResult);
      if (!threadResult.ok) {
        // Best-effort log; never let log delivery stall the remaining uploads.
        postAppLog(config, runtime, {
          projectId: snapshot?.projectId,
          level: "error",
          category: "local_agent.thread_context_failed",
          message: `线程预算上报失败:${snapshot?.threadId}`,
          detail: threadResult.body,
          mirrorToMaster: true,
        }).catch(() => null);
      }
    }
    runtime.lastThreadContextResults = results;
    runtime.lastThreadContextSyncAt = new Date().toISOString();
  })()
    .catch((error) => {
      runtime.lastThreadContextResults = [{
        ok: false,
        status: 0,
        body: error instanceof Error ? error.message : String(error),
      }];
      runtime.lastThreadContextSyncAt = new Date().toISOString();
    })
    .finally(() => {
      runtime.threadContextSyncBusy = false;
    });
}
|
||||
|
||||
/**
 * Discovers the local skills and posts them in the background.
 *
 * Does nothing while a previous run is still in flight
 * (`runtime.skillSyncBusy`). On success the discovered skills and the post
 * result (`ok`, `status`, `body`) are stored on `runtime`. On any error the
 * run is recorded as failed with status 0 and the error message, and a
 * fire-and-forget error log is emitted.
 *
 * @param {object} config - Agent configuration, passed through to the helpers.
 * @param {object} runtime - Mutable runtime state, updated in place.
 * @returns {void}
 */
function syncSkillsInBackground(config, runtime) {
  if (runtime.skillSyncBusy) {
    return;
  }

  const scanAndPost = async () => {
    const discovered = await discoverSkills(config);
    // Stored before posting, so the scan result survives a failed post.
    runtime.lastSkills = discovered;
    const postOutcome = await postSkills(config, runtime, discovered);
    runtime.lastSkillSyncAt = new Date().toISOString();
    runtime.lastSkillSyncOk = postOutcome.ok;
    runtime.lastSkillSyncStatus = postOutcome.status;
    runtime.lastSkillSyncBody = postOutcome.body;
  };

  const recordFailure = (error) => {
    const reason = error instanceof Error ? error.message : String(error);
    runtime.lastSkillSyncAt = new Date().toISOString();
    runtime.lastSkillSyncOk = false;
    runtime.lastSkillSyncStatus = 0;
    runtime.lastSkillSyncBody = reason;
    // Not awaited; a failed log post is ignored.
    postAppLog(config, runtime, {
      level: "error",
      category: "local_agent.skills_sync_failed",
      message: "Skill 扫描或同步失败。",
      detail: runtime.lastSkillSyncBody,
      mirrorToMaster: true,
    }).catch(() => null);
  };

  const releaseLatch = () => {
    runtime.skillSyncBusy = false;
  };

  runtime.skillSyncBusy = true;
  runtime.lastSkillSyncStartedAt = new Date().toISOString();

  void scanAndPost().catch(recordFailure).finally(releaseLatch);
}
|
||||
|
||||
async function performHeartbeat() {
|
||||
try {
|
||||
replayReliableOutboxInBackground(config, runtime);
|
||||
const heartbeatProjects = await resolveHeartbeatProjects(config, runtime);
|
||||
const result = await postHeartbeat(config, runtime, heartbeatProjects);
|
||||
runtime.lastHeartbeatAt = new Date().toISOString();
|
||||
@@ -1464,43 +1796,8 @@ async function performHeartbeat() {
|
||||
}
|
||||
|
||||
const snapshots = Array.isArray(config.threadContexts) ? config.threadContexts : [];
|
||||
runtime.lastThreadContextResults = [];
|
||||
for (const snapshot of snapshots) {
|
||||
const threadResult = await postThreadContext(config, runtime, snapshot);
|
||||
runtime.lastThreadContextResults.push(threadResult);
|
||||
if (!threadResult.ok) {
|
||||
await postAppLog(config, runtime, {
|
||||
projectId: snapshot.projectId,
|
||||
level: "error",
|
||||
category: "local_agent.thread_context_failed",
|
||||
message: `线程预算上报失败:${snapshot.threadId}`,
|
||||
detail: threadResult.body,
|
||||
mirrorToMaster: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const skills = await discoverSkills(config);
|
||||
runtime.lastSkills = skills;
|
||||
const skillSyncResult = await postSkills(config, runtime, skills);
|
||||
runtime.lastSkillSyncAt = new Date().toISOString();
|
||||
runtime.lastSkillSyncOk = skillSyncResult.ok;
|
||||
runtime.lastSkillSyncStatus = skillSyncResult.status;
|
||||
runtime.lastSkillSyncBody = skillSyncResult.body;
|
||||
} catch (error) {
|
||||
runtime.lastSkillSyncAt = new Date().toISOString();
|
||||
runtime.lastSkillSyncOk = false;
|
||||
runtime.lastSkillSyncStatus = 0;
|
||||
runtime.lastSkillSyncBody = error instanceof Error ? error.message : String(error);
|
||||
await postAppLog(config, runtime, {
|
||||
level: "error",
|
||||
category: "local_agent.skills_sync_failed",
|
||||
message: "Skill 扫描或同步失败。",
|
||||
detail: runtime.lastSkillSyncBody,
|
||||
mirrorToMaster: true,
|
||||
});
|
||||
}
|
||||
syncThreadContextsInBackground(config, runtime, snapshots);
|
||||
syncSkillsInBackground(config, runtime);
|
||||
} catch (error) {
|
||||
runtime.lastHeartbeatAt = new Date().toISOString();
|
||||
runtime.lastHeartbeatOk = false;
|
||||
@@ -1516,7 +1813,10 @@ async function performHeartbeat() {
|
||||
}
|
||||
}
|
||||
|
||||
const heartbeat = createSerializedRunner(performHeartbeat);
|
||||
const heartbeat = createSerializedRunner(performHeartbeat, {
|
||||
timeoutMs: config.heartbeatTimeoutMs ?? 12_000,
|
||||
timeoutErrorMessage: "LOCAL_AGENT_HEARTBEAT_TIMEOUT",
|
||||
});
|
||||
const masterTaskPoll = createSerializedRunner(async () => {
|
||||
await pollMasterAgentTasks(config, runtime);
|
||||
});
|
||||
@@ -1646,13 +1946,19 @@ const server = createServer(async (request, response) => {
|
||||
|
||||
if (requestUrl.pathname === "/health") {
|
||||
response.writeHead(200, { "Content-Type": "application/json" });
|
||||
response.end(
|
||||
JSON.stringify({
|
||||
if (requestUrl.searchParams.get("verbose") === "1") {
|
||||
response.end(
|
||||
JSON.stringify({
|
||||
ok: true,
|
||||
service: "boss-local-agent",
|
||||
runtime,
|
||||
}),
|
||||
);
|
||||
return;
|
||||
}
|
||||
response.end(
|
||||
JSON.stringify(buildLocalAgentHealthSummary(config, runtime)),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1681,9 +1987,11 @@ const server = createServer(async (request, response) => {
|
||||
}
|
||||
|
||||
if (requestUrl.pathname === "/api/v1/heartbeat" && request.method === "POST") {
|
||||
await heartbeat();
|
||||
await heartbeat().catch((error) => {
|
||||
recordHeartbeatRunnerError(runtime, error);
|
||||
});
|
||||
response.writeHead(200, { "Content-Type": "application/json" });
|
||||
response.end(JSON.stringify({ ok: runtime.lastHeartbeatOk, runtime }));
|
||||
response.end(JSON.stringify(buildLocalAgentHealthSummary(config, runtime)));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1704,14 +2012,18 @@ server.listen(config.port, config.bindHost, () => {
|
||||
});
|
||||
|
||||
void (async () => {
|
||||
await heartbeat();
|
||||
await heartbeat().catch((error) => {
|
||||
recordHeartbeatRunnerError(runtime, error);
|
||||
});
|
||||
await masterTaskPoll();
|
||||
await skillLifecyclePoll();
|
||||
await bossAgentOtaPoll();
|
||||
})();
|
||||
|
||||
setInterval(() => {
|
||||
void heartbeat();
|
||||
void heartbeat().catch((error) => {
|
||||
recordHeartbeatRunnerError(runtime, error);
|
||||
});
|
||||
}, config.heartbeatIntervalMs ?? 15000);
|
||||
|
||||
setInterval(() => {
|
||||
|
||||
Reference in New Issue
Block a user