Compare commits
9 commits
PAP-878-cr
...
feat/PAPA-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a82f9ec7eb | ||
|
|
e58a2330a6 | ||
|
|
b3bccf3648 | ||
|
|
d747d847e4 | ||
|
|
8e384947aa | ||
|
|
aa88db7238 | ||
|
|
942d023148 | ||
|
|
1b70091dcd | ||
|
|
af5b980362 |
13 changed files with 12139 additions and 10 deletions
1
packages/db/src/migrations/0046_lethal_starfox.sql
Normal file
1
packages/db/src/migrations/0046_lethal_starfox.sql
Normal file
|
|
@@ -0,0 +1 @@
|
||||||
|
ALTER TABLE "heartbeat_runs" ADD COLUMN "last_output_at" timestamp with time zone;
|
||||||
11857
packages/db/src/migrations/meta/0046_snapshot.json
Normal file
11857
packages/db/src/migrations/meta/0046_snapshot.json
Normal file
File diff suppressed because it is too large
Load diff
|
|
@@ -323,6 +323,13 @@
|
||||||
"when": 1774530504348,
|
"when": 1774530504348,
|
||||||
"tag": "0045_workable_shockwave",
|
"tag": "0045_workable_shockwave",
|
||||||
"breakpoints": true
|
"breakpoints": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"idx": 46,
|
||||||
|
"version": "7",
|
||||||
|
"when": 1774589600897,
|
||||||
|
"tag": "0046_lethal_starfox",
|
||||||
|
"breakpoints": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@@ -33,6 +33,7 @@ export const heartbeatRuns = pgTable(
|
||||||
externalRunId: text("external_run_id"),
|
externalRunId: text("external_run_id"),
|
||||||
processPid: integer("process_pid"),
|
processPid: integer("process_pid"),
|
||||||
processStartedAt: timestamp("process_started_at", { withTimezone: true }),
|
processStartedAt: timestamp("process_started_at", { withTimezone: true }),
|
||||||
|
lastOutputAt: timestamp("last_output_at", { withTimezone: true }),
|
||||||
retryOfRunId: uuid("retry_of_run_id").references((): AnyPgColumn => heartbeatRuns.id, {
|
retryOfRunId: uuid("retry_of_run_id").references((): AnyPgColumn => heartbeatRuns.id, {
|
||||||
onDelete: "set null",
|
onDelete: "set null",
|
||||||
}),
|
}),
|
||||||
|
|
|
||||||
|
|
@@ -35,6 +35,7 @@ export interface HeartbeatRun {
|
||||||
externalRunId: string | null;
|
externalRunId: string | null;
|
||||||
processPid: number | null;
|
processPid: number | null;
|
||||||
processStartedAt: Date | null;
|
processStartedAt: Date | null;
|
||||||
|
lastOutputAt: Date | null;
|
||||||
retryOfRunId: string | null;
|
retryOfRunId: string | null;
|
||||||
processLossRetryCount: number;
|
processLossRetryCount: number;
|
||||||
contextSnapshot: Record<string, unknown> | null;
|
contextSnapshot: Record<string, unknown> | null;
|
||||||
|
|
|
||||||
|
|
@@ -73,6 +73,8 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
|
||||||
includeIssue?: boolean;
|
includeIssue?: boolean;
|
||||||
runErrorCode?: string | null;
|
runErrorCode?: string | null;
|
||||||
runError?: string | null;
|
runError?: string | null;
|
||||||
|
lastOutputAt?: Date | null;
|
||||||
|
startedAt?: Date | null;
|
||||||
}) {
|
}) {
|
||||||
const companyId = randomUUID();
|
const companyId = randomUUID();
|
||||||
const agentId = randomUUID();
|
const agentId = randomUUID();
|
||||||
|
|
@@ -127,7 +129,8 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
|
||||||
processLossRetryCount: input?.processLossRetryCount ?? 0,
|
processLossRetryCount: input?.processLossRetryCount ?? 0,
|
||||||
errorCode: input?.runErrorCode ?? null,
|
errorCode: input?.runErrorCode ?? null,
|
||||||
error: input?.runError ?? null,
|
error: input?.runError ?? null,
|
||||||
startedAt: now,
|
startedAt: input?.startedAt ?? now,
|
||||||
|
lastOutputAt: input?.lastOutputAt ?? null,
|
||||||
updatedAt: new Date("2026-03-19T00:00:00.000Z"),
|
updatedAt: new Date("2026-03-19T00:00:00.000Z"),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@@ -157,6 +160,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
|
||||||
const { runId, wakeupRequestId } = await seedRunFixture({
|
const { runId, wakeupRequestId } = await seedRunFixture({
|
||||||
processPid: child.pid ?? null,
|
processPid: child.pid ?? null,
|
||||||
includeIssue: false,
|
includeIssue: false,
|
||||||
|
lastOutputAt: new Date(),
|
||||||
});
|
});
|
||||||
const heartbeat = heartbeatService(db);
|
const heartbeat = heartbeatService(db);
|
||||||
|
|
||||||
|
|
@@ -236,6 +240,82 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
|
||||||
expect(issue?.checkoutRunId).toBe(runId);
|
expect(issue?.checkoutRunId).toBe(runId);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("sets idle_warning when a run has no output for over 10 minutes", async () => {
|
||||||
|
const elevenMinutesAgo = new Date(Date.now() - 11 * 60 * 1000);
|
||||||
|
const child = spawnAliveProcess();
|
||||||
|
childProcesses.add(child);
|
||||||
|
const { runId } = await seedRunFixture({
|
||||||
|
includeIssue: false,
|
||||||
|
processPid: child.pid ?? null,
|
||||||
|
startedAt: elevenMinutesAgo,
|
||||||
|
lastOutputAt: elevenMinutesAgo,
|
||||||
|
});
|
||||||
|
// Register in runningProcesses so orphan reaper skips it; idle pass still checks it
|
||||||
|
runningProcesses.set(runId, { child, graceSec: 10 } as any);
|
||||||
|
const heartbeat = heartbeatService(db);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await heartbeat.reapOrphanedRuns();
|
||||||
|
expect(result.idleWarned).toBe(1);
|
||||||
|
expect(result.idleKilled).toBe(0);
|
||||||
|
|
||||||
|
const run = await heartbeat.getRun(runId);
|
||||||
|
expect(run?.status).toBe("running");
|
||||||
|
expect(run?.errorCode).toBe("idle_warning");
|
||||||
|
} finally {
|
||||||
|
runningProcesses.delete(runId);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("kills a run that has been idle for over 15 minutes", async () => {
|
||||||
|
const sixteenMinutesAgo = new Date(Date.now() - 16 * 60 * 1000);
|
||||||
|
const child = spawnAliveProcess();
|
||||||
|
childProcesses.add(child);
|
||||||
|
const { runId } = await seedRunFixture({
|
||||||
|
processPid: child.pid ?? null,
|
||||||
|
startedAt: sixteenMinutesAgo,
|
||||||
|
lastOutputAt: sixteenMinutesAgo,
|
||||||
|
});
|
||||||
|
runningProcesses.set(runId, { child, graceSec: 10 } as any);
|
||||||
|
const heartbeat = heartbeatService(db);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await heartbeat.reapOrphanedRuns();
|
||||||
|
expect(result.idleKilled).toBe(1);
|
||||||
|
|
||||||
|
const run = await heartbeat.getRun(runId);
|
||||||
|
expect(run?.status).toBe("failed");
|
||||||
|
expect(run?.errorCode).toBe("idle_timeout");
|
||||||
|
} finally {
|
||||||
|
runningProcesses.delete(runId);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not idle-warn a run with recent output", async () => {
|
||||||
|
const child = spawnAliveProcess();
|
||||||
|
childProcesses.add(child);
|
||||||
|
const { runId } = await seedRunFixture({
|
||||||
|
includeIssue: false,
|
||||||
|
processPid: child.pid ?? null,
|
||||||
|
startedAt: new Date(Date.now() - 20 * 60 * 1000),
|
||||||
|
lastOutputAt: new Date(),
|
||||||
|
});
|
||||||
|
runningProcesses.set(runId, { child, graceSec: 10 } as any);
|
||||||
|
const heartbeat = heartbeatService(db);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await heartbeat.reapOrphanedRuns();
|
||||||
|
expect(result.idleWarned).toBe(0);
|
||||||
|
expect(result.idleKilled).toBe(0);
|
||||||
|
|
||||||
|
const run = await heartbeat.getRun(runId);
|
||||||
|
expect(run?.status).toBe("running");
|
||||||
|
expect(run?.errorCode).toBeNull();
|
||||||
|
} finally {
|
||||||
|
runningProcesses.delete(runId);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
it("clears the detached warning when the run reports activity again", async () => {
|
it("clears the detached warning when the run reports activity again", async () => {
|
||||||
const { runId } = await seedRunFixture({
|
const { runId } = await seedRunFixture({
|
||||||
includeIssue: false,
|
includeIssue: false,
|
||||||
|
|
@@ -252,4 +332,22 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
|
||||||
expect(run?.errorCode).toBeNull();
|
expect(run?.errorCode).toBeNull();
|
||||||
expect(run?.error).toBeNull();
|
expect(run?.error).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("clears the idle warning when the run reports activity again", async () => {
|
||||||
|
const { runId } = await seedRunFixture({
|
||||||
|
includeIssue: false,
|
||||||
|
runErrorCode: "idle_warning",
|
||||||
|
runError: "Idle warning: no output for 10 minutes",
|
||||||
|
});
|
||||||
|
const heartbeat = heartbeatService(db);
|
||||||
|
|
||||||
|
const updated = await heartbeat.reportRunActivity(runId);
|
||||||
|
expect(updated?.errorCode).toBeNull();
|
||||||
|
expect(updated?.error).toBeNull();
|
||||||
|
expect(updated?.lastOutputAt).toBeTruthy();
|
||||||
|
|
||||||
|
const run = await heartbeat.getRun(runId);
|
||||||
|
expect(run?.errorCode).toBeNull();
|
||||||
|
expect(run?.error).toBeNull();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@@ -2083,6 +2083,8 @@ export function agentRoutes(db: Db) {
|
||||||
agentId: heartbeatRuns.agentId,
|
agentId: heartbeatRuns.agentId,
|
||||||
agentName: agentsTable.name,
|
agentName: agentsTable.name,
|
||||||
adapterType: agentsTable.adapterType,
|
adapterType: agentsTable.adapterType,
|
||||||
|
errorCode: heartbeatRuns.errorCode,
|
||||||
|
lastOutputAt: heartbeatRuns.lastOutputAt,
|
||||||
issueId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("issueId"),
|
issueId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("issueId"),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@@ -2250,6 +2252,8 @@ export function agentRoutes(db: Db) {
|
||||||
agentId: heartbeatRuns.agentId,
|
agentId: heartbeatRuns.agentId,
|
||||||
agentName: agentsTable.name,
|
agentName: agentsTable.name,
|
||||||
adapterType: agentsTable.adapterType,
|
adapterType: agentsTable.adapterType,
|
||||||
|
errorCode: heartbeatRuns.errorCode,
|
||||||
|
lastOutputAt: heartbeatRuns.lastOutputAt,
|
||||||
})
|
})
|
||||||
.from(heartbeatRuns)
|
.from(heartbeatRuns)
|
||||||
.innerJoin(agentsTable, eq(heartbeatRuns.agentId, agentsTable.id))
|
.innerJoin(agentsTable, eq(heartbeatRuns.agentId, agentsTable.id))
|
||||||
|
|
|
||||||
|
|
@@ -63,6 +63,10 @@ const HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT = 1;
|
||||||
const HEARTBEAT_MAX_CONCURRENT_RUNS_MAX = 10;
|
const HEARTBEAT_MAX_CONCURRENT_RUNS_MAX = 10;
|
||||||
const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext";
|
const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext";
|
||||||
const DETACHED_PROCESS_ERROR_CODE = "process_detached";
|
const DETACHED_PROCESS_ERROR_CODE = "process_detached";
|
||||||
|
const IDLE_WARNING_ERROR_CODE = "idle_warning";
|
||||||
|
const IDLE_WARNING_THRESHOLD_MS = 10 * 60 * 1000; // 10 minutes
|
||||||
|
const IDLE_KILL_THRESHOLD_MS = 15 * 60 * 1000; // 15 minutes
|
||||||
|
const IDLE_KILL_GRACE_SEC = 10; // seconds between SIGTERM and SIGKILL
|
||||||
const startLocksByAgent = new Map<string, Promise<void>>();
|
const startLocksByAgent = new Map<string, Promise<void>>();
|
||||||
const REPO_ONLY_CWD_SENTINEL = "/__paperclip_repo_only__";
|
const REPO_ONLY_CWD_SENTINEL = "/__paperclip_repo_only__";
|
||||||
const MANAGED_WORKSPACE_GIT_CLONE_TIMEOUT_MS = 10 * 60 * 1000;
|
const MANAGED_WORKSPACE_GIT_CLONE_TIMEOUT_MS = 10 * 60 * 1000;
|
||||||
|
|
@@ -1484,14 +1488,20 @@ export function heartbeatService(db: Db) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function clearDetachedRunWarning(runId: string) {
|
async function clearDetachedRunWarning(runId: string) {
|
||||||
|
// Clear both detached process warnings and idle warnings when activity is reported
|
||||||
const updated = await db
|
const updated = await db
|
||||||
.update(heartbeatRuns)
|
.update(heartbeatRuns)
|
||||||
.set({
|
.set({
|
||||||
error: null,
|
error: null,
|
||||||
errorCode: null,
|
errorCode: null,
|
||||||
|
lastOutputAt: new Date(),
|
||||||
updatedAt: new Date(),
|
updatedAt: new Date(),
|
||||||
})
|
})
|
||||||
.where(and(eq(heartbeatRuns.id, runId), eq(heartbeatRuns.status, "running"), eq(heartbeatRuns.errorCode, DETACHED_PROCESS_ERROR_CODE)))
|
.where(and(
|
||||||
|
eq(heartbeatRuns.id, runId),
|
||||||
|
eq(heartbeatRuns.status, "running"),
|
||||||
|
inArray(heartbeatRuns.errorCode, [DETACHED_PROCESS_ERROR_CODE, IDLE_WARNING_ERROR_CODE]),
|
||||||
|
))
|
||||||
.returning()
|
.returning()
|
||||||
.then((rows) => rows[0] ?? null);
|
.then((rows) => rows[0] ?? null);
|
||||||
if (!updated) return null;
|
if (!updated) return null;
|
||||||
|
|
@@ -1500,7 +1510,7 @@ export function heartbeatService(db: Db) {
|
||||||
eventType: "lifecycle",
|
eventType: "lifecycle",
|
||||||
stream: "system",
|
stream: "system",
|
||||||
level: "info",
|
level: "info",
|
||||||
message: "Detached child process reported activity; cleared detached warning",
|
message: "Activity reported; cleared run warning",
|
||||||
});
|
});
|
||||||
return updated;
|
return updated;
|
||||||
}
|
}
|
||||||
|
|
@@ -1826,7 +1836,100 @@ export function heartbeatService(db: Db) {
|
||||||
if (reaped.length > 0) {
|
if (reaped.length > 0) {
|
||||||
logger.warn({ reapedCount: reaped.length, runIds: reaped }, "reaped orphaned heartbeat runs");
|
logger.warn({ reapedCount: reaped.length, runIds: reaped }, "reaped orphaned heartbeat runs");
|
||||||
}
|
}
|
||||||
return { reaped: reaped.length, runIds: reaped };
|
|
||||||
|
// ── Idle-timeout pass: reuse activeRuns query result (no duplicate DB call) ──
|
||||||
|
const idleWarned: string[] = [];
|
||||||
|
const idleKilled: string[] = [];
|
||||||
|
|
||||||
|
for (const { run, adapterType } of activeRuns) {
|
||||||
|
// Idle check applies to all running runs, including tracked ones
|
||||||
|
if (reaped.includes(run.id)) continue;
|
||||||
|
const tracksLocalChild = isTrackedLocalChildProcessAdapter(adapterType);
|
||||||
|
if (!tracksLocalChild) continue;
|
||||||
|
|
||||||
|
// Determine the most recent activity timestamp
|
||||||
|
const lastActivity = run.lastOutputAt
|
||||||
|
? new Date(run.lastOutputAt).getTime()
|
||||||
|
: run.startedAt
|
||||||
|
? new Date(run.startedAt).getTime()
|
||||||
|
: new Date(run.createdAt).getTime();
|
||||||
|
const idleMs = now.getTime() - lastActivity;
|
||||||
|
|
||||||
|
if (idleMs >= IDLE_KILL_THRESHOLD_MS) {
|
||||||
|
// Kill: SIGTERM then SIGKILL
|
||||||
|
const tracked = runningProcesses.get(run.id);
|
||||||
|
const pid = tracked?.child?.pid ?? run.processPid;
|
||||||
|
const killMessage = `Idle timeout: no output for ${Math.round(idleMs / 60_000)} minutes — killing process (pid ${pid ?? "unknown"})`;
|
||||||
|
logger.warn({ runId: run.id, pid, idleMs }, killMessage);
|
||||||
|
|
||||||
|
if (tracked?.child) {
|
||||||
|
tracked.child.kill("SIGTERM");
|
||||||
|
setTimeout(() => {
|
||||||
|
try { if (!tracked.child.killed) tracked.child.kill("SIGKILL"); } catch {}
|
||||||
|
}, IDLE_KILL_GRACE_SEC * 1000);
|
||||||
|
} else if (pid && isProcessAlive(pid)) {
|
||||||
|
try { process.kill(pid, "SIGTERM"); } catch {}
|
||||||
|
setTimeout(() => {
|
||||||
|
try { if (isProcessAlive(pid)) process.kill(pid, "SIGKILL"); } catch {}
|
||||||
|
}, IDLE_KILL_GRACE_SEC * 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark the run as failed
|
||||||
|
let killedRun = await setRunStatus(run.id, "failed", {
|
||||||
|
error: killMessage,
|
||||||
|
errorCode: "idle_timeout",
|
||||||
|
finishedAt: now,
|
||||||
|
});
|
||||||
|
await setWakeupStatus(run.wakeupRequestId, "failed", {
|
||||||
|
finishedAt: now,
|
||||||
|
error: killMessage,
|
||||||
|
});
|
||||||
|
if (!killedRun) killedRun = await getRun(run.id);
|
||||||
|
if (killedRun) {
|
||||||
|
await appendRunEvent(killedRun, await nextRunEventSeq(killedRun.id), {
|
||||||
|
eventType: "lifecycle",
|
||||||
|
stream: "system",
|
||||||
|
level: "error",
|
||||||
|
message: killMessage,
|
||||||
|
payload: { pid, idleMs },
|
||||||
|
});
|
||||||
|
await releaseIssueExecutionAndPromote(killedRun);
|
||||||
|
}
|
||||||
|
|
||||||
|
await finalizeAgentStatus(run.agentId, "failed");
|
||||||
|
await startNextQueuedRunForAgent(run.agentId);
|
||||||
|
runningProcesses.delete(run.id);
|
||||||
|
idleKilled.push(run.id);
|
||||||
|
} else if (idleMs >= IDLE_WARNING_THRESHOLD_MS && run.errorCode !== IDLE_WARNING_ERROR_CODE) {
|
||||||
|
// Warning: approaching idle timeout
|
||||||
|
const warnMessage = `Idle warning: no output for ${Math.round(idleMs / 60_000)} minutes — will be killed at ${IDLE_KILL_THRESHOLD_MS / 60_000} min`;
|
||||||
|
logger.warn({ runId: run.id, idleMs }, warnMessage);
|
||||||
|
|
||||||
|
const warnedRun = await setRunStatus(run.id, "running", {
|
||||||
|
error: warnMessage,
|
||||||
|
errorCode: IDLE_WARNING_ERROR_CODE,
|
||||||
|
});
|
||||||
|
if (warnedRun) {
|
||||||
|
await appendRunEvent(warnedRun, await nextRunEventSeq(warnedRun.id), {
|
||||||
|
eventType: "lifecycle",
|
||||||
|
stream: "system",
|
||||||
|
level: "warn",
|
||||||
|
message: warnMessage,
|
||||||
|
payload: { idleMs },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
idleWarned.push(run.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (idleWarned.length > 0) {
|
||||||
|
logger.info({ count: idleWarned.length, runIds: idleWarned }, "idle-warned runs");
|
||||||
|
}
|
||||||
|
if (idleKilled.length > 0) {
|
||||||
|
logger.warn({ count: idleKilled.length, runIds: idleKilled }, "idle-killed runs");
|
||||||
|
}
|
||||||
|
|
||||||
|
return { reaped: reaped.length, runIds: reaped, idleWarned: idleWarned.length, idleKilled: idleKilled.length };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function resumeQueuedRuns() {
|
async function resumeQueuedRuns() {
|
||||||
|
|
@@ -2397,11 +2500,38 @@ export function heartbeatService(db: Db) {
|
||||||
.where(eq(heartbeatRuns.id, runId));
|
.where(eq(heartbeatRuns.id, runId));
|
||||||
|
|
||||||
const currentUserRedactionOptions = await getCurrentUserRedactionOptions();
|
const currentUserRedactionOptions = await getCurrentUserRedactionOptions();
|
||||||
|
let lastOutputAtFlushPending = false;
|
||||||
|
let lastOutputAtLatest: Date | null = null;
|
||||||
|
let lastOutputAtTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
const onLog = async (stream: "stdout" | "stderr", chunk: string) => {
|
const onLog = async (stream: "stdout" | "stderr", chunk: string) => {
|
||||||
const sanitizedChunk = redactCurrentUserText(chunk, currentUserRedactionOptions);
|
const sanitizedChunk = redactCurrentUserText(chunk, currentUserRedactionOptions);
|
||||||
if (stream === "stdout") stdoutExcerpt = appendExcerpt(stdoutExcerpt, sanitizedChunk);
|
if (stream === "stdout") stdoutExcerpt = appendExcerpt(stdoutExcerpt, sanitizedChunk);
|
||||||
if (stream === "stderr") stderrExcerpt = appendExcerpt(stderrExcerpt, sanitizedChunk);
|
if (stream === "stderr") stderrExcerpt = appendExcerpt(stderrExcerpt, sanitizedChunk);
|
||||||
const ts = new Date().toISOString();
|
const ts = new Date().toISOString();
|
||||||
|
lastOutputAtLatest = new Date(ts);
|
||||||
|
|
||||||
|
// Batch lastOutputAt writes — flush at most once per 30 seconds to avoid DB churn
|
||||||
|
if (!lastOutputAtFlushPending) {
|
||||||
|
lastOutputAtFlushPending = true;
|
||||||
|
// Flush immediately on first output
|
||||||
|
await db.update(heartbeatRuns)
|
||||||
|
.set({ lastOutputAt: lastOutputAtLatest, updatedAt: new Date() })
|
||||||
|
.where(eq(heartbeatRuns.id, runId));
|
||||||
|
// Clear idle_warning if set — output resumed
|
||||||
|
await db.update(heartbeatRuns)
|
||||||
|
.set({ errorCode: null, error: null })
|
||||||
|
.where(and(eq(heartbeatRuns.id, runId), eq(heartbeatRuns.errorCode, IDLE_WARNING_ERROR_CODE)));
|
||||||
|
lastOutputAtTimer = setTimeout(() => {
|
||||||
|
lastOutputAtFlushPending = false;
|
||||||
|
lastOutputAtTimer = null;
|
||||||
|
if (!lastOutputAtLatest) return;
|
||||||
|
db.update(heartbeatRuns)
|
||||||
|
.set({ lastOutputAt: lastOutputAtLatest, updatedAt: new Date() })
|
||||||
|
.where(eq(heartbeatRuns.id, runId))
|
||||||
|
.then(() => {})
|
||||||
|
.catch((err) => logger.warn({ err, runId }, "failed to flush lastOutputAt"));
|
||||||
|
}, 30_000);
|
||||||
|
}
|
||||||
|
|
||||||
if (handle) {
|
if (handle) {
|
||||||
await runLogStore.append(handle, {
|
await runLogStore.append(handle, {
|
||||||
|
|
@@ -2525,6 +2655,8 @@ export function heartbeatService(db: Db) {
|
||||||
},
|
},
|
||||||
authToken: authToken ?? undefined,
|
authToken: authToken ?? undefined,
|
||||||
});
|
});
|
||||||
|
// Clear the debounce timer now that the run is complete
|
||||||
|
if (lastOutputAtTimer) { clearTimeout(lastOutputAtTimer); lastOutputAtTimer = null; }
|
||||||
const adapterManagedRuntimeServices = adapterResult.runtimeServices
|
const adapterManagedRuntimeServices = adapterResult.runtimeServices
|
||||||
? await persistAdapterManagedRuntimeServices({
|
? await persistAdapterManagedRuntimeServices({
|
||||||
db,
|
db,
|
||||||
|
|
|
||||||
|
|
@@ -23,6 +23,8 @@ export interface LiveRunForIssue {
|
||||||
agentId: string;
|
agentId: string;
|
||||||
agentName: string;
|
agentName: string;
|
||||||
adapterType: string;
|
adapterType: string;
|
||||||
|
errorCode?: string | null;
|
||||||
|
lastOutputAt?: string | null;
|
||||||
issueId?: string | null;
|
issueId?: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -92,9 +92,11 @@ function AgentRunCard({
|
||||||
return (
|
return (
|
||||||
<div className={cn(
|
<div className={cn(
|
||||||
"flex h-[320px] flex-col overflow-hidden rounded-xl border shadow-sm",
|
"flex h-[320px] flex-col overflow-hidden rounded-xl border shadow-sm",
|
||||||
isActive
|
run.errorCode === "idle_warning"
|
||||||
? "border-cyan-500/25 bg-cyan-500/[0.04] shadow-[0_16px_40px_rgba(6,182,212,0.08)]"
|
? "border-orange-500/25 bg-orange-500/[0.04] shadow-[0_16px_40px_rgba(249,115,22,0.08)]"
|
||||||
: "border-border bg-background/70",
|
: isActive
|
||||||
|
? "border-cyan-500/25 bg-cyan-500/[0.04] shadow-[0_16px_40px_rgba(6,182,212,0.08)]"
|
||||||
|
: "border-border bg-background/70",
|
||||||
)}>
|
)}>
|
||||||
<div className="border-b border-border/60 px-3 py-3">
|
<div className="border-b border-border/60 px-3 py-3">
|
||||||
<div className="flex items-start justify-between gap-2">
|
<div className="flex items-start justify-between gap-2">
|
||||||
|
|
@@ -112,6 +114,11 @@ function AgentRunCard({
|
||||||
</div>
|
</div>
|
||||||
<div className="mt-2 flex items-center gap-2 text-[11px] text-muted-foreground">
|
<div className="mt-2 flex items-center gap-2 text-[11px] text-muted-foreground">
|
||||||
<span>{isActive ? "Live now" : run.finishedAt ? `Finished ${relativeTime(run.finishedAt)}` : `Started ${relativeTime(run.createdAt)}`}</span>
|
<span>{isActive ? "Live now" : run.finishedAt ? `Finished ${relativeTime(run.finishedAt)}` : `Started ${relativeTime(run.createdAt)}`}</span>
|
||||||
|
{run.errorCode === "idle_warning" && (
|
||||||
|
<span className="rounded-full border border-orange-500/20 bg-orange-500/[0.06] px-1.5 py-0.5 text-[10px] font-medium text-orange-700 dark:text-orange-300">
|
||||||
|
Idle
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -3,8 +3,8 @@ import { Link } from "@/lib/router";
|
||||||
import { useQuery, useQueryClient } from "@tanstack/react-query";
|
import { useQuery, useQueryClient } from "@tanstack/react-query";
|
||||||
import { heartbeatsApi, type LiveRunForIssue } from "../api/heartbeats";
|
import { heartbeatsApi, type LiveRunForIssue } from "../api/heartbeats";
|
||||||
import { queryKeys } from "../lib/queryKeys";
|
import { queryKeys } from "../lib/queryKeys";
|
||||||
import { formatDateTime } from "../lib/utils";
|
import { formatDateTime, relativeTime } from "../lib/utils";
|
||||||
import { ExternalLink, Square } from "lucide-react";
|
import { AlertTriangle, ExternalLink, Square } from "lucide-react";
|
||||||
import { Identity } from "./Identity";
|
import { Identity } from "./Identity";
|
||||||
import { StatusBadge } from "./StatusBadge";
|
import { StatusBadge } from "./StatusBadge";
|
||||||
import { RunTranscriptView } from "./transcript/RunTranscriptView";
|
import { RunTranscriptView } from "./transcript/RunTranscriptView";
|
||||||
|
|
@@ -24,6 +24,7 @@ function isRunActive(status: string): boolean {
|
||||||
return status === "queued" || status === "running";
|
return status === "queued" || status === "running";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) {
|
export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) {
|
||||||
const queryClient = useQueryClient();
|
const queryClient = useQueryClient();
|
||||||
const [cancellingRunIds, setCancellingRunIds] = useState(new Set<string>());
|
const [cancellingRunIds, setCancellingRunIds] = useState(new Set<string>());
|
||||||
|
|
@@ -59,6 +60,8 @@ export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) {
|
||||||
agentId: activeRun.agentId,
|
agentId: activeRun.agentId,
|
||||||
agentName: activeRun.agentName,
|
agentName: activeRun.agentName,
|
||||||
adapterType: activeRun.adapterType,
|
adapterType: activeRun.adapterType,
|
||||||
|
errorCode: activeRun.errorCode,
|
||||||
|
lastOutputAt: toIsoString(activeRun.lastOutputAt),
|
||||||
issueId,
|
issueId,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@@ -116,7 +119,21 @@ export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) {
|
||||||
{run.id.slice(0, 8)}
|
{run.id.slice(0, 8)}
|
||||||
</Link>
|
</Link>
|
||||||
<StatusBadge status={run.status} />
|
<StatusBadge status={run.status} />
|
||||||
<span>{formatDateTime(run.startedAt ?? run.createdAt)}</span>
|
{run.errorCode === "idle_warning" && (
|
||||||
|
<span className="inline-flex items-center gap-1 rounded-full border border-orange-500/20 bg-orange-500/[0.06] px-2 py-0.5 text-[11px] font-medium text-orange-700 dark:text-orange-300">
|
||||||
|
<AlertTriangle className="h-3 w-3" />
|
||||||
|
Idle
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
{isRunActive(run.status) && run.lastOutputAt ? (
|
||||||
|
<span title={`Last output: ${formatDateTime(run.lastOutputAt)}`}>
|
||||||
|
Last output {relativeTime(run.lastOutputAt)}
|
||||||
|
</span>
|
||||||
|
) : run.finishedAt ? (
|
||||||
|
<span>{formatDateTime(run.finishedAt)}</span>
|
||||||
|
) : (
|
||||||
|
<span>{formatDateTime(run.startedAt ?? run.createdAt)}</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -113,6 +113,7 @@ function makeRun(id: string, status: HeartbeatRun["status"], createdAt: string,
|
||||||
externalRunId: null,
|
externalRunId: null,
|
||||||
processPid: null,
|
processPid: null,
|
||||||
processStartedAt: null,
|
processStartedAt: null,
|
||||||
|
lastOutputAt: null,
|
||||||
retryOfRunId: null,
|
retryOfRunId: null,
|
||||||
processLossRetryCount: 0,
|
processLossRetryCount: 0,
|
||||||
stdoutExcerpt: null,
|
stdoutExcerpt: null,
|
||||||
|
|
|
||||||
|
|
@@ -55,6 +55,7 @@ export const statusBadge: Record<string, string> = {
|
||||||
// Run statuses
|
// Run statuses
|
||||||
failed: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
|
failed: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
|
||||||
timed_out: "bg-orange-100 text-orange-700 dark:bg-orange-900/50 dark:text-orange-300",
|
timed_out: "bg-orange-100 text-orange-700 dark:bg-orange-900/50 dark:text-orange-300",
|
||||||
|
idle_timeout: "bg-orange-100 text-orange-700 dark:bg-orange-900/50 dark:text-orange-300",
|
||||||
succeeded: "bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300",
|
succeeded: "bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300",
|
||||||
error: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
|
error: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
|
||||||
terminated: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
|
terminated: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue