Compare commits

..

No commits in common. "a82f9ec7eb3347ad68ca7dfe36874ced784121c1" and "af5b980362e4ac5f682f4241c5e6abbf94eb5ba8" have entirely different histories.

13 changed files with 10 additions and 12139 deletions

View file

@ -1 +0,0 @@
ALTER TABLE "heartbeat_runs" ADD COLUMN "last_output_at" timestamp with time zone;

File diff suppressed because it is too large. Load diff

View file

@ -323,13 +323,6 @@
"when": 1774530504348,
"tag": "0045_workable_shockwave",
"breakpoints": true
},
{
"idx": 46,
"version": "7",
"when": 1774589600897,
"tag": "0046_lethal_starfox",
"breakpoints": true
}
]
}

View file

@ -33,7 +33,6 @@ export const heartbeatRuns = pgTable(
externalRunId: text("external_run_id"),
processPid: integer("process_pid"),
processStartedAt: timestamp("process_started_at", { withTimezone: true }),
lastOutputAt: timestamp("last_output_at", { withTimezone: true }),
retryOfRunId: uuid("retry_of_run_id").references((): AnyPgColumn => heartbeatRuns.id, {
onDelete: "set null",
}),

View file

@ -35,7 +35,6 @@ export interface HeartbeatRun {
externalRunId: string | null;
processPid: number | null;
processStartedAt: Date | null;
lastOutputAt: Date | null;
retryOfRunId: string | null;
processLossRetryCount: number;
contextSnapshot: Record<string, unknown> | null;

View file

@ -73,8 +73,6 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
includeIssue?: boolean;
runErrorCode?: string | null;
runError?: string | null;
lastOutputAt?: Date | null;
startedAt?: Date | null;
}) {
const companyId = randomUUID();
const agentId = randomUUID();
@ -129,8 +127,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
processLossRetryCount: input?.processLossRetryCount ?? 0,
errorCode: input?.runErrorCode ?? null,
error: input?.runError ?? null,
startedAt: input?.startedAt ?? now,
lastOutputAt: input?.lastOutputAt ?? null,
startedAt: now,
updatedAt: new Date("2026-03-19T00:00:00.000Z"),
});
@ -160,7 +157,6 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
const { runId, wakeupRequestId } = await seedRunFixture({
processPid: child.pid ?? null,
includeIssue: false,
lastOutputAt: new Date(),
});
const heartbeat = heartbeatService(db);
@ -240,82 +236,6 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
expect(issue?.checkoutRunId).toBe(runId);
});
// Warning stage of the idle check: a run whose start time and last output are
// both 11 minutes old must be counted in `idleWarned` by reapOrphanedRuns(),
// keep status "running", and carry errorCode "idle_warning".
it("sets idle_warning when a run has no output for over 10 minutes", async () => {
const elevenMinutesAgo = new Date(Date.now() - 11 * 60 * 1000);
// A live child process backs the run; it is also recorded in `childProcesses`
// (presumably for suite-level cleanup — confirm against the suite's teardown).
const child = spawnAliveProcess();
childProcesses.add(child);
const { runId } = await seedRunFixture({
includeIssue: false,
processPid: child.pid ?? null,
startedAt: elevenMinutesAgo,
lastOutputAt: elevenMinutesAgo,
});
// Register in runningProcesses so orphan reaper skips it; idle pass still checks it
runningProcesses.set(runId, { child, graceSec: 10 } as any);
const heartbeat = heartbeatService(db);
try {
const result = await heartbeat.reapOrphanedRuns();
// Exactly one warning and no kill: 11 minutes is past the warning point
// asserted by this test but the run must not be terminated yet.
expect(result.idleWarned).toBe(1);
expect(result.idleKilled).toBe(0);
const run = await heartbeat.getRun(runId);
expect(run?.status).toBe("running");
expect(run?.errorCode).toBe("idle_warning");
} finally {
// Always drop the manual registration so it cannot leak into other tests.
runningProcesses.delete(runId);
}
});
// Kill stage of the idle check: a run whose start time and last output are
// both 16 minutes old must be counted in `idleKilled` by reapOrphanedRuns()
// and end up with status "failed" and errorCode "idle_timeout".
it("kills a run that has been idle for over 15 minutes", async () => {
const sixteenMinutesAgo = new Date(Date.now() - 16 * 60 * 1000);
// A live child process backs the run so there is something for the idle pass to signal.
const child = spawnAliveProcess();
childProcesses.add(child);
// Unlike the sibling idle tests, `includeIssue` is not overridden here, so the
// fixture's default applies (default not visible in this view — confirm).
const { runId } = await seedRunFixture({
processPid: child.pid ?? null,
startedAt: sixteenMinutesAgo,
lastOutputAt: sixteenMinutesAgo,
});
// Mark the run as tracked in-process, mirroring the other idle tests.
runningProcesses.set(runId, { child, graceSec: 10 } as any);
const heartbeat = heartbeatService(db);
try {
const result = await heartbeat.reapOrphanedRuns();
expect(result.idleKilled).toBe(1);
const run = await heartbeat.getRun(runId);
expect(run?.status).toBe("failed");
expect(run?.errorCode).toBe("idle_timeout");
} finally {
// Always drop the manual registration so it cannot leak into other tests.
runningProcesses.delete(runId);
}
});
// Negative case: the run started 20 minutes ago but produced output just now,
// so reapOrphanedRuns() must neither warn nor kill it and must leave errorCode null.
it("does not idle-warn a run with recent output", async () => {
const child = spawnAliveProcess();
childProcesses.add(child);
const { runId } = await seedRunFixture({
includeIssue: false,
processPid: child.pid ?? null,
// Old start time combined with fresh output: shows that the most recent
// output, not the start time, is what the idle check measures from.
startedAt: new Date(Date.now() - 20 * 60 * 1000),
lastOutputAt: new Date(),
});
// Mark the run as tracked in-process, mirroring the other idle tests.
runningProcesses.set(runId, { child, graceSec: 10 } as any);
const heartbeat = heartbeatService(db);
try {
const result = await heartbeat.reapOrphanedRuns();
expect(result.idleWarned).toBe(0);
expect(result.idleKilled).toBe(0);
const run = await heartbeat.getRun(runId);
expect(run?.status).toBe("running");
expect(run?.errorCode).toBeNull();
} finally {
// Always drop the manual registration so it cannot leak into other tests.
runningProcesses.delete(runId);
}
});
it("clears the detached warning when the run reports activity again", async () => {
const { runId } = await seedRunFixture({
includeIssue: false,
@ -332,22 +252,4 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
expect(run?.errorCode).toBeNull();
expect(run?.error).toBeNull();
});
// A run already flagged with "idle_warning" must have its error fields cleared
// and `lastOutputAt` populated once reportRunActivity() is called for it.
it("clears the idle warning when the run reports activity again", async () => {
// Seed the run directly in the warned state instead of waiting for the idle pass.
const { runId } = await seedRunFixture({
includeIssue: false,
runErrorCode: "idle_warning",
runError: "Idle warning: no output for 10 minutes",
});
const heartbeat = heartbeatService(db);
// The value returned by reportRunActivity() must already reflect the cleared warning.
const updated = await heartbeat.reportRunActivity(runId);
expect(updated?.errorCode).toBeNull();
expect(updated?.error).toBeNull();
expect(updated?.lastOutputAt).toBeTruthy();
// Re-read through getRun() to confirm the same state is visible on a fresh lookup.
const run = await heartbeat.getRun(runId);
expect(run?.errorCode).toBeNull();
expect(run?.error).toBeNull();
});
});

View file

@ -2083,8 +2083,6 @@ export function agentRoutes(db: Db) {
agentId: heartbeatRuns.agentId,
agentName: agentsTable.name,
adapterType: agentsTable.adapterType,
errorCode: heartbeatRuns.errorCode,
lastOutputAt: heartbeatRuns.lastOutputAt,
issueId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("issueId"),
};
@ -2252,8 +2250,6 @@ export function agentRoutes(db: Db) {
agentId: heartbeatRuns.agentId,
agentName: agentsTable.name,
adapterType: agentsTable.adapterType,
errorCode: heartbeatRuns.errorCode,
lastOutputAt: heartbeatRuns.lastOutputAt,
})
.from(heartbeatRuns)
.innerJoin(agentsTable, eq(heartbeatRuns.agentId, agentsTable.id))

View file

@ -63,10 +63,6 @@ const HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT = 1;
const HEARTBEAT_MAX_CONCURRENT_RUNS_MAX = 10;
const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext";
const DETACHED_PROCESS_ERROR_CODE = "process_detached";
const IDLE_WARNING_ERROR_CODE = "idle_warning";
const IDLE_WARNING_THRESHOLD_MS = 10 * 60 * 1000; // 10 minutes
const IDLE_KILL_THRESHOLD_MS = 15 * 60 * 1000; // 15 minutes
const IDLE_KILL_GRACE_SEC = 10; // seconds between SIGTERM and SIGKILL
const startLocksByAgent = new Map<string, Promise<void>>();
const REPO_ONLY_CWD_SENTINEL = "/__paperclip_repo_only__";
const MANAGED_WORKSPACE_GIT_CLONE_TIMEOUT_MS = 10 * 60 * 1000;
@ -1488,20 +1484,14 @@ export function heartbeatService(db: Db) {
}
async function clearDetachedRunWarning(runId: string) {
// Clear both detached process warnings and idle warnings when activity is reported
const updated = await db
.update(heartbeatRuns)
.set({
error: null,
errorCode: null,
lastOutputAt: new Date(),
updatedAt: new Date(),
})
.where(and(
eq(heartbeatRuns.id, runId),
eq(heartbeatRuns.status, "running"),
inArray(heartbeatRuns.errorCode, [DETACHED_PROCESS_ERROR_CODE, IDLE_WARNING_ERROR_CODE]),
))
.where(and(eq(heartbeatRuns.id, runId), eq(heartbeatRuns.status, "running"), eq(heartbeatRuns.errorCode, DETACHED_PROCESS_ERROR_CODE)))
.returning()
.then((rows) => rows[0] ?? null);
if (!updated) return null;
@ -1510,7 +1500,7 @@ export function heartbeatService(db: Db) {
eventType: "lifecycle",
stream: "system",
level: "info",
message: "Activity reported; cleared run warning",
message: "Detached child process reported activity; cleared detached warning",
});
return updated;
}
@ -1836,100 +1826,7 @@ export function heartbeatService(db: Db) {
if (reaped.length > 0) {
logger.warn({ reapedCount: reaped.length, runIds: reaped }, "reaped orphaned heartbeat runs");
}
// ── Idle-timeout pass: reuse activeRuns query result (no duplicate DB call) ──
// For every active run on a tracked local child-process adapter that was not
// just reaped, measure the time since its last activity and either:
//   - >= IDLE_KILL_THRESHOLD_MS:    signal the process, mark the run and its
//                                   wakeup request failed ("idle_timeout");
//   - >= IDLE_WARNING_THRESHOLD_MS: flag the run with IDLE_WARNING_ERROR_CODE
//                                   once, leaving it in "running" status.
const idleWarned: string[] = [];
const idleKilled: string[] = [];

for (const { run, adapterType } of activeRuns) {
  // Idle check applies to all running runs, including tracked ones
  if (reaped.includes(run.id)) continue;

  const tracksLocalChild = isTrackedLocalChildProcessAdapter(adapterType);
  if (!tracksLocalChild) continue;

  // Determine the most recent activity timestamp:
  // lastOutputAt, falling back to startedAt, then createdAt.
  const lastActivity = run.lastOutputAt
    ? new Date(run.lastOutputAt).getTime()
    : run.startedAt
      ? new Date(run.startedAt).getTime()
      : new Date(run.createdAt).getTime();
  const idleMs = now.getTime() - lastActivity;

  if (idleMs >= IDLE_KILL_THRESHOLD_MS) {
    // Kill: SIGTERM then SIGKILL
    const tracked = runningProcesses.get(run.id);
    const pid = tracked?.child?.pid ?? run.processPid;
    const killMessage = `Idle timeout: no output for ${Math.round(idleMs / 60_000)} minutes — killing process (pid ${pid ?? "unknown"})`;
    logger.warn({ runId: run.id, pid, idleMs }, killMessage);

    if (tracked?.child) {
      const child = tracked.child;
      child.kill("SIGTERM");
      setTimeout(() => {
        try {
          // `child.killed` only records that a signal was successfully sent, so it
          // is already true right after the SIGTERM above and cannot be used to
          // decide whether to escalate. Escalate when the child has not exited:
          // both exitCode and signalCode are still null.
          if (child.exitCode === null && child.signalCode === null) child.kill("SIGKILL");
        } catch {}
      }, IDLE_KILL_GRACE_SEC * 1000);
    } else if (pid && isProcessAlive(pid)) {
      // No in-process handle: fall back to signalling the recorded pid directly.
      try { process.kill(pid, "SIGTERM"); } catch {}
      setTimeout(() => {
        try { if (isProcessAlive(pid)) process.kill(pid, "SIGKILL"); } catch {}
      }, IDLE_KILL_GRACE_SEC * 1000);
    }

    // Mark the run as failed
    let killedRun = await setRunStatus(run.id, "failed", {
      error: killMessage,
      errorCode: "idle_timeout",
      finishedAt: now,
    });
    await setWakeupStatus(run.wakeupRequestId, "failed", {
      finishedAt: now,
      error: killMessage,
    });
    // If the status update returned nothing, re-read the run so the lifecycle
    // event and issue release below can still be attempted.
    if (!killedRun) killedRun = await getRun(run.id);
    if (killedRun) {
      await appendRunEvent(killedRun, await nextRunEventSeq(killedRun.id), {
        eventType: "lifecycle",
        stream: "system",
        level: "error",
        message: killMessage,
        payload: { pid, idleMs },
      });
      await releaseIssueExecutionAndPromote(killedRun);
    }
    await finalizeAgentStatus(run.agentId, "failed");
    await startNextQueuedRunForAgent(run.agentId);
    runningProcesses.delete(run.id);
    idleKilled.push(run.id);
  } else if (idleMs >= IDLE_WARNING_THRESHOLD_MS && run.errorCode !== IDLE_WARNING_ERROR_CODE) {
    // Warning: approaching idle timeout. The errorCode check keeps an
    // already-warned run from being warned again on every pass.
    const warnMessage = `Idle warning: no output for ${Math.round(idleMs / 60_000)} minutes — will be killed at ${IDLE_KILL_THRESHOLD_MS / 60_000} min`;
    logger.warn({ runId: run.id, idleMs }, warnMessage);

    const warnedRun = await setRunStatus(run.id, "running", {
      error: warnMessage,
      errorCode: IDLE_WARNING_ERROR_CODE,
    });
    if (warnedRun) {
      await appendRunEvent(warnedRun, await nextRunEventSeq(warnedRun.id), {
        eventType: "lifecycle",
        stream: "system",
        level: "warn",
        message: warnMessage,
        payload: { idleMs },
      });
    }
    idleWarned.push(run.id);
  }
}

if (idleWarned.length > 0) {
  logger.info({ count: idleWarned.length, runIds: idleWarned }, "idle-warned runs");
}
if (idleKilled.length > 0) {
  logger.warn({ count: idleKilled.length, runIds: idleKilled }, "idle-killed runs");
}
return { reaped: reaped.length, runIds: reaped, idleWarned: idleWarned.length, idleKilled: idleKilled.length };
return { reaped: reaped.length, runIds: reaped };
}
async function resumeQueuedRuns() {
@ -2500,38 +2397,11 @@ export function heartbeatService(db: Db) {
.where(eq(heartbeatRuns.id, runId));
const currentUserRedactionOptions = await getCurrentUserRedactionOptions();
let lastOutputAtFlushPending = false;
let lastOutputAtLatest: Date | null = null;
let lastOutputAtTimer: ReturnType<typeof setTimeout> | null = null;
const onLog = async (stream: "stdout" | "stderr", chunk: string) => {
const sanitizedChunk = redactCurrentUserText(chunk, currentUserRedactionOptions);
if (stream === "stdout") stdoutExcerpt = appendExcerpt(stdoutExcerpt, sanitizedChunk);
if (stream === "stderr") stderrExcerpt = appendExcerpt(stderrExcerpt, sanitizedChunk);
const ts = new Date().toISOString();
lastOutputAtLatest = new Date(ts);
// Batch lastOutputAt writes — flush at most once per 30 seconds to avoid DB churn
if (!lastOutputAtFlushPending) {
lastOutputAtFlushPending = true;
// Flush immediately on first output
await db.update(heartbeatRuns)
.set({ lastOutputAt: lastOutputAtLatest, updatedAt: new Date() })
.where(eq(heartbeatRuns.id, runId));
// Clear idle_warning if set — output resumed
await db.update(heartbeatRuns)
.set({ errorCode: null, error: null })
.where(and(eq(heartbeatRuns.id, runId), eq(heartbeatRuns.errorCode, IDLE_WARNING_ERROR_CODE)));
lastOutputAtTimer = setTimeout(() => {
lastOutputAtFlushPending = false;
lastOutputAtTimer = null;
if (!lastOutputAtLatest) return;
db.update(heartbeatRuns)
.set({ lastOutputAt: lastOutputAtLatest, updatedAt: new Date() })
.where(eq(heartbeatRuns.id, runId))
.then(() => {})
.catch((err) => logger.warn({ err, runId }, "failed to flush lastOutputAt"));
}, 30_000);
}
if (handle) {
await runLogStore.append(handle, {
@ -2655,8 +2525,6 @@ export function heartbeatService(db: Db) {
},
authToken: authToken ?? undefined,
});
// Clear the debounce timer now that the run is complete
if (lastOutputAtTimer) { clearTimeout(lastOutputAtTimer); lastOutputAtTimer = null; }
const adapterManagedRuntimeServices = adapterResult.runtimeServices
? await persistAdapterManagedRuntimeServices({
db,

View file

@ -23,8 +23,6 @@ export interface LiveRunForIssue {
agentId: string;
agentName: string;
adapterType: string;
errorCode?: string | null;
lastOutputAt?: string | null;
issueId?: string | null;
}

View file

@ -92,11 +92,9 @@ function AgentRunCard({
return (
<div className={cn(
"flex h-[320px] flex-col overflow-hidden rounded-xl border shadow-sm",
run.errorCode === "idle_warning"
? "border-orange-500/25 bg-orange-500/[0.04] shadow-[0_16px_40px_rgba(249,115,22,0.08)]"
: isActive
? "border-cyan-500/25 bg-cyan-500/[0.04] shadow-[0_16px_40px_rgba(6,182,212,0.08)]"
: "border-border bg-background/70",
isActive
? "border-cyan-500/25 bg-cyan-500/[0.04] shadow-[0_16px_40px_rgba(6,182,212,0.08)]"
: "border-border bg-background/70",
)}>
<div className="border-b border-border/60 px-3 py-3">
<div className="flex items-start justify-between gap-2">
@ -114,11 +112,6 @@ function AgentRunCard({
</div>
<div className="mt-2 flex items-center gap-2 text-[11px] text-muted-foreground">
<span>{isActive ? "Live now" : run.finishedAt ? `Finished ${relativeTime(run.finishedAt)}` : `Started ${relativeTime(run.createdAt)}`}</span>
{run.errorCode === "idle_warning" && (
<span className="rounded-full border border-orange-500/20 bg-orange-500/[0.06] px-1.5 py-0.5 text-[10px] font-medium text-orange-700 dark:text-orange-300">
Idle
</span>
)}
</div>
</div>

View file

@ -3,8 +3,8 @@ import { Link } from "@/lib/router";
import { useQuery, useQueryClient } from "@tanstack/react-query";
import { heartbeatsApi, type LiveRunForIssue } from "../api/heartbeats";
import { queryKeys } from "../lib/queryKeys";
import { formatDateTime, relativeTime } from "../lib/utils";
import { AlertTriangle, ExternalLink, Square } from "lucide-react";
import { formatDateTime } from "../lib/utils";
import { ExternalLink, Square } from "lucide-react";
import { Identity } from "./Identity";
import { StatusBadge } from "./StatusBadge";
import { RunTranscriptView } from "./transcript/RunTranscriptView";
@ -24,7 +24,6 @@ function isRunActive(status: string): boolean {
return status === "queued" || status === "running";
}
export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) {
const queryClient = useQueryClient();
const [cancellingRunIds, setCancellingRunIds] = useState(new Set<string>());
@ -60,8 +59,6 @@ export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) {
agentId: activeRun.agentId,
agentName: activeRun.agentName,
adapterType: activeRun.adapterType,
errorCode: activeRun.errorCode,
lastOutputAt: toIsoString(activeRun.lastOutputAt),
issueId,
});
}
@ -119,21 +116,7 @@ export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) {
{run.id.slice(0, 8)}
</Link>
<StatusBadge status={run.status} />
{run.errorCode === "idle_warning" && (
<span className="inline-flex items-center gap-1 rounded-full border border-orange-500/20 bg-orange-500/[0.06] px-2 py-0.5 text-[11px] font-medium text-orange-700 dark:text-orange-300">
<AlertTriangle className="h-3 w-3" />
Idle
</span>
)}
{isRunActive(run.status) && run.lastOutputAt ? (
<span title={`Last output: ${formatDateTime(run.lastOutputAt)}`}>
Last output {relativeTime(run.lastOutputAt)}
</span>
) : run.finishedAt ? (
<span>{formatDateTime(run.finishedAt)}</span>
) : (
<span>{formatDateTime(run.startedAt ?? run.createdAt)}</span>
)}
<span>{formatDateTime(run.startedAt ?? run.createdAt)}</span>
</div>
</div>

View file

@ -113,7 +113,6 @@ function makeRun(id: string, status: HeartbeatRun["status"], createdAt: string,
externalRunId: null,
processPid: null,
processStartedAt: null,
lastOutputAt: null,
retryOfRunId: null,
processLossRetryCount: 0,
stdoutExcerpt: null,

View file

@ -55,7 +55,6 @@ export const statusBadge: Record<string, string> = {
// Run statuses
failed: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
timed_out: "bg-orange-100 text-orange-700 dark:bg-orange-900/50 dark:text-orange-300",
idle_timeout: "bg-orange-100 text-orange-700 dark:bg-orange-900/50 dark:text-orange-300",
succeeded: "bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300",
error: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",
terminated: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300",