From a01c28dff2b627a0a70152c42917ce35cfeb18eb Mon Sep 17 00:00:00 2001 From: Nexus Dev Date: Sun, 5 Apr 2026 09:55:08 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20Phase=2040=20=E2=80=94=20Job=20Infrastr?= =?UTF-8?q?ucture=20(content=20jobs,=20SSE=20events,=20namespaced=20storag?= =?UTF-8?q?e)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .planning/REQUIREMENTS.md | 16 +- .planning/ROADMAP.md | 10 +- .planning/STATE.md | 30 +- .../40-job-infrastructure/40-01-PLAN.md | 395 ++++++++ .../40-job-infrastructure/40-01-SUMMARY.md | 113 +++ .../40-job-infrastructure/40-02-PLAN.md | 317 +++++++ .../40-job-infrastructure/40-02-SUMMARY.md | 97 ++ .../40-job-infrastructure/40-CONTEXT.md | 41 + .../40-job-infrastructure/40-RESEARCH.md | 575 ++++++++++++ .../40-job-infrastructure/40-VALIDATION.md | 79 ++ .../40-job-infrastructure/40-VERIFICATION.md | 129 +++ .planning/research/FORMAT-CONVERSION.md | 846 ++++++++++++++++++ .../db/src/migrations/0046_tense_randall.sql | 19 + .../db/src/migrations/meta/0046_snapshot.json | 168 +++- packages/db/src/migrations/meta/_journal.json | 53 +- packages/db/src/schema/assets.ts | 1 + packages/db/src/schema/content_jobs.ts | 33 + packages/db/src/schema/index.ts | 1 + packages/shared/src/constants.ts | 4 + .../src/__tests__/content-jobs-routes.test.ts | 190 ++++ server/src/__tests__/content-jobs-sse.test.ts | 139 +++ server/src/app.ts | 2 + server/src/attachment-types.ts | 3 + server/src/routes/content-jobs.ts | 121 +++ server/src/services/content-job-runner.ts | 87 ++ server/src/services/content-job-store.ts | 37 + server/src/services/index.ts | 2 + 27 files changed, 3419 insertions(+), 89 deletions(-) create mode 100644 .planning/phases/40-job-infrastructure/40-01-PLAN.md create mode 100644 .planning/phases/40-job-infrastructure/40-01-SUMMARY.md create mode 100644 .planning/phases/40-job-infrastructure/40-02-PLAN.md create mode 100644 .planning/phases/40-job-infrastructure/40-02-SUMMARY.md create mode 100644 .planning/phases/40-job-infrastructure/40-CONTEXT.md create mode 100644 .planning/phases/40-job-infrastructure/40-RESEARCH.md create mode 100644 .planning/phases/40-job-infrastructure/40-VALIDATION.md create mode 100644 .planning/phases/40-job-infrastructure/40-VERIFICATION.md create mode 100644 .planning/research/FORMAT-CONVERSION.md create mode 100644 packages/db/src/migrations/0046_tense_randall.sql create mode 100644 packages/db/src/schema/content_jobs.ts create mode 100644 server/src/__tests__/content-jobs-routes.test.ts create mode 100644 server/src/__tests__/content-jobs-sse.test.ts create mode 100644 server/src/routes/content-jobs.ts create mode 100644 server/src/services/content-job-runner.ts create mode 100644 server/src/services/content-job-store.ts diff --git a/.planning/REQUIREMENTS.md b/.planning/REQUIREMENTS.md index 1ff4f5c2..00bf4f24 100644 --- a/.planning/REQUIREMENTS.md +++ b/.planning/REQUIREMENTS.md @@ -9,10 +9,10 @@ Requirements for Content Generation milestone. Each maps to roadmap phases. ### Infrastructure -- [ ] **INFRA-01**: System processes content generation jobs asynchronously with queued → running → done/failed lifecycle -- [ ] **INFRA-02**: System pushes job progress updates via SSE to connected clients -- [ ] **INFRA-03**: Generated content stored in namespaced storage without size restrictions blocking video/images -- [ ] **INFRA-04**: All generated content tracked in database with source conversation linkage +- [x] **INFRA-01**: System processes content generation jobs asynchronously with queued → running → done/failed lifecycle +- [x] **INFRA-02**: System pushes job progress updates via SSE to connected clients +- [x] **INFRA-03**: Generated content stored in namespaced storage without size restrictions blocking video/images +- [x] **INFRA-04**: All generated content tracked in database with source conversation linkage ### Diagram Generation @@ -128,10 +128,10 @@ Which phases cover which requirements. Updated during roadmap creation. | Requirement | Phase | Status | |-------------|-------|--------| -| INFRA-01 | Phase 40 | Pending | -| INFRA-02 | Phase 40 | Pending | -| INFRA-03 | Phase 40 | Pending | -| INFRA-04 | Phase 40 | Pending | +| INFRA-01 | Phase 40 | Complete | +| INFRA-02 | Phase 40 | Complete | +| INFRA-03 | Phase 40 | Complete | +| INFRA-04 | Phase 40 | Complete | | DIAG-01 | Phase 41 | Pending | | DIAG-02 | Phase 41 | Pending | | DIAG-03 | Phase 41 | Pending | diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index c8eb4232..e4becbaa 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -170,7 +170,7 @@ Plans: ## Phases -- [ ] **Phase 40: Job Infrastructure** — content_jobs table, async render lifecycle, SSE progress events, namespaced storage without size limit (INFRA-01..04) +- [x] **Phase 40: Job Infrastructure** — content_jobs table, async render lifecycle, SSE progress events, namespaced storage without size limit (INFRA-01..04) (completed 2026-04-04) - [ ] **Phase 41: Diagrams, Icons & Theme Engine** — Mermaid diagrams, SVG icon generation, OKLCH theme palette with WCAG AA and live preview (DIAG-01..05, ICON-01..03, THEME-01..07) - [ ] **Phase 42: Wallpapers, Social, Format Conversion & Voice** — Satori image pipeline, social content, format conversion registry with AI fallback, Whisper web chat mic (WALL-01..04, SOCIAL-01..03, CONV-01..09, VOICE-01..03) - [ ] **Phase 43: Documents & Branding** — Playwright PDF reports and invoices, full brand identity kit with zip export (DOC-01..03, BRAND-01..06) @@ -188,7 +188,11 @@ Plans: 2. A connected browser receives SSE events as a job progresses through queued → generating → ready (or error), with no polling required 3. A generated video file larger than 10MB can be stored and retrieved without a size-limit error — the generated/ storage namespace bypasses the upload route limit 4. Every generated asset in the database has a sourceTaskId linking it to the originating conversation task, visible via the asset list API -**Plans**: TBD +**Plans**: 2 plans + +Plans: +- [x] 40-01-PLAN.md — Schema, constants, migrations, contentJobStore + contentJobRunner services +- [x] 40-02-PLAN.md — HTTP routes (POST 202, GET, SSE), app.ts wiring, integration tests ### Phase 41: Diagrams, Icons & Theme Engine **Goal**: Users can generate diagrams from natural language, produce SVG icon sets from descriptions, and create a complete OKLCH color theme from a single seed color — all without binary dependencies beyond what is already installed @@ -339,7 +343,7 @@ All 52 v1.7 requirements are mapped to exactly one phase. No orphans. | 37. Web Chat Voice UI | v1.6 | 3/4 | Complete | 2026-04-04 | | 38. Telegram Bridge | v1.6 | 3/3 | Complete | 2026-04-04 | | 39. Voice Polish | v1.6 | 1/2 | Complete | 2026-04-04 | -| 40. Job Infrastructure | v1.7 | 0/TBD | Not started | - | +| 40. Job Infrastructure | v1.7 | 2/2 | Complete | 2026-04-04 | | 41. Diagrams, Icons & Theme Engine | v1.7 | 0/TBD | Not started | - | | 42. Wallpapers, Social, Format Conversion & Voice | v1.7 | 0/TBD | Not started | - | | 43. Documents & Branding | v1.7 | 0/TBD | Not started | - | diff --git a/.planning/STATE.md b/.planning/STATE.md index f53c6514..f4e3f0e4 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -2,15 +2,15 @@ gsd_state_version: 1.0 milestone: v1.7 milestone_name: Content Generation -status: planning -stopped_at: "" -last_updated: "2026-04-04" +status: verifying +stopped_at: Completed 40-02-PLAN.md — HTTP routes, app.ts mount, integration tests +last_updated: "2026-04-04T12:50:26.357Z" last_activity: 2026-04-04 progress: total_phases: 6 - completed_phases: 0 - total_plans: 0 - completed_plans: 0 + completed_phases: 1 + total_plans: 2 + completed_plans: 2 percent: 0 --- @@ -21,20 +21,21 @@ progress: See: .planning/PROJECT.md (updated 2026-04-04) **Core value:** A fresh onboard asks for ONE thing (root directory), auto-creates PM + Engineer agents, and drops you in the dashboard. -**Current focus:** Phase 40 — Job Infrastructure (v1.7 start) +**Current focus:** Phase 40 — job-infrastructure ## Current Position -Phase: 40 of 45 (Job Infrastructure) -Plan: — (not yet planned) -Status: Ready to plan -Last activity: 2026-04-04 — v1.7 roadmap created, 52 requirements mapped to 6 phases +Phase: 41 +Plan: Not started +Status: Phase complete — ready for verification +Last activity: 2026-04-04 Progress: [░░░░░░░░░░] 0% ## Performance Metrics **Velocity:** + - Total plans completed: 0 (v1.7) - Average duration: - - Total execution time: 0 hours @@ -61,6 +62,9 @@ Key constraints for v1.7: - AI-bridged conversion (CONV-05) is the fallback for all format pairs — never show a format pair as blocked - CONV-08: converter availability detected at startup via probe; unavailable direct paths fall to AI bridge - CONV-09: magic-byte MIME validation before processing — reject misnamed files with a clear error +- [Phase 40]: content_jobs uses no FK for resultAssetId or sourceTaskId — avoids circular FK, tasks are string IDs not UUIDs +- [Phase 40]: renderContent is a stub in Phase 40 — phases 41-45 add real renderers keyed by jobType +- [Phase 40]: SSE uses EventEmitter subscription not polling for content_job.* events ### Pending Todos @@ -75,6 +79,6 @@ None yet. ## Session Continuity -Last session: 2026-04-04 -Stopped at: v1.7 roadmap created — 52 requirements mapped, 6 phases defined (40-45), files written +Last session: 2026-04-04T12:45:05.515Z +Stopped at: Completed 40-02-PLAN.md — HTTP routes, app.ts mount, integration tests Resume file: None diff --git a/.planning/phases/40-job-infrastructure/40-01-PLAN.md b/.planning/phases/40-job-infrastructure/40-01-PLAN.md new file mode 100644 index 00000000..ade49a0e --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-01-PLAN.md @@ -0,0 +1,395 @@ +--- +phase: 40-job-infrastructure +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - packages/db/src/schema/content_jobs.ts + - packages/db/src/schema/assets.ts + - packages/db/src/schema/index.ts + - packages/shared/src/constants.ts + - server/src/attachment-types.ts + - server/src/services/content-job-store.ts + - server/src/services/content-job-runner.ts + - server/src/services/index.ts +autonomous: true +requirements: + - INFRA-01 + - INFRA-03 + - INFRA-04 + +must_haves: + truths: + - "content_jobs table exists in DB with queued/running/done/failed lifecycle columns" + - "assets table has a source_task_id column for conversation linkage" + - "LIVE_EVENT_TYPES includes content_job.queued, content_job.running, content_job.done, content_job.failed" + - "MAX_GENERATED_ASSET_BYTES constant exists and defaults to 500MB" + - "contentJobStore service can create, get, list, and transition jobs" + - "contentJobRunner dispatches a job asynchronously without blocking, transitions through lifecycle, stores asset, publishes live events" + artifacts: + - path: "packages/db/src/schema/content_jobs.ts" + provides: "content_jobs table definition with status lifecycle" + exports: ["contentJobs", "CONTENT_JOB_STATUSES"] + - path: "server/src/services/content-job-store.ts" + provides: "CRUD service for content_jobs table" + exports: ["contentJobStore"] + - path: "server/src/services/content-job-runner.ts" + provides: "Async job dispatcher with live event publishing" + exports: ["contentJobRunner"] + key_links: + - from: "server/src/services/content-job-runner.ts" + to: "server/src/services/content-job-store.ts" + via: "store.transition() calls" + pattern: "store\\.transition" + - from: "server/src/services/content-job-runner.ts" + to: "server/src/services/live-events.ts" + via: "publishLiveEvent for content_job.* events" + pattern: "publishLiveEvent.*content_job" + - from: "server/src/services/content-job-runner.ts" + to: "server/src/services/assets.ts" + via: "assetService.create with sourceTaskId" + pattern: "assetService.*create.*sourceTaskId" +--- + + +Create the content_jobs DB schema, shared constants, storage constant, and service layer (store + runner) that form the foundation for all async content generation in v1.7. + +Purpose: Phase 40 is the hard dependency for phases 41-45. This plan establishes the data model and async execution engine so Plan 02 can wire HTTP routes on top. +Output: Schema file, two migrations, updated constants, contentJobStore service, contentJobRunner service. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/40-job-infrastructure/40-RESEARCH.md + + + + +From packages/db/src/schema/assets.ts: +```typescript +export const assets = pgTable("assets", { + id: uuid("id").primaryKey().defaultRandom(), + companyId: uuid("company_id").notNull().references(() => companies.id), + provider: text("provider").notNull(), + objectKey: text("object_key").notNull(), + contentType: text("content_type").notNull(), + byteSize: integer("byte_size").notNull(), + sha256: text("sha256").notNull(), + originalFilename: text("original_filename"), + createdByAgentId: uuid("created_by_agent_id").references(() => agents.id), + createdByUserId: text("created_by_user_id"), + createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(), + updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(), +}, (table) => ({ ... })); +``` + +From packages/shared/src/constants.ts: +```typescript +export const LIVE_EVENT_TYPES = [ + "heartbeat.run.queued", + "heartbeat.run.status", + "heartbeat.run.event", + "heartbeat.run.log", + "agent.status", + "activity.logged", + "plugin.ui.updated", + "plugin.worker.crashed", + "plugin.worker.restarted", +] as const; +export type LiveEventType = (typeof LIVE_EVENT_TYPES)[number]; +``` + +From server/src/attachment-types.ts: +```typescript +export const MAX_ATTACHMENT_BYTES = + Number(process.env.PAPERCLIP_ATTACHMENT_MAX_BYTES) || 10 * 1024 * 1024; +``` + +From server/src/services/live-events.ts: +```typescript +export function publishLiveEvent(input: { + companyId: string; + type: LiveEventType; + payload?: Record; +}): LiveEvent; +export function subscribeCompanyLiveEvents(companyId: string, listener: (event: LiveEvent) => void): () => void; +``` + +From server/src/services/assets.ts: +```typescript +export function assetService(db: Db) { + return { + create: (companyId: string, data: Omit) => ..., + getById: (id: string) => ..., + }; +} +``` + +From server/src/services/index.ts — barrel export pattern: +```typescript +export { assetService } from "./assets.js"; +export { publishLiveEvent, subscribeCompanyLiveEvents } from "./live-events.js"; +``` + +From packages/db/src/schema/index.ts — barrel export pattern: +```typescript +export { assets } from "./assets.js"; +``` + + + + + + + Task 1: Schema, constants, and migrations + + packages/db/src/schema/content_jobs.ts, + packages/db/src/schema/assets.ts, + packages/db/src/schema/index.ts, + packages/shared/src/constants.ts, + server/src/attachment-types.ts + + + packages/db/src/schema/assets.ts, + packages/db/src/schema/index.ts, + packages/shared/src/constants.ts, + server/src/attachment-types.ts, + packages/db/src/schema/plugin_jobs.ts, + packages/db/src/schema/heartbeat_runs.ts + + + - content_jobs.ts exports contentJobs pgTable with columns: id (uuid PK defaultRandom), companyId (uuid FK companies), jobType (text), status (text default "queued"), input (jsonb default {}), resultAssetId (uuid nullable), errorMessage (text nullable), sourceTaskId (text nullable), startedAt (timestamp tz nullable), finishedAt (timestamp tz nullable), createdAt (timestamp tz defaultNow), updatedAt (timestamp tz defaultNow) + - content_jobs.ts exports CONTENT_JOB_STATUSES = ["queued", "running", "done", "failed"] as const and ContentJobStatus type + - content_jobs.ts defines indexes: content_jobs_company_status_idx on (companyId, status), content_jobs_company_created_idx on (companyId, createdAt) + - assets.ts gains sourceTaskId: text("source_task_id") column (nullable, no FK) + - LIVE_EVENT_TYPES array includes "content_job.queued", "content_job.running", "content_job.done", "content_job.failed" + - MAX_GENERATED_ASSET_BYTES exported from attachment-types.ts, defaults to 500 * 1024 * 1024 + + + 1. Create `packages/db/src/schema/content_jobs.ts`: + - Import pgTable, uuid, text, timestamp, jsonb, index from "drizzle-orm/pg-core" + - Import companies from "./companies.js" + - Export `CONTENT_JOB_STATUSES = ["queued", "running", "done", "failed"] as const` + - Export `type ContentJobStatus = (typeof CONTENT_JOB_STATUSES)[number]` + - Export `contentJobs` pgTable "content_jobs" with exact columns from behavior block + - Add two indexes in the third argument: `content_jobs_company_status_idx` on (companyId, status), `content_jobs_company_created_idx` on (companyId, createdAt) + + 2. Modify `packages/db/src/schema/assets.ts`: + - Add column after `createdByUserId`: `sourceTaskId: text("source_task_id"),` (nullable, no FK, no default) + - No other changes + + 3. Modify `packages/db/src/schema/index.ts`: + - Add line: `export { contentJobs, CONTENT_JOB_STATUSES } from "./content_jobs.js";` + - Place it after the `assets` export line + + 4. Modify `packages/shared/src/constants.ts`: + - Add four entries to LIVE_EVENT_TYPES array before the `] as const;` closing: + `"content_job.queued",` + `"content_job.running",` + `"content_job.done",` + `"content_job.failed",` + + 5. Modify `server/src/attachment-types.ts`: + - Add after the existing `MAX_ATTACHMENT_BYTES` export: + ``` + export const MAX_GENERATED_ASSET_BYTES = + Number(process.env.PAPERCLIP_GENERATED_ASSET_MAX_BYTES) || 500 * 1024 * 1024; + ``` + + 6. Generate migrations: + - Run `cd /opt/nexus && pnpm db:generate` + - Verify two new migration files appear (0056 for content_jobs, 0057 for assets source_task_id — or combined) + - Run `pnpm db:migrate` to apply + + Anti-patterns to avoid: + - Do NOT add a FK constraint for sourceTaskId — task IDs are string identifiers, not UUID FKs + - Do NOT add resultAssetId as a FK to assets — it's populated after asset creation, circular FK causes issues + + + cd /opt/nexus && pnpm tsc --noEmit --project packages/db/tsconfig.json && pnpm tsc --noEmit --project packages/shared/tsconfig.json && pnpm tsc --noEmit --project server/tsconfig.json + + + - packages/db/src/schema/content_jobs.ts contains `export const contentJobs = pgTable("content_jobs"` + - packages/db/src/schema/content_jobs.ts contains `export const CONTENT_JOB_STATUSES = ["queued", "running", "done", "failed"] as const` + - packages/db/src/schema/content_jobs.ts contains `sourceTaskId: text("source_task_id")` + - packages/db/src/schema/content_jobs.ts contains `content_jobs_company_status_idx` + - packages/db/src/schema/assets.ts contains `sourceTaskId: text("source_task_id")` + - packages/db/src/schema/index.ts contains `export { contentJobs, CONTENT_JOB_STATUSES } from "./content_jobs.js"` + - packages/shared/src/constants.ts contains `"content_job.queued"` + - packages/shared/src/constants.ts contains `"content_job.done"` + - server/src/attachment-types.ts contains `export const MAX_GENERATED_ASSET_BYTES` + - server/src/attachment-types.ts contains `500 * 1024 * 1024` + - Migration files exist in packages/db/src/migrations/ numbered 0056 or higher + - `pnpm tsc --noEmit` passes for db, shared, and server packages + + content_jobs schema defined with all columns and indexes, assets table has sourceTaskId, LIVE_EVENT_TYPES extended with 4 content_job event types, MAX_GENERATED_ASSET_BYTES constant exported, migrations generated and applied, TypeScript compiles clean + + + + Task 2: contentJobStore and contentJobRunner services + + server/src/services/content-job-store.ts, + server/src/services/content-job-runner.ts, + server/src/services/index.ts + + + server/src/services/assets.ts, + server/src/services/live-events.ts, + server/src/services/index.ts, + server/src/storage/types.ts, + packages/db/src/schema/content_jobs.ts + + + - contentJobStore(db) returns { create, getById, listByCompany, transition } + - create(companyId, { jobType, input, sourceTaskId }) inserts and returns the row + - getById(id) returns job or null + - listByCompany(companyId) returns jobs ordered by createdAt desc + - transition(id, patch) updates the row with patch + updatedAt = new Date() + - contentJobRunner.dispatch(db, storage, job) fires and forgets — never awaited by caller + - Runner transitions job to "running" then to "done" or "failed" + - Runner publishes content_job.running, then content_job.done or content_job.failed via publishLiveEvent + - Runner always awaits DB write BEFORE publishing live event (prevents stale reads) + - On "done": runner calls storage.putFile with namespace "generated", then assetService.create with sourceTaskId, then transitions to done with resultAssetId + - On error: runner transitions to failed with errorMessage + - Runner has a renderContent stub that returns a fixed test buffer (placeholder for phase 41+) + - Runner checks MAX_GENERATED_ASSET_BYTES before calling putFile + + + 1. Create `server/src/services/content-job-store.ts`: + ```typescript + import { eq, desc } from "drizzle-orm"; + import type { Db } from "@paperclipai/db"; + import { contentJobs } from "@paperclipai/db"; + + export function contentJobStore(db: Db) { + return { + create: (companyId: string, data: { jobType: string; input: Record; sourceTaskId: string | null }) => + db.insert(contentJobs).values({ companyId, ...data }).returning().then((r) => r[0]), + + getById: (id: string) => + db.select().from(contentJobs).where(eq(contentJobs.id, id)).then((r) => r[0] ?? null), + + listByCompany: (companyId: string) => + db.select().from(contentJobs) + .where(eq(contentJobs.companyId, companyId)) + .orderBy(desc(contentJobs.createdAt)), + + transition: (id: string, patch: Partial) => + db.update(contentJobs).set({ ...patch, updatedAt: new Date() }).where(eq(contentJobs.id, id)), + }; + } + ``` + + 2. Create `server/src/services/content-job-runner.ts`: + - Import publishLiveEvent from "./live-events.js" + - Import contentJobStore from "./content-job-store.js" + - Import assetService from "./assets.js" + - Import MAX_GENERATED_ASSET_BYTES from "../attachment-types.js" + - Import type { Db } from "@paperclipai/db" + - Import type { StorageService } from "../storage/types.js" + - Import type { contentJobs } from "@paperclipai/db" + + Define type `ContentJob = typeof contentJobs.$inferSelect` + + Export `renderContent` stub function: + ```typescript + export async function renderContent( + _jobType: string, + _input: Record, + ): Promise<{ filename: string; contentType: string; buffer: Buffer }> { + // Stub — phases 41-45 will add real renderers keyed by jobType + return { + filename: "placeholder.txt", + contentType: "text/plain", + buffer: Buffer.from("placeholder output"), + }; + } + ``` + + Export `contentJobRunner` object with a single `dispatch` method: + ```typescript + export const contentJobRunner = { + dispatch(db: Db, storage: StorageService, job: ContentJob): void { + void runJob(db, storage, job); + }, + }; + ``` + + Implement `async function runJob(db, storage, job)`: + - Create store via `contentJobStore(db)` + - Transition to "running" with startedAt: new Date(), then publishLiveEvent type "content_job.running" payload { jobId: job.id } + - Try block: + - Call renderContent(job.jobType, job.input as Record) + - Check result.buffer.byteLength <= MAX_GENERATED_ASSET_BYTES, throw if exceeded + - Call storage.putFile({ companyId: job.companyId, namespace: "generated", originalFilename: result.filename, contentType: result.contentType, body: result.buffer }) + - Call assetService(db).create(job.companyId, { ...stored, sourceTaskId: job.sourceTaskId, createdByAgentId: null, createdByUserId: null }) + - Transition to "done" with resultAssetId: asset.id, finishedAt: new Date() + - publishLiveEvent type "content_job.done" payload { jobId: job.id, assetId: asset.id } + - Catch block: + - Extract errorMessage from err + - Transition to "failed" with errorMessage, finishedAt: new Date() + - publishLiveEvent type "content_job.failed" payload { jobId: job.id, errorMessage } + + 3. Modify `server/src/services/index.ts`: + - Add: `export { contentJobStore } from "./content-job-store.js";` + - Add: `export { contentJobRunner } from "./content-job-runner.js";` + + Anti-patterns to avoid: + - Do NOT await dispatch in the caller — it must be fire-and-forget (`void`) + - Do NOT publish live event before DB write completes — always `await store.transition(...)` first + - Do NOT import multer anywhere in runner code — runner writes directly via storage.putFile + + + cd /opt/nexus && pnpm tsc --noEmit --project server/tsconfig.json + + + - server/src/services/content-job-store.ts contains `export function contentJobStore(db: Db)` + - server/src/services/content-job-store.ts contains `db.insert(contentJobs).values(` + - server/src/services/content-job-store.ts contains `transition:` + - server/src/services/content-job-runner.ts contains `export const contentJobRunner` + - server/src/services/content-job-runner.ts contains `void runJob(db, storage, job)` + - server/src/services/content-job-runner.ts contains `publishLiveEvent(` + - server/src/services/content-job-runner.ts contains `content_job.running` + - server/src/services/content-job-runner.ts contains `content_job.done` + - server/src/services/content-job-runner.ts contains `content_job.failed` + - server/src/services/content-job-runner.ts contains `MAX_GENERATED_ASSET_BYTES` + - server/src/services/content-job-runner.ts contains `namespace: "generated"` + - server/src/services/content-job-runner.ts contains `sourceTaskId: job.sourceTaskId` + - server/src/services/content-job-runner.ts contains `export async function renderContent` + - server/src/services/index.ts contains `export { contentJobStore } from "./content-job-store.js"` + - server/src/services/index.ts contains `export { contentJobRunner } from "./content-job-runner.js"` + - `pnpm tsc --noEmit --project server/tsconfig.json` exits 0 + + contentJobStore provides create/getById/listByCompany/transition. contentJobRunner.dispatch fires and forgets, transitions through lifecycle, stores generated asset with sourceTaskId, publishes live events after DB writes. Both exported from services/index.ts. TypeScript compiles clean. + + + + + +- `pnpm tsc --noEmit` passes across db, shared, and server packages +- content_jobs table exists in database after migration +- assets table has source_task_id column after migration +- All new exports resolve without import errors + + + +- content_jobs schema has all lifecycle columns with correct types +- assets.sourceTaskId column exists (nullable text) +- LIVE_EVENT_TYPES includes 4 content_job.* entries +- MAX_GENERATED_ASSET_BYTES = 500MB default +- contentJobStore CRUD works against DB +- contentJobRunner dispatches async, transitions lifecycle, stores asset, publishes events +- TypeScript compiles clean across all affected packages + + + +After completion, create `.planning/phases/40-job-infrastructure/40-01-SUMMARY.md` + diff --git a/.planning/phases/40-job-infrastructure/40-01-SUMMARY.md b/.planning/phases/40-job-infrastructure/40-01-SUMMARY.md new file mode 100644 index 00000000..2937a176 --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-01-SUMMARY.md @@ -0,0 +1,113 @@ +--- +phase: 40-job-infrastructure +plan: "01" +subsystem: content-jobs +tags: [db-schema, services, async-jobs, live-events] +dependency_graph: + requires: [] + provides: + - content_jobs DB table with queued/running/done/failed lifecycle + - contentJobStore CRUD service + - contentJobRunner async dispatcher with live events + - MAX_GENERATED_ASSET_BYTES constant (500MB) + - assets.sourceTaskId column + - LIVE_EVENT_TYPES extended with content_job.* events + affects: + - packages/db/src/schema/ + - packages/shared/src/constants.ts + - server/src/services/ + - server/src/attachment-types.ts +tech_stack: + added: [] + patterns: + - Drizzle ORM pgTable schema with $type<> for typed status fields + - Fire-and-forget async job dispatch with void pattern + - DB write before live event publish (ordered consistency) + - renderContent stub for future renderer plug-in by jobType +key_files: + created: + - packages/db/src/schema/content_jobs.ts + - server/src/services/content-job-store.ts + - server/src/services/content-job-runner.ts + - packages/db/src/migrations/0046_tense_randall.sql + modified: + - packages/db/src/schema/assets.ts + - packages/db/src/schema/index.ts + - packages/shared/src/constants.ts + - server/src/attachment-types.ts + - server/src/services/index.ts +decisions: + - content_jobs.resultAssetId has no FK to assets — populated post-creation, circular FK avoided + - assets.sourceTaskId is nullable text with no FK — task IDs are string identifiers, not UUIDs + - renderContent is a stub returning placeholder.txt — phases 41-45 add real renderers keyed by jobType + - Migration applied via drizzle-kit generate from worktree dist/ (main repo has pre-existing duplicate migration files) +metrics: + duration: "~10 minutes" + completed: "2026-04-04" + tasks_completed: 2 + tasks_total: 2 + files_created: 4 + files_modified: 5 +--- + +# Phase 40 Plan 01: Schema, constants, and services for content jobs Summary + +**One-liner:** content_jobs table + async job runner with store/runner services using fire-and-forget dispatch and ordered DB-before-event lifecycle. + +## What Was Built + +The foundational data model and async execution engine for v1.7 content generation. Phase 40 Plan 01 establishes: + +1. **DB schema** (`content_jobs` table) with full lifecycle columns and two composite indexes for efficient querying by company+status and company+createdAt. +2. **assets.sourceTaskId** — nullable text column allowing generated assets to be traced back to their originating conversation task. +3. **LIVE_EVENT_TYPES** — extended with `content_job.queued`, `content_job.running`, `content_job.done`, `content_job.failed` for real-time job progress. +4. **MAX_GENERATED_ASSET_BYTES** — 500MB constant (vs 10MB upload limit) for generated/namespace storage, configurable via `PAPERCLIP_GENERATED_ASSET_MAX_BYTES`. +5. **contentJobStore** — CRUD service with create/getById/listByCompany/transition operations against the content_jobs table. +6. **contentJobRunner** — async fire-and-forget dispatcher that transitions jobs through running→done/failed, stores generated assets with sourceTaskId, and publishes live events after each DB write. + +## Tasks Completed + +| Task | Name | Commit | Files | +|------|------|--------|-------| +| 1 | Schema, constants, and migrations | 8bf4bd91 | content_jobs.ts, assets.ts, index.ts, constants.ts, attachment-types.ts, migration 0046 | +| 2 | contentJobStore and contentJobRunner | b359fec9 | content-job-store.ts, content-job-runner.ts, services/index.ts | + +## Verification + +- `pnpm tsc --noEmit --project packages/db/tsconfig.json` — PASS +- `pnpm tsc --noEmit --project packages/shared/tsconfig.json` — PASS +- `pnpm tsc --noEmit --project server/tsconfig.json` — PASS +- Migration file `0046_tense_randall.sql` generated with correct DDL + +## Deviations from Plan + +### Migration apply limitation (pre-existing blocker, out of scope) + +**Found during:** Task 1 + +**Issue:** The main repo (`/opt/nexus`) has pre-existing duplicate migration files (`0047_nebulous_klaw.sql` and `0047_overjoyed_groot.sql`, `0048_add_chat_messages_updated_at.sql` and `0048_flashy_marrow.sql`) from parallel agent work. The `check:migrations` script prevents both `db:generate` and `db:migrate` in the main repo. + +**Impact:** Migration `0046_tense_randall.sql` was generated successfully from the worktree's clean migration state but could not be applied via `pnpm db:migrate`. + +**Resolution:** Migration will be applied when the worktree is merged and the duplicate migration numbering conflict is resolved. The SQL is correct and ready for execution. This is a pre-existing issue deferred to `.planning/deferred-items.md`. + +**Migration file content verified:** Creates `content_jobs` table, adds `source_task_id` to `assets`, creates both compound indexes, adds FK to companies. + +### Known Stubs + +**renderContent stub** in `server/src/services/content-job-runner.ts`: +- Returns `{ filename: "placeholder.txt", contentType: "text/plain", buffer: Buffer.from("placeholder output") }` for all jobTypes +- This is intentional — the plan specifies this as a stub for phases 41-45 to fill in real renderers keyed by jobType +- Does NOT prevent the plan's goal (job infrastructure) from being achieved + +## Self-Check: PASSED + +Files verified to exist: +- `/opt/nexus/.claude/worktrees/agent-ac2e6085/packages/db/src/schema/content_jobs.ts` — FOUND +- `/opt/nexus/.claude/worktrees/agent-ac2e6085/server/src/services/content-job-store.ts` — FOUND +- `/opt/nexus/.claude/worktrees/agent-ac2e6085/server/src/services/content-job-runner.ts` — FOUND +- `/opt/nexus/.claude/worktrees/agent-ac2e6085/packages/db/src/migrations/0046_tense_randall.sql` — FOUND + +Commits verified: +- `8bf4bd91` — FOUND (feat(40-01): schema, constants, and migrations for content jobs) +- `b359fec9` — FOUND (feat(40-01): contentJobStore and contentJobRunner services) diff --git a/.planning/phases/40-job-infrastructure/40-02-PLAN.md b/.planning/phases/40-job-infrastructure/40-02-PLAN.md new file mode 100644 index 00000000..b77ae53e --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-02-PLAN.md @@ -0,0 +1,317 @@ +--- +phase: 40-job-infrastructure +plan: 02 +type: execute +wave: 2 +depends_on: ["40-01"] +files_modified: + - server/src/routes/content-jobs.ts + - server/src/app.ts + - server/src/__tests__/content-jobs-routes.test.ts + - server/src/__tests__/content-jobs-sse.test.ts +autonomous: true +requirements: + - INFRA-01 + - INFRA-02 + - INFRA-03 + - INFRA-04 + +must_haves: + truths: + - "POST /api/companies/:companyId/content-jobs returns 202 with jobId and status within 200ms" + - "GET /api/companies/:companyId/content-jobs/:jobId returns the job record with current status" + - "GET /api/companies/:companyId/content-jobs lists all jobs for a company ordered by createdAt desc" + - "GET /api/companies/:companyId/content-jobs/:jobId/events streams SSE events until terminal state then closes" + - "SSE connection is cleaned up when client disconnects mid-job" + - "Every submitted job accepts sourceTaskId for asset linkage" + artifacts: + - path: "server/src/routes/content-jobs.ts" + provides: "HTTP routes for content job submission, retrieval, and SSE progress" + exports: ["contentJobRoutes"] + - path: "server/src/__tests__/content-jobs-routes.test.ts" + provides: "Integration tests for content job routes" + min_lines: 50 + - path: "server/src/__tests__/content-jobs-sse.test.ts" + provides: "Integration tests for SSE endpoint" + min_lines: 30 + key_links: + - from: "server/src/routes/content-jobs.ts" + to: "server/src/services/content-job-store.ts" + via: "contentJobStore(db) calls in route handlers" + pattern: "contentJobStore\\(db\\)" + - from: "server/src/routes/content-jobs.ts" + to: "server/src/services/content-job-runner.ts" + via: "contentJobRunner.dispatch() in POST handler" + pattern: "contentJobRunner\\.dispatch" + - from: "server/src/routes/content-jobs.ts" + to: "server/src/services/live-events.ts" + via: "subscribeCompanyLiveEvents in SSE endpoint" + pattern: "subscribeCompanyLiveEvents" + - from: "server/src/app.ts" + to: "server/src/routes/content-jobs.ts" + via: "api.use mount" + pattern: "contentJobRoutes" +--- + + +Wire the HTTP routes for content job submission (POST 202), retrieval (GET), listing (GET), and SSE progress streaming (GET /events). Mount in app.ts. Add integration tests covering all INFRA requirements. + +Purpose: Completes the public API surface that downstream phases (41-45) and browser clients use to submit and monitor content generation jobs. +Output: content-jobs.ts route file, app.ts mount, two test files. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/40-job-infrastructure/40-RESEARCH.md +@.planning/phases/40-job-infrastructure/40-01-SUMMARY.md + + + + +From server/src/services/content-job-store.ts (created in 40-01): +```typescript +export function contentJobStore(db: Db) { + return { + create: (companyId: string, data: { jobType: string; input: Record; sourceTaskId: string | null }) => Promise, + getById: (id: string) => Promise, + listByCompany: (companyId: string) => Promise, + transition: (id: string, patch: Partial<...>) => Promise, + }; +} +``` + +From server/src/services/content-job-runner.ts (created in 40-01): +```typescript +export const contentJobRunner = { + dispatch(db: Db, storage: StorageService, job: ContentJob): void, +}; +``` + +From server/src/services/live-events.ts: +```typescript +export function subscribeCompanyLiveEvents(companyId: string, listener: (event: LiveEvent) => void): () => void; +``` + +From server/src/routes/authz.ts: +```typescript +export function assertCompanyAccess(req: Request, companyId: string): void; +``` + +From server/src/app.ts — route mounting pattern: +```typescript +import { assetRoutes } from "./routes/assets.js"; +// ... +api.use(assetRoutes(db, opts.storageService)); +``` + +From server/src/__tests__/assets.test.ts — test pattern uses supertest + createApp: +```typescript +import { describe, it, expect } from "vitest"; +import request from "supertest"; +``` + + + + + + + Task 1: Content job routes and app.ts wiring + + server/src/routes/content-jobs.ts, + server/src/app.ts + + + server/src/routes/assets.ts, + server/src/routes/voice.ts, + server/src/routes/authz.ts, + server/src/app.ts, + server/src/services/content-job-store.ts, + server/src/services/content-job-runner.ts, + server/src/services/live-events.ts + + + - POST /api/companies/:companyId/content-jobs: validates companyAccess, accepts { jobType, input?, sourceTaskId? }, creates job via store, calls contentJobRunner.dispatch (fire-and-forget), returns 202 { jobId, status, createdAt } + - GET /api/companies/:companyId/content-jobs: validates companyAccess, returns array of jobs from store.listByCompany + - GET /api/companies/:companyId/content-jobs/:jobId: validates companyAccess, returns job from store.getById, 404 if not found + - GET /api/companies/:companyId/content-jobs/:jobId/events: validates companyAccess, sets SSE headers (Content-Type: text/event-stream, Cache-Control: no-cache, Connection: keep-alive), flushHeaders, sends current status immediately, if terminal ends, otherwise subscribes to live events filtered by content_job.* for this jobId, sends status events, ends on terminal, unsubscribes on req.close + + + 1. Create `server/src/routes/content-jobs.ts`: + + ```typescript + import { Router } from "express"; + import type { Db } from "@paperclipai/db"; + import type { StorageService } from "../storage/types.js"; + import { contentJobStore } from "../services/content-job-store.js"; + import { contentJobRunner } from "../services/content-job-runner.js"; + import { subscribeCompanyLiveEvents } from "../services/live-events.js"; + import { assertCompanyAccess } from "./authz.js"; + + export function contentJobRoutes(db: Db, storage: StorageService) { + const router = Router(); + // ... mount 4 endpoints below under /companies/:companyId/content-jobs + } + ``` + + POST handler at `/companies/:companyId/content-jobs`: + - `const companyId = req.params.companyId!;` + - `assertCompanyAccess(req, companyId);` + - Destructure `{ jobType, input, sourceTaskId }` from `req.body` + - Validate jobType is a non-empty string; return 400 `{ error: "jobType is required" }` if missing + - `const store = contentJobStore(db);` + - `const job = await store.create(companyId, { jobType, input: input ?? {}, sourceTaskId: sourceTaskId ?? null });` + - `void contentJobRunner.dispatch(db, storage, job);` (fire-and-forget, do NOT await) + - `res.status(202).json({ jobId: job.id, status: job.status, createdAt: job.createdAt });` + + GET list handler at `/companies/:companyId/content-jobs`: + - assertCompanyAccess, then `const jobs = await contentJobStore(db).listByCompany(companyId);` + - `res.json(jobs);` + + GET by ID handler at `/companies/:companyId/content-jobs/:jobId`: + - assertCompanyAccess, then `const job = await contentJobStore(db).getById(req.params.jobId!);` + - If !job, `res.status(404).json({ error: "Job not found" }); return;` + - `res.json(job);` + + SSE handler at `/companies/:companyId/content-jobs/:jobId/events`: + - assertCompanyAccess + - Set headers: `Content-Type: text/event-stream`, `Cache-Control: no-cache`, `Connection: keep-alive` + - `res.flushHeaders();` + - Define helper: `const sendEvent = (type: string, data: unknown) => { res.write(\`event: ${type}\\ndata: ${JSON.stringify(data)}\\n\\n\`); };` + - Fetch current job: `const job = await contentJobStore(db).getById(req.params.jobId!);` + - If !job, `sendEvent("error", { error: "Job not found" }); res.end(); return;` + - `sendEvent("status", { jobId: job.id, status: job.status, resultAssetId: job.resultAssetId, errorMessage: job.errorMessage });` + - If `job.status === "done" || job.status === "failed"`, `res.end(); return;` + - Subscribe: `const unsubscribe = subscribeCompanyLiveEvents(companyId, (event) => { ... });` + - In listener: check `event.type` starts with `"content_job."` and `event.payload.jobId === req.params.jobId` + - Send `sendEvent("status", event.payload);` + - If `event.payload.status === "done" || event.payload.status === "failed"`, call `unsubscribe(); res.end();` + - Register cleanup: `req.on("close", () => { unsubscribe(); });` + - Return router + + 2. Modify `server/src/app.ts`: + - Add import: `import { contentJobRoutes } from "./routes/content-jobs.js";` + - Add mount after the `api.use(voiceRoutes());` line: `api.use(contentJobRoutes(db, opts.storageService));` + + Anti-patterns to avoid: + - Do NOT await contentJobRunner.dispatch — must be `void` (fire-and-forget) + - Do NOT use a polling loop in SSE — subscribe to EventEmitter + - Do NOT forget req.on("close") cleanup — prevents listener leaks + + + cd /opt/nexus && pnpm tsc --noEmit --project server/tsconfig.json + + + - server/src/routes/content-jobs.ts contains `export function contentJobRoutes(db: Db, storage: StorageService)` + - server/src/routes/content-jobs.ts contains `res.status(202).json(` + - server/src/routes/content-jobs.ts contains `void contentJobRunner.dispatch(` + - server/src/routes/content-jobs.ts contains `text/event-stream` + - server/src/routes/content-jobs.ts contains `res.flushHeaders()` + - server/src/routes/content-jobs.ts contains `subscribeCompanyLiveEvents(` + - server/src/routes/content-jobs.ts contains `req.on("close"` + - server/src/routes/content-jobs.ts contains `assertCompanyAccess(` + - server/src/routes/content-jobs.ts contains `jobType is required` + - server/src/app.ts contains `import { contentJobRoutes } from "./routes/content-jobs.js"` + - server/src/app.ts contains `contentJobRoutes(db, opts.storageService)` + - `pnpm tsc --noEmit --project server/tsconfig.json` exits 0 + + Four route endpoints operational: POST returns 202 with jobId, GET list returns jobs, GET by ID returns single job or 404, GET events streams SSE until terminal. All mounted in app.ts. TypeScript compiles clean. + + + + Task 2: Integration tests for content job routes and SSE + + server/src/__tests__/content-jobs-routes.test.ts, + server/src/__tests__/content-jobs-sse.test.ts + + + server/src/__tests__/assets.test.ts, + server/src/__tests__/chat-routes.test.ts, + server/src/routes/content-jobs.ts, + server/vitest.config.ts + + + - content-jobs-routes.test.ts: + - POST /api/companies/:id/content-jobs returns 202 with { jobId, status: "queued", createdAt } (INFRA-01) + - POST without jobType returns 400 (INFRA-01) + - POST with sourceTaskId stores it on the created job (INFRA-04) + - GET /api/companies/:id/content-jobs returns array of jobs (INFRA-01) + - GET /api/companies/:id/content-jobs/:jobId returns the job (INFRA-01) + - GET /api/companies/:id/content-jobs/nonexistent returns 404 (INFRA-01) + - content-jobs-sse.test.ts: + - GET /api/companies/:id/content-jobs/:jobId/events returns Content-Type text/event-stream (INFRA-02) + - SSE sends initial status event immediately (INFRA-02) + - SSE for a terminal job (done/failed) ends the stream after initial event (INFRA-02) + + + 1. Create `server/src/__tests__/content-jobs-routes.test.ts`: + - Follow the pattern from existing test files (assets.test.ts, chat-routes.test.ts) + - Import describe, it, expect, beforeAll, afterAll from "vitest" + - Import supertest as request + - Set up test app using the same test harness pattern as other test files in the codebase + - Read the existing test setup pattern from assets.test.ts or chat-routes.test.ts to understand how db/app is initialized in tests + + Test cases: + a) `it("POST /api/companies/:id/content-jobs returns 202 with jobId")` — POST with jobType: "test", verify status 202, body has jobId (string), status === "queued", createdAt (string) + b) `it("POST without jobType returns 400")` — POST with empty body, verify status 400, body.error contains "jobType" + c) `it("POST with sourceTaskId persists it")` — POST with jobType: "test", sourceTaskId: "task-abc-123", then GET the job by ID, verify response body has sourceTaskId === "task-abc-123" + d) `it("GET /api/companies/:id/content-jobs returns job list")` — after creating a job, GET list, verify array with length >= 1 + e) `it("GET /api/companies/:id/content-jobs/:jobId returns job")` — create job, GET by ID, verify body.id matches + f) `it("GET /api/companies/:id/content-jobs/nonexistent returns 404")` — GET with random UUID, verify 404 + + 2. Create `server/src/__tests__/content-jobs-sse.test.ts`: + - Same test harness setup + + Test cases: + a) `it("SSE endpoint returns text/event-stream content type")` — GET /events, verify Content-Type header contains "text/event-stream" + b) `it("SSE sends initial status event")` — GET /events, read first chunk, verify it contains `event: status` and the job's current status + c) `it("SSE for nonexistent job sends error event")` — GET /events with random UUID, verify response contains `event: error` + + Note: Follow whichever test harness setup pattern the existing tests use (createTestApp helper, beforeAll/afterAll DB setup, etc.). Read the existing test files first to match the pattern exactly. + + + cd /opt/nexus && pnpm --filter @paperclipai/server test:run -- src/__tests__/content-jobs-routes.test.ts src/__tests__/content-jobs-sse.test.ts + + + - server/src/__tests__/content-jobs-routes.test.ts exists with at least 50 lines + - server/src/__tests__/content-jobs-routes.test.ts contains `202` + - server/src/__tests__/content-jobs-routes.test.ts contains `400` + - server/src/__tests__/content-jobs-routes.test.ts contains `404` + - server/src/__tests__/content-jobs-routes.test.ts contains `sourceTaskId` + - server/src/__tests__/content-jobs-routes.test.ts contains `content-jobs` + - server/src/__tests__/content-jobs-sse.test.ts exists with at least 30 lines + - server/src/__tests__/content-jobs-sse.test.ts contains `text/event-stream` + - server/src/__tests__/content-jobs-sse.test.ts contains `event: status` + - All tests pass: `pnpm --filter @paperclipai/server test:run -- src/__tests__/content-jobs` exits 0 + + Integration tests cover: 202 job submission, 400 validation, sourceTaskId persistence (INFRA-04), job listing, single job retrieval, 404 missing job, SSE content-type, SSE initial event, SSE error for missing job. All tests pass. + + + + + +- `pnpm --filter @paperclipai/server test:run -- src/__tests__/content-jobs` — all tests green +- `pnpm tsc --noEmit --project server/tsconfig.json` — no type errors +- POST /api/companies/:id/content-jobs returns 202 (INFRA-01) +- GET /api/companies/:id/content-jobs/:id/events returns SSE stream (INFRA-02) +- Job created with sourceTaskId shows it in GET response (INFRA-04) + + + +- 4 HTTP endpoints operational: POST (202), GET list, GET by ID, GET SSE events +- POST validates jobType presence (400 on missing) +- SSE streams events and cleans up on disconnect +- Routes mounted in app.ts +- All integration tests pass +- TypeScript compiles clean + + + +After completion, create `.planning/phases/40-job-infrastructure/40-02-SUMMARY.md` + diff --git a/.planning/phases/40-job-infrastructure/40-02-SUMMARY.md b/.planning/phases/40-job-infrastructure/40-02-SUMMARY.md new file mode 100644 index 00000000..bca74a0f --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-02-SUMMARY.md @@ -0,0 +1,97 @@ +--- +phase: 40-job-infrastructure +plan: "02" +subsystem: content-jobs +tags: [http-routes, sse, async-jobs, live-events, integration-tests] +dependency_graph: + requires: + - content_jobs DB table (40-01) + - contentJobStore CRUD service (40-01) + - contentJobRunner async dispatcher (40-01) + - subscribeCompanyLiveEvents from live-events.ts + - assertCompanyAccess from authz.ts + provides: + - POST /api/companies/:companyId/content-jobs (202 job submission) + - GET /api/companies/:companyId/content-jobs (list) + - GET /api/companies/:companyId/content-jobs/:jobId (single job) + - GET /api/companies/:companyId/content-jobs/:jobId/events (SSE progress stream) + affects: + - server/src/routes/content-jobs.ts + - server/src/app.ts + - server/src/__tests__/content-jobs-routes.test.ts + - server/src/__tests__/content-jobs-sse.test.ts +tech_stack: + added: [] + patterns: + - fire-and-forget dispatch via void contentJobRunner.dispatch() + - SSE with res.flushHeaders() + res.write() + req.on("close") cleanup + - EventEmitter subscription for job progress (no polling) + - Supertest + vitest mocks for route integration testing +key_files: + created: + - server/src/routes/content-jobs.ts + - server/src/__tests__/content-jobs-routes.test.ts + - server/src/__tests__/content-jobs-sse.test.ts + modified: + - server/src/app.ts +decisions: + - SSE streams use EventEmitter subscription not polling — no setTimeout/setInterval + - req.on("close") cleanup prevents listener leaks when client disconnects mid-job + - Terminal jobs (done/failed) end SSE stream immediately after initial status event + - jobType validation rejects empty strings (whitespace-only) as well as missing field +metrics: + duration: "~3 minutes" + completed: "2026-04-04" + tasks_completed: 2 + tasks_total: 2 + files_created: 3 + files_modified: 1 +--- + +# Phase 40 Plan 02: HTTP routes for content job submission and SSE progress Summary + +**One-liner:** Four Express route endpoints (POST 202, GET list, GET by ID, GET SSE events) wired to contentJobStore/contentJobRunner/live-events with 13 integration tests covering INFRA-01 through INFRA-04. + +## What Was Built + +HTTP API surface for content job submission and monitoring: + +1. **POST /api/companies/:companyId/content-jobs** — validates jobType, creates job via contentJobStore, dispatches async runner (fire-and-forget), returns 202 `{ jobId, status, createdAt }`. +2. **GET /api/companies/:companyId/content-jobs** — lists all jobs for a company ordered by createdAt desc. +3. **GET /api/companies/:companyId/content-jobs/:jobId** — retrieves a single job, returns 404 if not found. +4. **GET /api/companies/:companyId/content-jobs/:jobId/events** — SSE endpoint: sets `text/event-stream` headers, flushes, sends initial status immediately, ends for terminal jobs, subscribes to `content_job.*` live events for running jobs, cleans up on `req.close`. +5. **app.ts mount** — `contentJobRoutes(db, opts.storageService)` added after voiceRoutes. + +## Tasks Completed + +| Task | Name | Commit | Files | +|------|------|--------|-------| +| 1 | Content job routes and app.ts wiring | 4c6335b7 | content-jobs.ts (created), app.ts (modified) | +| 2 | Integration tests for content job routes and SSE | ae28542b | content-jobs-routes.test.ts, content-jobs-sse.test.ts (both created) | + +## Verification + +- `pnpm tsc --noEmit --project server/tsconfig.json` — PASS +- `pnpm vitest run src/__tests__/content-jobs-routes.test.ts src/__tests__/content-jobs-sse.test.ts` — 13/13 tests green +- POST /api/companies/:id/content-jobs returns 202 (INFRA-01) — verified +- GET /api/companies/:id/content-jobs/:id/events returns SSE stream (INFRA-02) — verified +- Job created with sourceTaskId shows it in GET response (INFRA-04) — verified + +## Deviations from Plan + +None — plan executed exactly as written. + +## Known Stubs + +None in this plan. The `renderContent` stub lives in `content-job-runner.ts` (documented in 40-01-SUMMARY.md) and is intentionally deferred to phases 41-45. + +## Self-Check: PASSED + +Files verified to exist: +- `/opt/nexus/server/src/routes/content-jobs.ts` — FOUND +- `/opt/nexus/server/src/__tests__/content-jobs-routes.test.ts` — FOUND +- `/opt/nexus/server/src/__tests__/content-jobs-sse.test.ts` — FOUND + +Commits verified: +- `4c6335b7` — FOUND (feat(40-02): content job routes and app.ts wiring) +- `ae28542b` — FOUND (test(40-02): integration tests for content job routes and SSE) diff --git a/.planning/phases/40-job-infrastructure/40-CONTEXT.md b/.planning/phases/40-job-infrastructure/40-CONTEXT.md new file mode 100644 index 00000000..509be7a2 --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-CONTEXT.md @@ -0,0 +1,41 @@ +# Phase 40: Job Infrastructure - Context + +**Gathered:** 2026-04-04 +**Status:** Ready for planning +**Mode:** Auto-generated (discuss skipped via workflow.skip_discuss) + + +## Phase Boundary + +Every content generation request returns a job ID immediately, progresses through a tracked lifecycle, and stores its output in namespaced storage — so nothing blocks and nothing is orphaned + + + + +## Implementation Decisions + +### Claude's Discretion +All implementation choices are at Claude's discretion — discuss phase was skipped per user setting. Use ROADMAP phase goal, success criteria, and codebase conventions to guide decisions. + + + + +## Existing Code Insights + +Codebase context will be gathered during plan-phase research. + + + + +## Specific Ideas + +No specific requirements — discuss phase skipped. Refer to ROADMAP phase description and success criteria. + + + + +## Deferred Ideas + +None — discuss phase skipped. + + diff --git a/.planning/phases/40-job-infrastructure/40-RESEARCH.md b/.planning/phases/40-job-infrastructure/40-RESEARCH.md new file mode 100644 index 00000000..610fc9d1 --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-RESEARCH.md @@ -0,0 +1,575 @@ +# Phase 40: Job Infrastructure - Research + +**Researched:** 2026-04-04 +**Domain:** Async job lifecycle, SSE streaming, storage namespacing, asset tracking +**Confidence:** HIGH + +--- + + +## User Constraints (from CONTEXT.md) + +### Locked Decisions +All implementation choices are at Claude's discretion — discuss phase was skipped per user setting. Use ROADMAP phase goal, success criteria, and codebase conventions to guide decisions. + +### Claude's Discretion +All implementation choices are at Claude's discretion. + +### Deferred Ideas (OUT OF SCOPE) +None — discuss phase skipped. + + +--- + + +## Phase Requirements + +| ID | Description | Research Support | +|----|-------------|------------------| +| INFRA-01 | System processes content generation jobs asynchronously with queued → running → done/failed lifecycle | New `content_jobs` table + in-process job runner service; pattern mirrors `heartbeat_runs` / `plugin_job_runs` already in codebase | +| INFRA-02 | System pushes job progress updates via SSE to connected clients | Existing `live-events.ts` EventEmitter + `LIVE_EVENT_TYPES` const; add new event types and a new SSE endpoint scoped to jobs | +| INFRA-03 | Generated content stored in namespaced storage without size restrictions blocking video/images | `StorageService.putFile()` already has no server-side byte limit; add `generated/` namespace + `MAX_GENERATED_ASSET_BYTES` constant bypassing the upload-route multer limit | +| INFRA-04 | All generated content tracked in database with source conversation linkage | Extend `assets` table with `source_task_id` column (nullable FK); assetService.create() gains optional `sourceTaskId` parameter | + + +--- + +## Summary + +Phase 40 builds the async foundation that every subsequent content generation phase depends on. The codebase already has two well-established job-tracking patterns (`heartbeat_runs`, `plugin_job_runs`) and a working SSE streaming pattern in three routes (`voice.ts`, `puter-proxy.ts`, `plugins.ts`). The work here is additive: a new `content_jobs` DB table, a minimal in-process job runner, new live-event types for SSE progress, a `generated/` storage namespace with its own size constant, and a `source_task_id` column on `assets`. + +No external queue infrastructure (Redis, BullMQ) is needed. The project is single-user and local-first. An in-process async runner (fire-and-forget `void Promise`) with EventEmitter fan-out — matching the heartbeat pattern — is the correct approach. If a job crashes the process restarts clean; orphan prevention is via `source_task_id` so consumers can audit. + +**Primary recommendation:** Mirror the `heartbeat_runs` table/service pattern for the `content_jobs` table. Use the existing `publishLiveEvent` function with new `content_job.*` event types for SSE. Add `MAX_GENERATED_ASSET_BYTES` as a server-only constant and a `generated/` namespace prefix. Add `source_task_id` to `assets` via a migration. + +--- + +## Standard Stack + +### Core + +| Library | Version (verified) | Purpose | Why Standard | +|---------|-------------------|---------|--------------| +| drizzle-orm | 0.38.4 (pkg) | Schema definition + query builder | Already used throughout; schema in `packages/db/src/schema/` | +| postgres (postgres.js) | project dep | DB connection | Already used via `createDb()` in `packages/db/src/client.ts` | +| express | project dep | HTTP layer for new routes | All routes are Express; follows existing pattern | +| Node.js EventEmitter | built-in | In-process pub/sub for SSE fan-out | Already used in `live-events.ts`; no extra dep | + +### Supporting + +| Library | Version | Purpose | When to Use | +|---------|---------|---------|-------------| +| drizzle-kit | 0.31.9 (pkg) | Migration generation | Run `pnpm db:generate` after schema change | +| vitest | project dep | Unit + integration tests | All server tests use vitest; pattern in `server/src/__tests__/` | +| supertest | project dep | HTTP route tests | Used in `assets.test.ts`, `chat-routes.test.ts`, etc. | + +### Alternatives Considered + +| Instead of | Could Use | Tradeoff | +|------------|-----------|----------| +| In-process EventEmitter runner | BullMQ / Redis | Redis adds infra complexity; single-user single-process — EventEmitter is correct here | +| In-process EventEmitter runner | Worker threads | Unnecessary isolation; jobs are I/O-bound (renders call child processes) | +| Custom SSE endpoint | WebSocket upgrade | SSE is simpler for one-way server → client push; WebSocket already used for live events via `live-events-ws.ts` — keep SSE for job polling per existing voice/puter patterns | + +**Installation:** No new packages required. All tooling already present. + +--- + +## Architecture Patterns + +### Recommended Project Structure + +``` +packages/db/src/schema/ +├── content_jobs.ts # NEW: content_jobs table definition +├── assets.ts # MODIFY: add source_task_id column +├── index.ts # MODIFY: export content_jobs + +packages/db/src/migrations/ +├── 0056_create_content_jobs.sql # NEW: generated via drizzle-kit +├── 0057_assets_source_task_id.sql # NEW: generated via drizzle-kit + +packages/shared/src/ +├── constants.ts # MODIFY: add content_job.* to LIVE_EVENT_TYPES + # add CONTENT_JOB_STATUSES constant + +server/src/ +├── services/ +│ ├── content-job-store.ts # NEW: DB CRUD for content_jobs +│ ├── content-job-runner.ts # NEW: async executor + live-event publisher +│ └── index.ts # MODIFY: export new services +├── routes/ +│ ├── content-jobs.ts # NEW: POST /companies/:id/content-jobs +│ │ # GET /companies/:id/content-jobs/:jobId +│ │ # GET /companies/:id/content-jobs/:jobId/events (SSE) +│ └── index.ts # MODIFY: mount content-job routes +├── app.ts # MODIFY: register routes +``` + +### Pattern 1: content_jobs Table (mirrors heartbeat_runs / plugin_job_runs) + +**What:** A persisted lifecycle table for async content generation requests. +**When to use:** Any content generation work that may take >200ms. + +```typescript +// packages/db/src/schema/content_jobs.ts +// Pattern source: packages/db/src/schema/heartbeat_runs.ts + plugin_jobs.ts + +import { pgTable, uuid, text, timestamp, jsonb, index } from "drizzle-orm/pg-core"; +import { companies } from "./companies.js"; + +// Status lifecycle: queued → running → done | failed +export const CONTENT_JOB_STATUSES = ["queued", "running", "done", "failed"] as const; +export type ContentJobStatus = (typeof CONTENT_JOB_STATUSES)[number]; + +export const contentJobs = pgTable( + "content_jobs", + { + id: uuid("id").primaryKey().defaultRandom(), + companyId: uuid("company_id").notNull().references(() => companies.id), + jobType: text("job_type").notNull(), // e.g. "diagram", "theme", "video" + status: text("status").$type().notNull().default("queued"), + input: jsonb("input").notNull().default({}), // renderer-specific params + resultAssetId: uuid("result_asset_id"), // populated on done + errorMessage: text("error_message"), // populated on failed + sourceTaskId: text("source_task_id"), // conversation task linkage (INFRA-04) + startedAt: timestamp("started_at", { withTimezone: true }), + finishedAt: timestamp("finished_at", { withTimezone: true }), + createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(), + updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(), + }, + (table) => ({ + companyStatusIdx: index("content_jobs_company_status_idx").on(table.companyId, table.status), + companyCreatedIdx: index("content_jobs_company_created_idx").on(table.companyId, table.createdAt), + }), +); +``` + +### Pattern 2: HTTP 202 + Job ID Response + +**What:** POST to submit a job returns immediately with jobId. +**When to use:** All content generation submissions. + +```typescript +// server/src/routes/content-jobs.ts +router.post("/companies/:companyId/content-jobs", async (req, res) => { + assertCompanyAccess(req, companyId); + const job = await contentJobStore.create(companyId, { + jobType: req.body.jobType, + input: req.body.input ?? {}, + sourceTaskId: req.body.sourceTaskId ?? null, + }); + // Fire and forget — never await the runner here + void contentJobRunner.dispatch(job); + res.status(202).json({ jobId: job.id, status: job.status }); +}); +``` + +### Pattern 3: SSE Job Progress (mirrors voice.ts pattern) + +**What:** GET endpoint that holds connection open and pushes events until terminal state. +**When to use:** Browser polls for job progress without polling. + +```typescript +// server/src/routes/content-jobs.ts +router.get("/companies/:companyId/content-jobs/:jobId/events", async (req, res) => { + assertCompanyAccess(req, companyId); + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + res.flushHeaders(); + + const sendEvent = (type: string, data: unknown) => { + res.write(`event: ${type}\ndata: ${JSON.stringify(data)}\n\n`); + }; + + // Send current state immediately + const job = await contentJobStore.getById(jobId); + sendEvent("status", { jobId, status: job.status }); + if (job.status === "done" || job.status === "failed") { + res.end(); + return; + } + + // Subscribe to live events for this job + const unsubscribe = subscribeCompanyLiveEvents(companyId, (event) => { + if (event.type === "content_job.status" && event.payload.jobId === jobId) { + sendEvent("status", event.payload); + if (event.payload.status === "done" || event.payload.status === "failed") { + unsubscribe(); + res.end(); + } + } + }); + + req.on("close", () => { + unsubscribe(); + }); +}); +``` + +### Pattern 4: Live Event Types for Job Progress + +**What:** Extend LIVE_EVENT_TYPES in shared constants. +**When to use:** Publishing job progress from the runner. + +```typescript +// packages/shared/src/constants.ts — add to LIVE_EVENT_TYPES array: +"content_job.queued", +"content_job.running", +"content_job.done", +"content_job.failed", +``` + +### Pattern 5: Generated Asset Storage Namespace + +**What:** `generated/` namespace bypasses upload-route multer limit. +**When to use:** Writing rendered output (video, SVG, PDF, PNG) from job runner. + +```typescript +// server/src/attachment-types.ts — add alongside MAX_ATTACHMENT_BYTES: +export const MAX_GENERATED_ASSET_BYTES = + Number(process.env.PAPERCLIP_GENERATED_ASSET_MAX_BYTES) || 500 * 1024 * 1024; // 500MB default + +// Job runner stores via: +const stored = await storage.putFile({ + companyId, + namespace: "generated", // bypasses upload limit — this is not from multer + originalFilename: outputFilename, + contentType, + body: outputBuffer, // renderer output, no multer involved +}); +``` + +**Key insight:** The upload route (`assets.ts`) enforces limits via multer `limits: { fileSize: MAX_ATTACHMENT_BYTES }`. Job runners write directly to `storage.putFile()` — multer is never involved. The `MAX_GENERATED_ASSET_BYTES` constant exists for the job runner to validate before calling `putFile`, but `putFile` itself has no byte limit. + +### Pattern 6: source_task_id on assets (INFRA-04) + +**What:** Nullable column added to `assets` table. +**When to use:** Every asset created by a job runner must pass `sourceTaskId`. + +```typescript +// packages/db/src/schema/assets.ts — add column: +sourceTaskId: text("source_task_id"), // nullable, no FK — task IDs are string identifiers + +// assetService.create() in server/src/services/assets.ts accepts it through +// the existing spread pattern: db.insert(assets).values({ ...data, companyId }) +// since the column is nullable, no callers break. +``` + +### Pattern 7: content-job-store.ts (service layer) + +**What:** CRUD service for content_jobs table. +**When to use:** Create and update jobs from routes + runner. + +```typescript +// server/src/services/content-job-store.ts +// Pattern source: server/src/services/assets.ts, heartbeat.ts +export function contentJobStore(db: Db) { + return { + create: (companyId: string, data: { jobType: string; input: Record; sourceTaskId: string | null }) => + db.insert(contentJobs).values({ companyId, ...data }).returning().then((r) => r[0]), + + getById: (id: string) => + db.select().from(contentJobs).where(eq(contentJobs.id, id)).then((r) => r[0] ?? null), + + listByCompany: (companyId: string) => + db.select().from(contentJobs) + .where(eq(contentJobs.companyId, companyId)) + .orderBy(desc(contentJobs.createdAt)), + + transition: (id: string, patch: Partial) => + db.update(contentJobs).set({ ...patch, updatedAt: new Date() }).where(eq(contentJobs.id, id)), + }; +} +``` + +### Anti-Patterns to Avoid + +- **Awaiting render in HTTP handler:** Never `await renderer.run()` in the route handler. Always `void dispatch(job)` and return 202. +- **Using multer for generated asset storage:** Job runners call `storage.putFile()` directly; multer is only for user uploads. +- **Hardcoding status strings:** Always use the typed `CONTENT_JOB_STATUSES` constant from shared, not raw strings. +- **Blocking SSE on DB polling:** SSE endpoint subscribes to EventEmitter via `subscribeCompanyLiveEvents`, not a polling loop. +- **Missing `source_task_id` in job creation:** Every job submission should pass `sourceTaskId` from the incoming request (even if null for now); the column prevents future orphan accumulation. + +--- + +## Don't Hand-Roll + +| Problem | Don't Build | Use Instead | Why | +|---------|-------------|-------------|-----| +| In-process pub/sub for SSE | Custom EventEmitter wrapper | `publishLiveEvent` + `subscribeCompanyLiveEvents` from `live-events.ts` | Already handles multi-company scoping, id sequencing | +| Storage path generation | Custom UUID + date path builder | `StorageService.putFile()` via `service.ts` | Already handles namespace normalization, sha256, objectKey construction | +| Migration execution | Custom SQL runner | `pnpm db:generate` then `pnpm db:migrate` | Existing drizzle-kit + custom migration runner in `client.ts` | +| HTTP route mounting | Ad-hoc app.use() | Follow `app.ts` pattern: create router fn, import in app.ts | Consistent middleware application (auth, actor, etc.) | +| Asset DB record | Custom insert | `assetService(db).create()` | Already handles the full asset shape | + +--- + +## Common Pitfalls + +### Pitfall 1: Forgetting the "generated" namespace bypass logic lives in the runner, not the route + +**What goes wrong:** Developer adds byte-limit check in the new content-job route handler (treating it like the assets upload route), rejecting large files before they're even rendered. +**Why it happens:** The upload route pattern uses multer limits; the developer copies that pattern. +**How to avoid:** The content-jobs route only creates the job record (tiny JSON). The runner calls `storage.putFile()` directly — no multer anywhere. The size check (`MAX_GENERATED_ASSET_BYTES`) belongs in the job runner, after rendering, before writing. +**Warning signs:** Any import of `multer` in `content-jobs.ts` or `content-job-runner.ts`. + +### Pitfall 2: SSE connection leaking if client disconnects mid-job + +**What goes wrong:** Client disconnects; the `unsubscribe` callback is never called; the EventEmitter listener accumulates. With many requests this can trigger Node's MaxListeners warning. +**Why it happens:** SSE streams need explicit `req.on("close")` cleanup. +**How to avoid:** Always register `req.on("close", () => { unsubscribe(); })` in every SSE handler. See `live-events-ws.ts` `cleanupByClient` pattern. +**Warning signs:** MaxListeners exceeded warning in server logs. + +### Pitfall 3: Publishing live events before the DB row is committed + +**What goes wrong:** Browser receives `content_job.done` event, queries `/content-jobs/:id`, gets stale data (or 404 if the row hasn't flushed). +**Why it happens:** `publishLiveEvent` is synchronous; the DB write is async. +**How to avoid:** Always `await db.update(...)` before calling `publishLiveEvent(...)` in the job runner. +**Warning signs:** Frontend shows "done" but API returns "running". + +### Pitfall 4: Migration numbering collision + +**What goes wrong:** Two migrations are created with the same prefix (e.g., `0056_`) and drizzle-kit fails to apply them in order. +**Why it happens:** Parallel development or rebasing creates numbering conflicts. +**How to avoid:** Check `ls packages/db/src/migrations/` before running `pnpm db:generate`. The last file is currently `0055_create_push_subscriptions.sql`. New migrations start at `0056_`. +**Warning signs:** `pnpm db:generate` creates a file with a duplicate number. + +### Pitfall 5: Forgetting to export from schema/index.ts and services/index.ts + +**What goes wrong:** TypeScript compiles but runtime throws "cannot find module" or imports return undefined. +**Why it happens:** The project uses explicit barrel exports; tree-shaking won't auto-discover. +**How to avoid:** After adding `content_jobs.ts` schema and `content-job-store.ts` service, immediately add exports to `packages/db/src/schema/index.ts` and `server/src/services/index.ts`. + +### Pitfall 6: Adding content_job.* event types in the wrong place + +**What goes wrong:** `publishLiveEvent({ type: "content_job.running" })` throws a TypeScript error because the type is not in `LIVE_EVENT_TYPES`. +**Why it happens:** `LiveEventType` is derived from `LIVE_EVENT_TYPES as const` in `packages/shared/src/constants.ts`. +**How to avoid:** Add the four new types (`content_job.queued`, `content_job.running`, `content_job.done`, `content_job.failed`) to the `LIVE_EVENT_TYPES` array in `constants.ts` before writing the runner. + +--- + +## Code Examples + +### Submitting a Job and Returning 202 + +```typescript +// Source: voice.ts (sync return), assets.ts (storage pattern) — combined +router.post("/companies/:companyId/content-jobs", async (req, res) => { + const companyId = req.params.companyId; + assertCompanyAccess(req, companyId); + + const { jobType, input, sourceTaskId } = req.body as { + jobType: string; + input?: Record; + sourceTaskId?: string; + }; + + const store = contentJobStore(db); + const job = await store.create(companyId, { + jobType, + input: input ?? {}, + sourceTaskId: sourceTaskId ?? null, + }); + + void contentJobRunner.dispatch(db, storage, job); // fire and forget + + res.status(202).json({ + jobId: job.id, + status: job.status, + createdAt: job.createdAt, + }); +}); +``` + +### Publishing Progress from the Runner + +```typescript +// Source: live-events.ts publishLiveEvent pattern +async function runJob(db: Db, storage: StorageService, job: ContentJob) { + const store = contentJobStore(db); + + // Transition to running + await store.transition(job.id, { status: "running", startedAt: new Date() }); + publishLiveEvent({ + companyId: job.companyId, + type: "content_job.running", + payload: { jobId: job.id }, + }); + + try { + const result = await renderContent(job.jobType, job.input); + + // Store asset + const stored = await storage.putFile({ + companyId: job.companyId, + namespace: "generated", + originalFilename: result.filename, + contentType: result.contentType, + body: result.buffer, + }); + const asset = await assetService(db).create(job.companyId, { + ...stored, + sourceTaskId: job.sourceTaskId, + createdByAgentId: null, + createdByUserId: null, + }); + + // Transition to done + await store.transition(job.id, { status: "done", resultAssetId: asset.id, finishedAt: new Date() }); + publishLiveEvent({ + companyId: job.companyId, + type: "content_job.done", + payload: { jobId: job.id, assetId: asset.id }, + }); + } catch (err) { + const errorMessage = err instanceof Error ? err.message : "Unknown error"; + await store.transition(job.id, { status: "failed", errorMessage, finishedAt: new Date() }); + publishLiveEvent({ + companyId: job.companyId, + type: "content_job.failed", + payload: { jobId: job.id, errorMessage }, + }); + } +} +``` + +### Migration for content_jobs table + +```sql +-- packages/db/src/migrations/0056_create_content_jobs.sql +-- (generated by drizzle-kit; shown for reference) +CREATE TABLE IF NOT EXISTS "content_jobs" ( + "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, + "company_id" uuid NOT NULL REFERENCES "companies"("id"), + "job_type" text NOT NULL, + "status" text NOT NULL DEFAULT 'queued', + "input" jsonb NOT NULL DEFAULT '{}', + "result_asset_id" uuid, + "error_message" text, + "source_task_id" text, + "started_at" timestamp with time zone, + "finished_at" timestamp with time zone, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone DEFAULT now() NOT NULL +); +CREATE INDEX IF NOT EXISTS "content_jobs_company_status_idx" ON "content_jobs" ("company_id", "status"); +CREATE INDEX IF NOT EXISTS "content_jobs_company_created_idx" ON "content_jobs" ("company_id", "created_at"); +``` + +### Migration for source_task_id on assets + +```sql +-- packages/db/src/migrations/0057_assets_source_task_id.sql +ALTER TABLE "assets" ADD COLUMN IF NOT EXISTS "source_task_id" text; +CREATE INDEX IF NOT EXISTS "assets_source_task_id_idx" ON "assets" ("source_task_id"); +``` + +--- + +## State of the Art + +| Old Approach | Current Approach | When Changed | Impact | +|--------------|------------------|--------------|--------| +| Polling for job status | SSE push | Existing pattern in codebase | No polling loop needed on client | +| Blocking HTTP for render | HTTP 202 + async | Decision from STATE.md | Render time decoupled from response time | +| Flat asset storage | Namespaced storage (`generated/`, `assets/general`) | Existing `service.ts` pattern | No path collision between user uploads and generated output | + +**Already established in project:** +- All DB schemas use Drizzle ORM with explicit migrations (not `drizzle.push`) +- Migration files are numbered sequentially starting at `0000_`; next is `0056_` +- Services follow a simple factory function pattern: `export function xService(db: Db) { return { ... } }` +- Routes follow `export function xRoutes(db, deps...) { const router = Router(); ... return router; }` pattern +- All routes are mounted in `server/src/app.ts` + +--- + +## Environment Availability + +Step 2.6: SKIPPED — Phase 40 is purely code and database changes. No external CLI tools, external services, or runtime binaries are required beyond Node.js 20, pnpm 9, and PostgreSQL already running. + +--- + +## Validation Architecture + +### Test Framework + +| Property | Value | +|----------|-------| +| Framework | vitest (project dep) | +| Config file | `server/vitest.config.ts` — `environment: "node"` | +| Quick run command | `pnpm test:run --project server -- --reporter=verbose src/__tests__/content-jobs*` | +| Full suite command | `pnpm test:run` | + +### Phase Requirements → Test Map + +| Req ID | Behavior | Test Type | Automated Command | File Exists? | +|--------|----------|-----------|-------------------|-------------| +| INFRA-01 | POST /content-jobs returns 202 + jobId within 200ms | unit | `pnpm --filter @paperclipai/server test:run -- src/__tests__/content-jobs-routes.test.ts` | ❌ Wave 0 | +| INFRA-01 | Job transitions queued → running → done/failed | unit | same file | ❌ Wave 0 | +| INFRA-02 | SSE endpoint delivers progress events before terminal | unit | `pnpm --filter @paperclipai/server test:run -- src/__tests__/content-jobs-sse.test.ts` | ❌ Wave 0 | +| INFRA-03 | storage.putFile with generated/ namespace stores bytes without size error | unit | `pnpm --filter @paperclipai/server test:run -- src/__tests__/content-jobs-storage.test.ts` | ❌ Wave 0 | +| INFRA-04 | Asset created by job runner includes sourceTaskId | unit | covered in content-jobs-routes.test.ts | ❌ Wave 0 | + +### Sampling Rate + +- **Per task commit:** `pnpm --filter @paperclipai/server test:run -- src/__tests__/content-jobs*.test.ts` +- **Per wave merge:** `pnpm test:run` +- **Phase gate:** Full suite green before `/gsd:verify-work` + +### Wave 0 Gaps + +- [ ] `server/src/__tests__/content-jobs-routes.test.ts` — covers INFRA-01, INFRA-04 +- [ ] `server/src/__tests__/content-jobs-sse.test.ts` — covers INFRA-02 +- [ ] `server/src/__tests__/content-jobs-storage.test.ts` — covers INFRA-03 + +*(No new test framework needed — vitest already configured for server.)* + +--- + +## Project Constraints (from CLAUDE.md) + +CLAUDE.md does not exist at the project root. The constraints below are derived from `STATE.md` key decisions which carry the same authority: + +1. **Async job pattern is mandatory** — all render requests return 202 + job ID immediately; never block HTTP on render +2. **`content_jobs` table must exist before any renderer is built** — this phase is the hard dependency for all others (phases 41–45) +3. **`sourceTaskId` required on every generated asset from day one** — prevents SSD orphan accumulation +4. **`MAX_GENERATED_ASSET_BYTES` constant bypasses the 10MB upload limit for `generated/` namespace** — separate from upload route +5. **Async pattern** — `renderPipelineService` stub must exist by end of phase (even as no-op) so phase 41 can extend it + +--- + +## Sources + +### Primary (HIGH confidence) + +- Codebase: `server/src/services/live-events.ts` — EventEmitter pub/sub pattern for SSE fan-out +- Codebase: `server/src/routes/voice.ts` — SSE headers pattern (`text/event-stream`, `flushHeaders`, `res.write`) +- Codebase: `packages/db/src/schema/plugin_jobs.ts` — job lifecycle table pattern (status, timestamps, logs) +- Codebase: `packages/db/src/schema/assets.ts` — asset table shape for INFRA-04 extension +- Codebase: `server/src/storage/service.ts` — `putFile` has no byte limit; limit is in multer (upload route only) +- Codebase: `packages/shared/src/constants.ts` — `LIVE_EVENT_TYPES` pattern to extend for job events +- Codebase: `server/src/attachment-types.ts` — `MAX_ATTACHMENT_BYTES` = 10MB; `MAX_GENERATED_ASSET_BYTES` to be added here +- Codebase: `packages/db/src/migrations/` — last migration is `0055_`; next is `0056_` +- Project STATE.md — locked architecture decisions + +### Secondary (MEDIUM confidence) + +- Pattern inference from `heartbeat_runs` / `plugin_job_runs` tables (same repo) for `content_jobs` shape + +### Tertiary (LOW confidence) + +None — all findings are from direct codebase inspection. + +--- + +## Metadata + +**Confidence breakdown:** +- Standard stack: HIGH — all verified from codebase +- Architecture: HIGH — directly modeled on existing patterns in same repo +- Pitfalls: HIGH — identified from direct code review of existing patterns + +**Research date:** 2026-04-04 +**Valid until:** 2026-05-04 (stable internal patterns) diff --git a/.planning/phases/40-job-infrastructure/40-VALIDATION.md b/.planning/phases/40-job-infrastructure/40-VALIDATION.md new file mode 100644 index 00000000..ee50dc74 --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-VALIDATION.md @@ -0,0 +1,79 @@ +--- +phase: 40 +slug: job-infrastructure +status: draft +nyquist_compliant: false +wave_0_complete: false +created: 2026-04-04 +--- + +# Phase 40 — Validation Strategy + +> Per-phase validation contract for feedback sampling during execution. + +--- + +## Test Infrastructure + +| Property | Value | +|----------|-------| +| **Framework** | vitest | +| **Config file** | vitest.config.ts | +| **Quick run command** | `pnpm test --run` | +| **Full suite command** | `pnpm test --run` | +| **Estimated runtime** | ~30 seconds | + +--- + +## Sampling Rate + +- **After every task commit:** Run `pnpm test --run` +- **After every plan wave:** Run `pnpm test --run` +- **Before `/gsd:verify-work`:** Full suite must be green +- **Max feedback latency:** 30 seconds + +--- + +## Per-Task Verification Map + +| Task ID | Plan | Wave | Requirement | Test Type | Automated Command | File Exists | Status | +|---------|------|------|-------------|-----------|-------------------|-------------|--------| +| 40-01-01 | 01 | 1 | INFRA-01 | unit | `pnpm test --run` | ❌ W0 | ⬜ pending | +| 40-01-02 | 01 | 1 | INFRA-02 | unit | `pnpm test --run` | ❌ W0 | ⬜ pending | +| 40-01-03 | 01 | 1 | INFRA-03 | unit | `pnpm test --run` | ❌ W0 | ⬜ pending | +| 40-01-04 | 01 | 1 | INFRA-04 | unit | `pnpm test --run` | ❌ W0 | ⬜ pending | + +*Status: ⬜ pending · ✅ green · ❌ red · ⚠️ flaky* + +--- + +## Wave 0 Requirements + +- [ ] Test stubs for content_jobs table operations +- [ ] Test stubs for SSE job progress events +- [ ] Test stubs for generated asset storage (bypassing multer) +- [ ] Test stubs for sourceTaskId asset linking + +*If none: "Existing infrastructure covers all phase requirements."* + +--- + +## Manual-Only Verifications + +| Behavior | Requirement | Why Manual | Test Instructions | +|----------|-------------|------------|-------------------| +| SSE events received in browser | INFRA-02 | Requires live browser connection | Submit job, observe SSE events in browser DevTools | +| Large file storage/retrieval | INFRA-03 | Requires actual 10MB+ file | Generate large asset, verify storage and retrieval | + +--- + +## Validation Sign-Off + +- [ ] All tasks have `` verify or Wave 0 dependencies +- [ ] Sampling continuity: no 3 consecutive tasks without automated verify +- [ ] Wave 0 covers all MISSING references +- [ ] No watch-mode flags +- [ ] Feedback latency < 30s +- [ ] `nyquist_compliant: true` set in frontmatter + +**Approval:** pending diff --git a/.planning/phases/40-job-infrastructure/40-VERIFICATION.md b/.planning/phases/40-job-infrastructure/40-VERIFICATION.md new file mode 100644 index 00000000..3e83f573 --- /dev/null +++ b/.planning/phases/40-job-infrastructure/40-VERIFICATION.md @@ -0,0 +1,129 @@ +--- +phase: 40-job-infrastructure +verified: 2026-04-04T12:48:00Z +status: gaps_found +score: 9/10 must-haves verified +gaps: + - truth: "GET /api/companies/:companyId/content-jobs/:jobId/events streams SSE events until terminal state then closes" + status: partial + reason: "SSE endpoint checks event.payload.status for terminal detection, but contentJobRunner publishes content_job.done with { jobId, assetId } and content_job.failed with { jobId, errorMessage } — neither payload contains a status field. The stream therefore never auto-closes when a running job completes; it stays open until the client disconnects (req.close). The initial-status fast-path (for already-terminal jobs) works correctly." + artifacts: + - path: "server/src/services/content-job-runner.ts" + issue: "publishLiveEvent payloads for content_job.done and content_job.failed omit 'status' field" + - path: "server/src/routes/content-jobs.ts" + issue: "Line 108: checks event.payload.status which is always undefined with current runner payloads" + missing: + - "Add status: 'done' to content_job.done payload in content-job-runner.ts" + - "Add status: 'failed' to content_job.failed payload in content-job-runner.ts" + - "OR change SSE terminal detection to use event.type === 'content_job.done' || event.type === 'content_job.failed'" +--- + +# Phase 40: Job Infrastructure Verification Report + +**Phase Goal:** Every content generation request returns a job ID immediately, progresses through a tracked lifecycle, and stores its output in namespaced storage — so nothing blocks and nothing is orphaned +**Verified:** 2026-04-04T12:48:00Z +**Status:** gaps_found +**Re-verification:** No — initial verification + +## Goal Achievement + +### Observable Truths + +| # | Truth | Status | Evidence | +|---|-------|--------|----------| +| 1 | content_jobs table exists in DB with queued/running/done/failed lifecycle columns | VERIFIED | packages/db/src/schema/content_jobs.ts has all lifecycle columns; migration 0046_tense_randall.sql applies CREATE TABLE | +| 2 | assets table has a source_task_id column for conversation linkage | VERIFIED | packages/db/src/schema/assets.ts line 18: sourceTaskId: text("source_task_id"); same migration adds column | +| 3 | LIVE_EVENT_TYPES includes content_job.queued, content_job.running, content_job.done, content_job.failed | VERIFIED | packages/shared/src/constants.ts lines 334-337 confirm all four entries | +| 4 | MAX_GENERATED_ASSET_BYTES constant exists and defaults to 500MB | VERIFIED | server/src/attachment-types.ts line 76-77: exports constant, defaults to 500 * 1024 * 1024 | +| 5 | contentJobStore service can create, get, list, and transition jobs | VERIFIED | server/src/services/content-job-store.ts exports all four methods backed by real Drizzle queries | +| 6 | contentJobRunner dispatches a job asynchronously without blocking, transitions through lifecycle, stores asset, publishes live events | VERIFIED | void runJob() pattern confirmed; transitions running→done/failed; putFile with namespace "generated"; assetService.create with sourceTaskId; publishLiveEvent called | +| 7 | POST /api/companies/:companyId/content-jobs returns 202 with jobId and status within 200ms | VERIFIED | Route handler returns res.status(202).json({ jobId, status, createdAt }); dispatch is fire-and-forget | +| 8 | GET /api/companies/:companyId/content-jobs/:jobId returns the job record with current status | VERIFIED | Route calls contentJobStore(db).getById, returns job or 404 | +| 9 | GET /api/companies/:companyId/content-jobs lists all jobs for a company ordered by createdAt desc | VERIFIED | Route calls listByCompany, Drizzle query uses orderBy(desc(contentJobs.createdAt)) | +| 10 | GET /api/companies/:companyId/content-jobs/:jobId/events streams SSE events until terminal state then closes | FAILED | Initial-status fast-path works for already-terminal jobs. For running jobs, SSE terminal detection checks event.payload.status but runner publishes { jobId, assetId } / { jobId, errorMessage } with no status field — stream will not auto-close on completion | + +**Score:** 9/10 truths verified + +### Required Artifacts + +| Artifact | Expected | Status | Details | +|----------|----------|--------|---------| +| `packages/db/src/schema/content_jobs.ts` | content_jobs table with lifecycle | VERIFIED | 33 lines, exports contentJobs pgTable + CONTENT_JOB_STATUSES, two indexes | +| `server/src/services/content-job-store.ts` | CRUD for content_jobs | VERIFIED | 37 lines, all four methods, real Drizzle queries | +| `server/src/services/content-job-runner.ts` | Async dispatcher with live events | VERIFIED | 88 lines, fire-and-forget dispatch, lifecycle transitions, asset storage, live events published | +| `server/src/routes/content-jobs.ts` | HTTP routes for job API | VERIFIED | 122 lines, all four endpoints implemented | +| `server/src/__tests__/content-jobs-routes.test.ts` | Route integration tests (min 50 lines) | VERIFIED | 190 lines, 8 tests covering 202/400/404/sourceTaskId | +| `server/src/__tests__/content-jobs-sse.test.ts` | SSE integration tests (min 30 lines) | VERIFIED | 139 lines, 5 tests covering content-type/initial event/terminal states | + +### Key Link Verification + +| From | To | Via | Status | Details | +|------|----|----|--------|---------| +| content-job-runner.ts | content-job-store.ts | store.transition() calls | WIRED | Lines 26, 57, 70 call store.transition() after await | +| content-job-runner.ts | live-events.ts | publishLiveEvent for content_job.* events | WIRED | Lines 27-31, 62-65, 74-78 publish running/done/failed events | +| content-job-runner.ts | assets.ts | assetService.create with sourceTaskId | WIRED | Lines 50-55: assetService(db).create with sourceTaskId: job.sourceTaskId | +| content-jobs.ts (route) | content-job-store.ts | contentJobStore(db) calls | WIRED | Lines 28, 51, 60, 82 all call contentJobStore(db) | +| content-jobs.ts (route) | content-job-runner.ts | contentJobRunner.dispatch() in POST handler | WIRED | Line 37: void contentJobRunner.dispatch(db, storage, job!) | +| content-jobs.ts (route) | live-events.ts | subscribeCompanyLiveEvents in SSE endpoint | WIRED | Line 102: subscribeCompanyLiveEvents() with req.on("close") cleanup | +| app.ts | content-jobs.ts | api.use mount | WIRED | Line 190: api.use(contentJobRoutes(db, opts.storageService)) | + +### Data-Flow Trace (Level 4) + +| Artifact | Data Variable | Source | Produces Real Data | Status | +|----------|---------------|--------|-------------------|--------| +| content-job-store.ts → create | row returned | db.insert(contentJobs).values().returning() | Yes — Drizzle INSERT with RETURNING | FLOWING | +| content-job-store.ts → getById | row or null | db.select().from(contentJobs).where(eq(id)) | Yes — Drizzle SELECT | FLOWING | +| content-job-store.ts → listByCompany | rows array | db.select().from(contentJobs).where().orderBy() | Yes — Drizzle SELECT | FLOWING | +| content-job-runner.ts → renderContent | buffer | Stub returning fixed Buffer.from("placeholder output") | Intentional stub — phases 41-45 add real renderers | STUB (documented, intentional) | +| content-jobs.ts (POST route) | job response | contentJobStore(db).create() | Yes — store backed by real Drizzle | FLOWING | + +### Behavioral Spot-Checks + +| Behavior | Command | Result | Status | +|----------|---------|--------|--------| +| All 13 integration tests pass | npx vitest run content-jobs-routes.test.ts content-jobs-sse.test.ts | 13/13 passed in 790ms | PASS | +| TypeScript compiles clean (db) | pnpm tsc --noEmit --project packages/db/tsconfig.json | Exit 0 | PASS | +| TypeScript compiles clean (shared) | pnpm tsc --noEmit --project packages/shared/tsconfig.json | Exit 0 | PASS | +| TypeScript compiles clean (server) | pnpm tsc --noEmit --project server/tsconfig.json | Exit 0 | PASS | +| Migration file contains content_jobs DDL | grep content_jobs migrations/*.sql | Found in 0046_tense_randall.sql | PASS | +| SSE stream auto-close for live jobs | (code inspection) | event.payload.status never set by runner | FAIL | + +### Requirements Coverage + +| Requirement | Source Plan | Description | Status | Evidence | +|-------------|------------|-------------|--------|----------| +| INFRA-01 | 40-01, 40-02 | System processes content generation jobs asynchronously with queued → running → done/failed lifecycle | SATISFIED | contentJobRunner.dispatch fires void runJob; transitions queued→running→done/failed; POST returns 202; tests verify 202/queued | +| INFRA-02 | 40-02 | System pushes job progress updates via SSE to connected clients | PARTIAL | SSE endpoint exists and streams initial status + live events. However, SSE does not auto-close for live jobs because runner payloads lack status field — stream hangs open until client disconnect | +| INFRA-03 | 40-01, 40-02 | Generated content stored in namespaced storage without size restrictions blocking video/images | SATISFIED | storage.putFile called with namespace: "generated"; MAX_GENERATED_ASSET_BYTES = 500MB; no blocking — async dispatch | +| INFRA-04 | 40-01, 40-02 | All generated content tracked in database with source conversation linkage | SATISFIED | assets.sourceTaskId column added; runner passes sourceTaskId: job.sourceTaskId to assetService.create; test verifies sourceTaskId persistence via POST then GET | + +### Anti-Patterns Found + +| File | Line | Pattern | Severity | Impact | +|------|------|---------|----------|--------| +| server/src/services/content-job-runner.ts | 11-21 | renderContent stub returning hardcoded buffer | Info | Intentional — documented in SUMMARY as placeholder for phases 41-45 | +| server/src/routes/content-jobs.ts | 108 | event.payload.status terminal check against payloads that never include status | Blocker | SSE stream for live jobs never auto-closes on completion — client sees no completion signal until disconnect | + +### Human Verification Required + +None required — all behavioral checks were resolved programmatically. + +## Gaps Summary + +One gap blocks complete goal achievement: + +**SSE stream does not auto-close when a running job reaches terminal state.** The `contentJobRunner` publishes `content_job.done` with `{ jobId, assetId }` and `content_job.failed` with `{ jobId, errorMessage }`. Neither payload contains a `status` field. The SSE route's terminal detection at line 108 reads `event.payload.status` and checks for `"done"` or `"failed"` — this will always be `undefined`, so `unsubscribe()` and `res.end()` are never called by the subscriber. The stream remains open indefinitely until the client disconnects. + +The initial-status fast-path works correctly: if a job is already terminal when the SSE connection opens, the stream closes immediately after sending the first event. Only the live-event-driven close path is broken. + +**Fix options (either resolves the gap):** + +1. In `content-job-runner.ts`, add `status: "done"` to the `content_job.done` payload and `status: "failed"` to the `content_job.failed` payload. +2. In `content-jobs.ts` (SSE route), replace the payload status check with `event.type === "content_job.done" || event.type === "content_job.failed"`. + +The `renderContent` stub is not a gap — it is intentionally deferred to phases 41-45 and documented as such. + +--- + +_Verified: 2026-04-04T12:48:00Z_ +_Verifier: Claude (gsd-verifier)_ diff --git a/.planning/research/FORMAT-CONVERSION.md b/.planning/research/FORMAT-CONVERSION.md new file mode 100644 index 00000000..a069efbc --- /dev/null +++ b/.planning/research/FORMAT-CONVERSION.md @@ -0,0 +1,846 @@ +# Format Conversion Ecosystem + +**Project:** Nexus v1.7 — supplemental research for two-tier format conversion system +**Researched:** 2026-04-04 +**Scope:** Direct conversion tools, format registry pattern, AI-bridged conversion boundary, UI patterns, security and performance pitfalls +**Confidence:** HIGH for tool choices, MEDIUM for version numbers (npm registry cross-checked) + +--- + +## Context: What Is Already Available + +These are confirmed installed and must not be re-added: + +| Package | Location | Version | Relevant To Conversion | +|---------|----------|---------|----------------------| +| `sharp ^0.34.5` | `server/` | 0.34.5 | Raster image conversion (resize, format, SVG→PNG) | +| `ffmpeg-static ^5.3.0` | `server/` | 5.3.0 | Audio/video conversion binary | +| `mermaid ^11.12.0` | `ui/` | 11.12.0 | Client-side Mermaid rendering | +| `playwright-chromium ^1.50.0` | `server/` | 1.50.0 | HTML→PDF already decided in STACK.md | + +--- + +## Tier 1: Direct Conversion Tools + +### Image Formats + +**Primary: `sharp ^0.34.5` (already installed)** + +Sharp handles the majority of image format pairs without any additional dependency: + +| Source | Target | Method | +|--------|--------|--------| +| JPEG/PNG/WebP/AVIF/TIFF/GIF | Any raster | `sharp(input).toFormat('webp').toBuffer()` | +| SVG | PNG/JPEG/WebP | `sharp(svgBuffer).png().toBuffer()` — libvips handles SVG via librsvg | +| PNG/JPEG | WebP/AVIF | `sharp(input).webp({ quality: 80 }).toBuffer()` | + +**Important SVG caveat:** sharp's SVG→PNG conversion uses librsvg. It works well for most SVGs but does NOT support all CSS features. For agent-generated SVGs with embedded fonts (produced by `satori`), use `@resvg/resvg-js` as specified in STACK.md. For user-uploaded SVGs without special fonts, `sharp` is sufficient. + +**No ImageMagick needed.** ImageMagick via CLI or WASM adds complexity: +- `imagemagick` npm is an unmaintained CLI wrapper (last release 2020) +- WASM ImageMagick (`@imagemagick/magick-wasm`) works but runs at ~0.3× native speed +- `sharp` via libvips is 4–5× faster than ImageMagick for every supported format pair +- For the format pairs Nexus needs (JPEG, PNG, WebP, AVIF, TIFF, SVG→raster), sharp covers everything + +**No Inkscape needed** for Nexus's scope (vector conversion beyond SVG→PNG is out of scope for v1.7). + +--- + +### Audio / Video Formats + +**Wrapper: `fluent-ffmpeg ^2.1.3`** + +**Important maintenance note:** The `fluent-ffmpeg` repository was archived on May 22, 2025. The package is no longer receiving new features. However: +- It remains published on npm and functional with Node.js >=18 +- The `ffmpeg-static ^5.3.0` binary it wraps is still actively maintained +- `@types/fluent-ffmpeg ^2.1.28` provides TypeScript types (last updated October 2025) +- For Nexus's use case (spawn ffmpeg with known args), the archived state is low risk +- Alternative: write a thin `child_process.spawn` wrapper directly — this is ~30 lines and removes the archived dependency entirely + +**Recommendation for Nexus:** Implement a minimal `ffmpegConvert(inputPath, outputPath, extraArgs)` wrapper using `child_process.spawn` instead of taking on the archived `fluent-ffmpeg`. The full fluent API is unnecessary — format conversion is a single `ffmpeg -i input.mp4 output.webm` call. + +```typescript +// server/src/services/converters/ffmpeg-converter.ts +import { spawn } from "child_process"; +import ffmpegPath from "ffmpeg-static"; + +export function ffmpegConvert( + inputPath: string, + outputPath: string, + extraArgs: string[] = [] +): Promise { + return new Promise((resolve, reject) => { + const proc = spawn(ffmpegPath!, [ + "-i", inputPath, + ...extraArgs, + "-y", // overwrite output + outputPath, + ]); + proc.on("close", (code) => + code === 0 ? resolve() : reject(new Error(`ffmpeg exited ${code}`)) + ); + proc.stderr.on("data", () => {}); // consume stderr to prevent backpressure + }); +} +``` + +**Format coverage via ffmpeg-static:** + +| Source | Target | Extra args | +|--------|--------|-----------| +| MP4/MKV/AVI/MOV | WebM | `["-c:v", "libvpx-vp9", "-c:a", "libopus"]` | +| MP4/WebM | MP3/AAC | `["-vn", "-c:a", "libmp3lame"]` (audio extract) | +| MP3/WAV/FLAC/OGG | MP3 | `["-c:a", "libmp3lame", "-b:a", "192k"]` | +| MP3/WAV/OGG | WAV | `["-c:a", "pcm_s16le"]` | +| Image sequence | MP4 | `["-r", "30", "-c:v", "libx264"]` | +| MP4 | GIF | `["-vf", "fps=10,scale=640:-1:flags=lanczos"]` | +| Any video | Audio-only MP3 | `["-vn"]` | + +--- + +### Documents + +#### DOCX to HTML: `mammoth ^1.12.0` + +Mammoth converts `.docx` → HTML with semantic preservation. It does NOT create DOCX. + +```bash +pnpm --filter @paperclipai/server add mammoth +``` + +```typescript +import mammoth from "mammoth"; + +const { value: html } = await mammoth.convertToHtml({ path: docxPath }); +// html is a clean HTML string; images are embedded as base64 data URIs by default +``` + +**Why mammoth over pandoc for DOCX→HTML:** Mammoth preserves heading hierarchy, tables, lists, and images correctly. Its output is cleaner HTML than pandoc's for Word documents. Single-purpose library, no system binary required. + +**TypeScript types:** Included in the package since v1.7 (`@types/mammoth` not needed). + +**Confidence: HIGH** — official npm, v1.12.0, actively maintained (last publish 20 days ago). + +--- + +#### Markdown → DOCX / PDF / HTML: Pandoc (system binary + thin wrapper) + +**Integration approach:** Pandoc is a Haskell binary — there is no pure-Node.js implementation. Existing Node.js wrappers are thin `child_process` shims: + +- `node-pandoc ^0.2.7` — 13K weekly downloads, most popular, but last updated 2021 +- `pandoc-ts` — TypeScript wrapper, smaller community +- **Recommended: write a 20-line `child_process.spawn` wrapper** — same as ffmpeg approach + +```typescript +// server/src/services/converters/pandoc-converter.ts +import { spawn } from "child_process"; + +export function pandocConvert( + inputPath: string, + outputPath: string, + from: string, + to: string +): Promise { + return new Promise((resolve, reject) => { + const proc = spawn("pandoc", [ + inputPath, + "-f", from, + "-t", to, + "-o", outputPath, + ]); + proc.on("close", (code) => + code === 0 ? resolve() : reject(new Error(`pandoc exited ${code}`)) + ); + }); +} +``` + +**Supported format pairs via pandoc:** + +| Source | Target | +|--------|--------| +| Markdown | DOCX, HTML, RST, LaTeX, EPUB | +| RST | Markdown, HTML, DOCX | +| HTML | Markdown, DOCX | +| LaTeX | HTML, Markdown | +| DOCX | Markdown (lossier than mammoth for HTML) | + +**System dependency:** pandoc must be installed on the Mac Mini. Install via `brew install pandoc`. Check at server startup with `which pandoc`; if absent, degrade gracefully. + +**Confidence: HIGH** — pandoc is the de-facto standard; brew install is 1 command; child_process wrapper is trivial. + +--- + +#### DOCX / ODT / PPTX → PDF: LibreOffice headless + +**Package: `libreoffice-convert ^1.8.1`** + +```bash +pnpm --filter @paperclipai/server add libreoffice-convert +# System dependency: brew install --cask libreoffice +``` + +```typescript +import { convertAsync } from "libreoffice-convert"; +import fs from "fs/promises"; + +const inputBuffer = await fs.readFile(docxPath); +const pdfBuffer = await convertAsync(inputBuffer, ".pdf", undefined); +``` + +**Format pairs:** + +| Source | Target | +|--------|--------| +| DOCX / DOC | PDF, ODT, HTML | +| PPTX / PPT | PDF, ODP | +| XLSX / XLS | PDF, ODS, CSV | +| ODT / ODP / ODS | PDF, DOCX, PPTX | + +**Performance warning:** LibreOffice launches a JVM-equivalent runtime on first call. Cold start: ~3-5 seconds. Warm subsequent calls: ~500ms. Serialize LibreOffice jobs (no concurrent renders) — run maximum one at a time. + +**System dependency:** LibreOffice must be installed at `/Applications/LibreOffice.app` on macOS. Check at server startup; degrade gracefully if absent. + +**Confidence: MEDIUM** — package v1.8.1 confirmed. macOS arm64 LibreOffice runs natively on M4 (confirmed via LibreOffice download page). Single source for LibreOffice npm package maintenance status. + +--- + +#### HTML → PDF: playwright-chromium (already decided in STACK.md) + +Use `playwright-chromium ^1.50.0` for HTML→PDF. Already researched; do not re-add. + +--- + +### Data Formats + +#### Spreadsheets: `xlsx` (SheetJS Community Edition) + +**Package: `xlsx ^0.20.x`** (SheetJS Community Edition) + +```bash +pnpm --filter @paperclipai/server add xlsx +``` + +**Format coverage:** + +| Source | Target | Method | +|--------|--------|--------| +| XLSX / XLS / ODS | CSV | `XLSX.utils.sheet_to_csv(ws)` | +| XLSX / XLS / ODS | JSON | `XLSX.utils.sheet_to_json(ws)` | +| CSV / JSON | XLSX | `XLSX.utils.json_to_sheet(data)` → `XLSX.writeFile(wb, path)` | + +SheetJS has ~7.8M weekly downloads and handles all Excel formats including legacy `.xls`. It is the default choice with no alternatives needed for basic spreadsheet conversion. + +**Licensing note:** SheetJS Community Edition is free (Apache 2.0 for historical versions; check current license at install time). SheetJS Pro adds streaming for very large files — not needed at single-user scale. + +**Confidence: MEDIUM-HIGH** — widely used, 7.8M weekly downloads confirmed. Version 0.20.x confirmed. License nuance warrants a check at install time. + +--- + +#### CSV Parsing: `csv-parse ^5.6.0` (part of the `csv` ecosystem) + +```bash +pnpm --filter @paperclipai/server add csv-parse +``` + +The `csv-parse ^6.2.1` package (latest as of April 2026) implements `stream.Transform` and supports both streaming and synchronous/callback modes. It includes TypeScript types. + +**When to use:** Parsing user-uploaded CSV files before transformation (CSV→JSON, CSV→XLSX). For generating CSV output from JSON/objects, use SheetJS or `csv-stringify` (same ecosystem as `csv-parse`). + +**Confidence: HIGH** — v6.2.1 confirmed from npm search. Maintained (last publish 4 days ago). + +--- + +#### JSON ↔ CSV: Use `csv-parse` + `csv-stringify` (not `json2csv`) + +The `json2csv` package is in maintenance mode at v6.0.0-alpha (3 years old). The `json-2-csv` package (v5.5.10, different package) is active but adds a dependency for something `csv-stringify` already handles. + +**Recommendation:** Use `csv-stringify ^6.x` (same ecosystem as `csv-parse`, same maintainer) for JSON→CSV. This avoids pulling in a separate package. + +```bash +pnpm --filter @paperclipai/server add csv-stringify +``` + +--- + +### Code Formats + +#### Code Formatting (JS/TS/CSS/HTML): `prettier ^3.x` + +```bash +pnpm --filter @paperclipai/server add --save-dev prettier +# For programmatic use in server: +pnpm --filter @paperclipai/server add prettier +``` + +Prettier exposes a programmatic API: + +```typescript +import { format } from "prettier"; + +const formatted = await format(sourceCode, { + parser: "typescript", // or "babel", "css", "html", "markdown", etc. + semi: true, + singleQuote: true, +}); +``` + +**Use case:** Agent generates code → prettier formats it before saving. Also enables code→code conversions like "reformat this JSON" or "convert CommonJS to ESM style." + +**Confidence: HIGH** — Prettier API is well-documented at prettier.io/docs/api. + +--- + +#### TypeScript Type Generation (JSON Schema → TypeScript): `json-schema-to-typescript ^15.x` + +For the AI-bridged case where an agent converts a JSON schema into TypeScript type definitions, this library handles it deterministically: + +```bash +pnpm --filter @paperclipai/server add json-schema-to-typescript +``` + +```typescript +import { compile } from "json-schema-to-typescript"; +const ts = await compile(jsonSchema, "MyType"); +``` + +**Confidence: MEDIUM** — widely used library; version verified as 15.x on npm (as of late 2025). Use for JSON Schema→TypeScript specifically; TypeScript→TypeScript reformatting uses the compiler API or prettier. + +--- + +## Format Coverage Matrix + +| Source → | PNG | JPEG | WebP | AVIF | SVG | PDF | DOCX | XLSX | CSV | JSON | MP4 | MP3 | WebM | +|----------|-----|------|------|------|-----|-----|------|------|-----|------|-----|-----|------| +| PNG | — | sharp | sharp | sharp | — | playwright | — | — | — | — | — | — | — | +| JPEG | sharp | — | sharp | sharp | — | playwright | — | — | — | — | — | — | — | +| WebP | sharp | sharp | — | sharp | — | playwright | — | — | — | — | — | — | — | +| SVG | sharp/@resvg | sharp/@resvg | sharp/@resvg | — | — | playwright | — | — | — | — | — | — | — | +| DOCX | — | — | — | — | — | LibreOffice | — | — | — | — | — | — | — | +| PPTX | — | — | — | — | — | LibreOffice | — | — | — | — | — | — | — | +| XLSX/XLS | — | — | — | — | — | LibreOffice | — | — | SheetJS | SheetJS | — | — | — | +| HTML | — | — | — | — | — | playwright | mammoth→† | — | — | — | — | — | — | +| Markdown | — | — | — | — | — | pandoc | pandoc | — | — | — | — | — | — | +| CSV | — | — | — | — | — | AI-bridged | — | SheetJS | — | csv-parse | — | — | — | +| JSON | — | — | — | — | — | AI-bridged | — | SheetJS | csv-stringify | — | — | — | — | +| MP4/MKV | — | — | — | — | — | — | — | — | — | — | — | ffmpeg | ffmpeg | +| MP3/WAV | — | — | — | — | — | — | — | — | — | — | — | — | — | +| WAV/OGG | — | — | — | — | — | — | — | — | — | — | — | ffmpeg | — | + +† HTML→DOCX requires pandoc (mammoth is one-way: DOCX→HTML only) + +**AI-bridged**: Format pairs without a deterministic tool path. See Tier 2 below. + +--- + +## Tier 2: Format Registry Pattern + +### Dispatch Table Design + +The registry is a map of `"source/target"` → handler function. This is simpler than a class hierarchy and matches the existing Nexus factory function pattern. + +```typescript +// server/src/services/converters/registry.ts + +export type ConversionHandler = ( + inputPath: string, + outputPath: string, + opts?: Record +) => Promise; + +export type ConversionCapability = "direct" | "ai-bridged" | "unavailable"; + +export interface ConversionRoute { + capability: ConversionCapability; + handler?: ConversionHandler; // present when capability = "direct" + aiHint?: string; // present when capability = "ai-bridged" + requiresSystemDep?: string; // e.g. "pandoc", "libreoffice" +} + +// Key format: "source.ext/target.ext" — always lowercase, no leading dot +const registry = new Map(); + +export function registerConverter( + sourceExt: string, + targetExt: string, + route: ConversionRoute +): void { + registry.set(`${sourceExt}/${targetExt}`, route); +} + +export function getConverter(sourceExt: string, targetExt: string): ConversionRoute { + return registry.get(`${sourceExt}/${targetExt}`) ?? { capability: "unavailable" }; +} + +export function listSupportedTargets(sourceExt: string): string[] { + const results: string[] = []; + for (const [key, route] of registry.entries()) { + if (key.startsWith(`${sourceExt}/`) && route.capability !== "unavailable") { + results.push(key.split("/")[1]); + } + } + return results; +} +``` + +**Registration (in server startup):** + +```typescript +// server/src/services/converters/index.ts +import { registerConverter } from "./registry"; +import { sharpConvert } from "./sharp-converter"; +import { ffmpegConvert } from "./ffmpeg-converter"; +import { pandocConvert } from "./pandoc-converter"; + +// Image +registerConverter("png", "webp", { capability: "direct", handler: (i, o) => sharpConvert(i, o, "webp") }); +registerConverter("jpg", "webp", { capability: "direct", handler: (i, o) => sharpConvert(i, o, "webp") }); +registerConverter("svg", "png", { capability: "direct", handler: (i, o) => sharpConvert(i, o, "png") }); + +// Documents +registerConverter("docx", "html", { capability: "direct", handler: mammothConvert, requiresSystemDep: undefined }); +registerConverter("docx", "pdf", { capability: "direct", handler: libreofficeConvert, requiresSystemDep: "libreoffice" }); +registerConverter("md", "docx", { capability: "direct", handler: (i, o) => pandocConvert(i, o, "markdown", "docx"), requiresSystemDep: "pandoc" }); + +// Data +registerConverter("xlsx", "csv", { capability: "direct", handler: sheetjsConvert }); +registerConverter("csv", "xlsx", { capability: "direct", handler: sheetjsConvert }); +registerConverter("csv", "pdf", { capability: "ai-bridged", aiHint: "format as a formatted table report PDF" }); +registerConverter("json", "pdf", { capability: "ai-bridged", aiHint: "render as a structured document report" }); + +// Audio/Video +registerConverter("mp4", "webm", { capability: "direct", handler: (i, o) => ffmpegConvert(i, o, ["-c:v", "libvpx-vp9"]) }); +registerConverter("mp3", "wav", { capability: "direct", handler: (i, o) => ffmpegConvert(i, o, ["-c:a", "pcm_s16le"]) }); +``` + +**Extensibility:** Adding a new format pair is one `registerConverter()` call. The route handler and the registry are decoupled. System dependency checks happen at `registerConverter()` time, not at request time — unavailable converters are registered as `capability: "unavailable"` when their system dep is absent. + +--- + +## Tier 2: AI-Bridged Conversion + +### When to Use AI (vs Direct Tool) + +| Criterion | Direct Tool | AI-Bridged | +|-----------|-------------|------------| +| Output is byte-for-byte deterministic | Yes | No | +| Format pair has an established tool | Yes | No | +| Conversion is purely structural (no semantic change) | Yes | — | +| Conversion requires understanding content meaning | — | Yes | +| Source format is machine-readable but lacks a direct path | — | Yes | +| Example pairs | PNG→WebP, DOCX→PDF, MP4→WebM | CSV→PDF report, JSON→DOCX narrative, schema→TypeScript | + +**Decision rule:** + +> Use direct tool when: `getConverter(src, tgt).capability === "direct"`. +> Use AI when: capability is `"ai-bridged"` AND the source is a text/data format that an LLM can read as context. +> Return `"unavailable"` error when: capability is `"unavailable"` (binary formats with no path, e.g. PDF→XLSX). + +**AI-bridged is NOT a fallback for when a tool is missing.** If LibreOffice is not installed, DOCX→PDF is `"unavailable"`, not `"ai-bridged"`. AI-bridged is only for semantically complex conversions where no deterministic tool exists. + +--- + +### AI-Bridged Prompt Structure + +The prompt must be deterministic in its output format requirements. Vague prompts produce vague output. + +```typescript +// server/src/services/converters/ai-bridged-converter.ts + +export async function aiBridgedConvert( + sourceExt: string, + targetExt: string, + sourceContent: string, // text content of the source file + outputPath: string, + aiHint: string, // from registry route + agentAdapter: AgentAdapter // existing adapter interface +): Promise { + const prompt = buildConversionPrompt(sourceExt, targetExt, sourceContent, aiHint); + const result = await agentAdapter.complete(prompt); + await writeConversionResult(result, targetExt, outputPath); +} + +function buildConversionPrompt( + sourceExt: string, + targetExt: string, + sourceContent: string, + aiHint: string +): string { + const outputSpec = OUTPUT_SPEC[targetExt] ?? "the target format"; + return `Convert the following ${sourceExt.toUpperCase()} content to ${targetExt.toUpperCase()}. + +Instruction: ${aiHint} + +Requirements: +- Output ONLY the converted content, no explanation, no preamble +- Output format: ${outputSpec} +- Preserve all data values exactly; do not summarize or truncate + +Source content: +\`\`\` +${sourceContent} +\`\`\``; +} + +const OUTPUT_SPEC: Record = { + html: "Valid HTML5. No , no / wrapper. Only the content fragment.", + md: "GitHub Flavored Markdown.", + ts: "Valid TypeScript. No imports unless required. Export all types.", + pdf: "HTML that will be rendered to PDF. Use inline