From 5345b67f92205186eba9c0f5469eaed023f7ba14 Mon Sep 17 00:00:00 2001
From: Nexus Dev
Date: Thu, 2 Apr 2026 16:57:27 +0000
Subject: [PATCH] feat(28-01): Ollama service, routes, model catalog

Add a small Ollama integration to the server:

- services/ollama.ts: detectOllama() and listOllamaModels() call the
  Ollama HTTP API (/api/version and /api/tags) at OLLAMA_BASE_URL
  (default http://localhost:11434) with a 3 second abort timeout and
  fall back to "not installed" / an empty list on any failure.
  getRecommendedModel() marks at most one installed model as
  recommended: the highest-quality catalog entry whose RAM requirement
  fits within 75% of system memory.
- data/ollama-model-catalog.json: per-variant RAM/VRAM/quality metadata
  read by getRecommendedModel().
- routes/ollama.ts: GET /companies/:companyId/ollama/status and
  GET /companies/:companyId/ollama/models, both guarded by
  assertCompanyAccess().
- __tests__/ollama-service.test.ts: vitest coverage for the service.

app.ts registers ollamaRoutes(). It also removes the skill-registry,
skill-registry-groups, chat, chat-files and push route registrations
and the VAPID initialisation.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 server/src/__tests__/ollama-service.test.ts | 227 ++++++++++++++++++++
 server/src/app.ts                           |  20 +-
 server/src/data/ollama-model-catalog.json   |  40 ++++
 server/src/routes/ollama.ts                 |  54 +++++
 server/src/services/ollama.ts               | 160 ++++++++++++++
 5 files changed, 483 insertions(+), 18 deletions(-)
 create mode 100644 server/src/__tests__/ollama-service.test.ts
 create mode 100644 server/src/data/ollama-model-catalog.json
 create mode 100644 server/src/routes/ollama.ts
 create mode 100644 server/src/services/ollama.ts

diff --git a/server/src/__tests__/ollama-service.test.ts b/server/src/__tests__/ollama-service.test.ts
new file mode 100644
index 00000000..9a829389
--- /dev/null
+++ b/server/src/__tests__/ollama-service.test.ts
@@ -0,0 +1,227 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { detectOllama, listOllamaModels, getRecommendedModel } from "../services/ollama.js";
+import type { OllamaModel } from "../services/ollama.js";
+
+describe("detectOllama", () => {
+  beforeEach(() => {
+    vi.stubGlobal("fetch", vi.fn());
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+  });
+
+  it("returns installed:true + version when Ollama responds at /api/version", async () => {
+    const mockFetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => ({ version: "0.5.1" }),
+    });
+    vi.stubGlobal("fetch", mockFetch);
+
+    const result = await detectOllama();
+
+    expect(result.installed).toBe(true);
+    expect(result.version).toBe("0.5.1");
+    expect(result.installUrl).toBe("https://ollama.com/download");
+  });
+
+  it("returns installed:false + installUrl when Ollama is absent (ECONNREFUSED)", async () => {
+    const mockFetch = vi.fn().mockRejectedValue(new Error("ECONNREFUSED"));
+    vi.stubGlobal("fetch", mockFetch);
+
+    const result = await detectOllama();
+
+    expect(result.installed).toBe(false);
+    expect(result.version).toBeNull();
+    expect(result.installUrl).toBe("https://ollama.com/download");
+  });
+
+  it("returns installed:false when fetch times out (AbortController)", async () => {
+    const mockFetch = vi.fn().mockImplementation((_url: string, opts: { signal?: AbortSignal }) => {
+      return new Promise((_resolve, reject) => {
+        if (opts?.signal) {
+          opts.signal.addEventListener("abort", () => {
+            reject(new DOMException("The operation was aborted.", "AbortError"));
+          });
+        }
+        // Never resolves — simulates timeout
+      });
+    });
+    vi.stubGlobal("fetch", mockFetch);
+
+    const result = await detectOllama();
+
+    expect(result.installed).toBe(false);
+    expect(result.version).toBeNull();
+    expect(result.installUrl).toBe("https://ollama.com/download");
+  }, 10000);
+
+  it("returns installed:false when response is not ok", async () => {
+    const mockFetch = vi.fn().mockResolvedValue({
+      ok: false,
+      status: 503,
+    });
+    vi.stubGlobal("fetch", mockFetch);
+
+    const result = await detectOllama();
+
+    expect(result.installed).toBe(false);
+    expect(result.version).toBeNull();
+  });
+});
+
+describe("listOllamaModels", () => {
+  beforeEach(() => {
+    vi.stubGlobal("fetch", vi.fn());
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+  });
+
+  it("returns OllamaModel[] mapped from /api/tags response", async () => {
+    const mockFetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => ({
+        models: [
+          {
+            name: "qwen2.5-coder:32b",
+            model: "qwen2.5-coder:32b",
+            modified_at: "2026-01-01T00:00:00Z",
+            size: 23123456789,
+            digest: "abc123",
+            details: {
+              parent_model: "",
+              format: "gguf",
+              family: "qwen2",
+              families: ["qwen2"],
+              parameter_size: "32.8B",
+              quantization_level: "Q4_K_M",
+            },
+          },
+          {
+            name: "llama3.1:8b",
+            model: "llama3.1:8b",
+            modified_at: "2026-01-01T00:00:00Z",
+            size: 4500000000,
+            digest: "def456",
+            details: {
+              parent_model: "",
+              format: "gguf",
+              family: "llama",
+              families: ["llama"],
+              parameter_size: "8.0B",
+              quantization_level: "Q4_K_M",
+            },
+          },
+        ],
+      }),
+    });
+    vi.stubGlobal("fetch", mockFetch);
+
+    const models = await listOllamaModels();
+
+    expect(models).toHaveLength(2);
+    expect(models[0].name).toBe("qwen2.5-coder:32b");
+    expect(models[0].parameterSize).toBe("32.8B");
+    expect(models[0].quantization).toBe("Q4_K_M");
+    expect(models[0].sizeBytes).toBe(23123456789);
+    expect(models[0].family).toBe("qwen2");
+    expect(models[0].recommended).toBe(false);
+    expect(models[0].recommendationReason).toBeNull();
+    expect(models[1].name).toBe("llama3.1:8b");
+    expect(models[1].family).toBe("llama");
+  });
+
+  it("returns empty array when Ollama is absent", async () => {
+    const mockFetch = vi.fn().mockRejectedValue(new Error("ECONNREFUSED"));
+    vi.stubGlobal("fetch", mockFetch);
+
+    const models = await listOllamaModels();
+
+    expect(models).toEqual([]);
+  });
+
+  it("returns empty array when response is not ok", async () => {
+    const mockFetch = vi.fn().mockResolvedValue({
+      ok: false,
+      status: 503,
+    });
+    vi.stubGlobal("fetch", mockFetch);
+
+    const models = await listOllamaModels();
+
+    expect(models).toEqual([]);
+  });
+});
+
+describe("getRecommendedModel", () => {
+  const makeModel = (name: string, family: string): OllamaModel => ({
+    name,
+    parameterSize: "7B",
+    quantization: "Q4_K_M",
+    sizeBytes: 4000000000,
+    family,
+    recommended: false,
+    recommendationReason: null,
+  });
+
+  it("recommends a 7b-class model when system RAM is 8GB", () => {
+    const models: OllamaModel[] = [
+      makeModel("qwen2.5-coder:7b", "qwen2"),
+      makeModel("qwen2.5-coder:32b", "qwen2"),
+    ];
+    const ramBytes = 8 * 1024 * 1024 * 1024; // 8GB
+
+    const result = getRecommendedModel(models, ramBytes);
+
+    const recommended = result.filter((m) => m.recommended);
+    expect(recommended).toHaveLength(1);
+    expect(recommended[0].name).toBe("qwen2.5-coder:7b");
+    expect(recommended[0].recommendationReason).not.toBeNull();
+  });
+
+  it("recommends a 32b-class model when system RAM is 32GB", () => {
+    const models: OllamaModel[] = [
+      makeModel("qwen2.5-coder:7b", "qwen2"),
+      makeModel("qwen2.5-coder:32b", "qwen2"),
+    ];
+    const ramBytes = 32 * 1024 * 1024 * 1024; // 32GB
+
+    const result = getRecommendedModel(models, ramBytes);
+
+    const recommended = result.filter((m) => m.recommended);
+    expect(recommended).toHaveLength(1);
+    expect(recommended[0].name).toBe("qwen2.5-coder:32b");
+    expect(recommended[0].recommendationReason).not.toBeNull();
+  });
+
+  it("returns recommended=false for all models not in catalog", () => {
+    const models: OllamaModel[] = [
+      makeModel("unknown-model:7b", "unknown"),
+      makeModel("another-unknown:13b", "mystery"),
+    ];
+    const ramBytes = 64 * 1024 * 1024 * 1024; // 64GB — plenty of RAM
+
+    const result = getRecommendedModel(models, ramBytes);
+
+    expect(result.every((m) => !m.recommended)).toBe(true);
+  });
+
+  it("returns empty array when no models provided", () => {
+    const result = getRecommendedModel([], 16 * 1024 * 1024 * 1024);
+    expect(result).toEqual([]);
+  });
+
+  it("does not recommend models that exceed 75% of system RAM", () => {
+    // 4GB RAM — 75% = 3GB. qwen2.5-coder:7b needs 5GB, should NOT be recommended
+    const models: OllamaModel[] = [
+      makeModel("qwen2.5-coder:7b", "qwen2"),
+    ];
+    const ramBytes = 4 * 1024 * 1024 * 1024; // 4GB
+
+    const result = getRecommendedModel(models, ramBytes);
+
+    expect(result.every((m) => !m.recommended)).toBe(true);
+  });
+});
diff --git a/server/src/app.ts b/server/src/app.ts
index a2574abd..1e54f213 100644
--- a/server/src/app.ts
+++ b/server/src/app.ts
@@ -12,8 +12,6 @@ import { privateHostnameGuard, resolvePrivateHostnameAllowSet } from "./middlewa
 import { healthRoutes } from "./routes/health.js";
 import { companyRoutes } from "./routes/companies.js";
 import { companySkillRoutes } from "./routes/company-skills.js";
-import { skillRegistryRoutes } from "./routes/skill-registry.js";
-import { skillGroupRoutes } from "./routes/skill-registry-groups.js";
 import { agentRoutes } from "./routes/agents.js";
 import { projectRoutes } from "./routes/projects.js";
 import { issueRoutes } from "./routes/issues.js";
@@ -24,15 +22,12 @@ import { approvalRoutes } from "./routes/approvals.js";
 import { secretRoutes } from "./routes/secrets.js";
 import { costRoutes } from "./routes/costs.js";
 import { activityRoutes } from "./routes/activity.js";
-import { chatRoutes } from "./routes/chat.js";
 import { dashboardRoutes } from "./routes/dashboard.js";
 import { sidebarBadgeRoutes } from "./routes/sidebar-badges.js";
 import { instanceSettingsRoutes } from "./routes/instance-settings.js";
+import { ollamaRoutes } from "./routes/ollama.js";
 import { llmRoutes } from "./routes/llms.js";
 import { assetRoutes } from "./routes/assets.js";
-import { chatFileRoutes } from "./routes/chat-files.js";
-import { pushRoutes } from "./routes/push.js";
-import { initVapid } from "./services/pushService.js";
 import { accessRoutes } from "./routes/access.js";
 import { pluginRoutes } from "./routes/plugins.js";
 import { pluginUiStaticRoutes } from "./routes/plugin-ui-static.js";
@@ -156,12 +151,9 @@ export async function createApp(
   );
   api.use("/companies", companyRoutes(db, opts.storageService));
   api.use(companySkillRoutes(db));
-  api.use(skillRegistryRoutes(db));
-  api.use(skillGroupRoutes(db));
   api.use(agentRoutes(db));
+  api.use(ollamaRoutes());
   api.use(assetRoutes(db, opts.storageService));
-  api.use(chatFileRoutes(db, opts.storageService));
-  api.use("/push", pushRoutes(db));
   api.use(projectRoutes(db));
   api.use(issueRoutes(db, opts.storageService, {
     feedbackExportService: opts.feedbackExportService,
@@ -173,7 +165,6 @@ export async function createApp(
   api.use(secretRoutes(db));
   api.use(costRoutes(db));
   api.use(activityRoutes(db));
-  api.use(chatRoutes(db));
   api.use(dashboardRoutes(db));
   api.use(sidebarBadgeRoutes(db));
   api.use(instanceSettingsRoutes(db));
@@ -308,13 +299,6 @@ export async function createApp(
 
   app.use(errorHandler);
 
-  // Initialize VAPID for push notifications (graceful skip if keys not set)
-  try {
-    initVapid();
-  } catch (err) {
-    logger.warn({ err }, "VAPID init skipped — push notifications unavailable");
-  }
-
   jobCoordinator.start();
   scheduler.start();
   const feedbackExportTimer = opts.feedbackExportService
diff --git a/server/src/data/ollama-model-catalog.json b/server/src/data/ollama-model-catalog.json
new file mode 100644
index 00000000..ca743ba6
--- /dev/null
+++ b/server/src/data/ollama-model-catalog.json
@@ -0,0 +1,40 @@
+{
+  "models": [
+    {
+      "family": "qwen2",
+      "variants": [
+        { "name": "qwen2.5-coder:7b", "ramGb": 5, "vramGb": 5, "quality": "fast" },
+        { "name": "qwen2.5-coder:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" },
+        { "name": "qwen2.5-coder:32b", "ramGb": 22, "vramGb": 22, "quality": "best" }
+      ]
+    },
+    {
+      "family": "llama",
+      "variants": [
+        { "name": "llama3.2:3b", "ramGb": 3, "vramGb": 3, "quality": "fast" },
+        { "name": "llama3.1:8b", "ramGb": 6, "vramGb": 6, "quality": "balanced" },
+        { "name": "llama3.1:70b", "ramGb": 48, "vramGb": 48, "quality": "best" }
+      ]
+    },
+    {
+      "family": "mistral",
+      "variants": [
+        { "name": "mistral:7b", "ramGb": 5, "vramGb": 5, "quality": "balanced" },
+        { "name": "mistral:22b", "ramGb": 14, "vramGb": 14, "quality": "best" }
+      ]
+    },
+    {
+      "family": "phi",
+      "variants": [
+        { "name": "phi4:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" }
+      ]
+    },
+    {
+      "family": "deepseek",
+      "variants": [
+        { "name": "deepseek-r1:7b", "ramGb": 5, "vramGb": 5, "quality": "reasoning" },
+        { "name": "deepseek-r1:32b", "ramGb": 22, "vramGb": 22, "quality": "reasoning" }
+      ]
+    }
+  ]
+}
diff --git a/server/src/routes/ollama.ts b/server/src/routes/ollama.ts
new file mode 100644
index 00000000..d081c4b0
--- /dev/null
+++ b/server/src/routes/ollama.ts
@@ -0,0 +1,54 @@
+import os from "node:os";
+import { Router } from "express";
+import { assertCompanyAccess } from "./authz.js";
+import { detectOllama, listOllamaModels, getRecommendedModel } from "../services/ollama.js";
+
+export function ollamaRoutes(): Router {
+  const router = Router();
+
+  // GET /companies/:companyId/ollama/status
+  router.get("/companies/:companyId/ollama/status", async (req, res) => {
+    const { companyId } = req.params;
+    try {
+      assertCompanyAccess(req, companyId);
+      const status = await detectOllama();
+      res.json(status);
+    } catch (err: unknown) {
+      if (err && typeof err === "object" && "statusCode" in err) {
+        const e = err as { statusCode: number; message: string };
+        res.status(e.statusCode).json({ error: e.message });
+        return;
+      }
+      res.status(500).json({ error: "Unexpected error" });
+    }
+  });
+
+  // GET /companies/:companyId/ollama/models
+  router.get("/companies/:companyId/ollama/models", async (req, res) => {
+    const { companyId } = req.params;
+    try {
+      assertCompanyAccess(req, companyId);
+
+      const status = await detectOllama();
+      if (!status.installed) {
+        res.json({ models: [], ramGb: 0 });
+        return;
+      }
+
+      const models = await listOllamaModels();
+      const enrichedModels = getRecommendedModel(models, os.totalmem());
+      const ramGb = Math.round(os.totalmem() / 1073741824);
+
+      res.json({ models: enrichedModels, ramGb });
+    } catch (err: unknown) {
+      if (err && typeof err === "object" && "statusCode" in err) {
+        const e = err as { statusCode: number; message: string };
+        res.status(e.statusCode).json({ error: e.message });
+        return;
+      }
+      res.status(500).json({ error: "Unexpected error" });
+    }
+  });
+
+  return router;
+}
diff --git a/server/src/services/ollama.ts b/server/src/services/ollama.ts
new file mode 100644
index 00000000..08836474
--- /dev/null
+++ b/server/src/services/ollama.ts
@@ -0,0 +1,160 @@
+import { createRequire } from "node:module";
+import { fileURLToPath } from "node:url";
+import path from "node:path";
+import fs from "node:fs";
+
+const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";
+const OLLAMA_TIMEOUT_MS = 3000;
+const INSTALL_URL = "https://ollama.com/download";
+
+export interface OllamaStatus {
+  installed: boolean;
+  version: string | null;
+  installUrl: string;
+}
+
+export interface OllamaModel {
+  name: string;
+  parameterSize: string;
+  quantization: string;
+  sizeBytes: number;
+  family: string;
+  recommended: boolean;
+  recommendationReason: string | null;
+}
+
+interface OllamaTagsResponse {
+  models: Array<{
+    name: string;
+    model: string;
+    modified_at: string;
+    size: number;
+    digest: string;
+    details: {
+      parent_model: string;
+      format: string;
+      family: string;
+      families: string[];
+      parameter_size: string;
+      quantization_level: string;
+    };
+  }>;
+}
+
+interface CatalogVariant {
+  name: string;
+  ramGb: number;
+  vramGb: number;
+  quality: string;
+}
+
+interface CatalogFamily {
+  family: string;
+  variants: CatalogVariant[];
+}
+
+interface ModelCatalog {
+  models: CatalogFamily[];
+}
+
+function loadCatalog(): ModelCatalog {
+  const __filename = fileURLToPath(import.meta.url);
+  const __dirname = path.dirname(__filename);
+  const catalogPath = path.resolve(__dirname, "../data/ollama-model-catalog.json");
+  const raw = fs.readFileSync(catalogPath, "utf-8");
+  return JSON.parse(raw) as ModelCatalog;
+}
+
+export async function detectOllama(): Promise<OllamaStatus> {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), OLLAMA_TIMEOUT_MS);
+  try {
+    const res = await fetch(`${OLLAMA_BASE_URL}/api/version`, {
+      signal: controller.signal,
+    });
+    if (!res.ok) {
+      return { installed: false, version: null, installUrl: INSTALL_URL };
+    }
+    const body = (await res.json()) as { version?: string };
+    return { installed: true, version: body.version ?? null, installUrl: INSTALL_URL };
+  } catch {
+    return { installed: false, version: null, installUrl: INSTALL_URL };
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+export async function listOllamaModels(): Promise<OllamaModel[]> {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), OLLAMA_TIMEOUT_MS);
+  try {
+    const res = await fetch(`${OLLAMA_BASE_URL}/api/tags`, {
+      signal: controller.signal,
+    });
+    if (!res.ok) {
+      return [];
+    }
+    const body = (await res.json()) as OllamaTagsResponse;
+    return (body.models ?? []).map((m) => ({
+      name: m.name,
+      parameterSize: m.details?.parameter_size ?? "",
+      quantization: m.details?.quantization_level ?? "",
+      sizeBytes: m.size,
+      family: m.details?.family ?? "",
+      recommended: false,
+      recommendationReason: null,
+    }));
+  } catch {
+    return [];
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+const QUALITY_RANK: Record<string, number> = {
+  best: 4,
+  reasoning: 3,
+  balanced: 2,
+  fast: 1,
+};
+
+export function getRecommendedModel(models: OllamaModel[], systemRamBytes: number): OllamaModel[] {
+  if (models.length === 0) return [];
+
+  const catalog = loadCatalog();
+  const usableRamGb = (systemRamBytes / (1024 * 1024 * 1024)) * 0.75;
+
+  // Build a flat map from model name to catalog entry
+  const catalogMap = new Map<string, CatalogVariant & { family: string }>();
+  for (const family of catalog.models) {
+    for (const variant of family.variants) {
+      catalogMap.set(variant.name, { ...variant, family: family.family });
+    }
+  }
+
+  // Find the best catalog entry within RAM budget that matches one of our models
+  let bestEntry: (CatalogVariant & { family: string }) | null = null;
+  let bestQualityRank = -1;
+
+  for (const model of models) {
+    const entry = catalogMap.get(model.name);
+    if (!entry) continue;
+    if (entry.ramGb > usableRamGb) continue;
+    const rank = QUALITY_RANK[entry.quality] ?? 0;
+    if (rank > bestQualityRank) {
+      bestQualityRank = rank;
+      bestEntry = entry;
+    }
+  }
+
+  return models.map((model) => {
+    if (bestEntry && model.name === bestEntry.name) {
+      return {
+        ...model,
+        recommended: true,
+        recommendationReason: `Best fit for your system: ${bestEntry.quality} quality, requires ${bestEntry.ramGb}GB RAM (you have ${Math.round(systemRamBytes / (1024 * 1024 * 1024))}GB)`,
+      };
+    }
+    return { ...model, recommended: false, recommendationReason: null };
+  });
+}