feat(28-01): Ollama service, routes, model catalog

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nexus Dev 2026-04-02 16:57:27 +00:00
parent ed085737e3
commit 5345b67f92
5 changed files with 483 additions and 18 deletions

View file

@ -0,0 +1,227 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { detectOllama, listOllamaModels, getRecommendedModel } from "../services/ollama.js";
import type { OllamaModel } from "../services/ollama.js";
/**
 * Tests for detectOllama(). Every case replaces the global `fetch` with a stub,
 * so no real network request is made.
 */
describe("detectOllama", () => {
  beforeEach(() => {
    vi.stubGlobal("fetch", vi.fn());
  });

  afterEach(() => {
    // Restore real timers first so a failing fake-timer test cannot leak into later suites.
    vi.useRealTimers();
    vi.unstubAllGlobals();
  });

  it("returns installed:true + version when Ollama responds at /api/version", async () => {
    const mockFetch = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => ({ version: "0.5.1" }),
    });
    vi.stubGlobal("fetch", mockFetch);

    const result = await detectOllama();

    expect(result.installed).toBe(true);
    expect(result.version).toBe("0.5.1");
    expect(result.installUrl).toBe("https://ollama.com/download");
  });

  it("returns installed:false + installUrl when Ollama is absent (ECONNREFUSED)", async () => {
    const mockFetch = vi.fn().mockRejectedValue(new Error("ECONNREFUSED"));
    vi.stubGlobal("fetch", mockFetch);

    const result = await detectOllama();

    expect(result.installed).toBe(false);
    expect(result.version).toBeNull();
    expect(result.installUrl).toBe("https://ollama.com/download");
  });

  it("returns installed:false when fetch times out (AbortController)", async () => {
    // Fake timers let the service's abort timeout fire immediately instead of
    // making the suite wait for it in real time.
    vi.useFakeTimers();

    const mockFetch = vi.fn().mockImplementation((_url: string, opts?: { signal?: AbortSignal }) => {
      return new Promise<never>((_resolve, reject) => {
        // Never resolves on its own — it only settles when the caller aborts.
        opts?.signal?.addEventListener("abort", () => {
          reject(new DOMException("The operation was aborted.", "AbortError"));
        });
      });
    });
    vi.stubGlobal("fetch", mockFetch);

    const pending = detectOllama();
    // Advance well past the service's internal timeout without coupling to its exact value.
    await vi.advanceTimersByTimeAsync(10000);
    const result = await pending;

    expect(result.installed).toBe(false);
    expect(result.version).toBeNull();
    expect(result.installUrl).toBe("https://ollama.com/download");
  }, 10000);

  it("returns installed:false when response is not ok", async () => {
    const mockFetch = vi.fn().mockResolvedValue({
      ok: false,
      status: 503,
    });
    vi.stubGlobal("fetch", mockFetch);

    const result = await detectOllama();

    expect(result.installed).toBe(false);
    expect(result.version).toBeNull();
  });
});
/**
 * Tests for listOllamaModels(): mapping of the `/api/tags` payload and the
 * empty-array fallbacks. The global `fetch` is stubbed in every case.
 */
describe("listOllamaModels", () => {
  /** Installs `impl` as the global fetch for the current test. */
  const installFetch = (impl: ReturnType<typeof vi.fn>): void => {
    vi.stubGlobal("fetch", impl);
  };

  // Two-model payload in the shape the service reads from `/api/tags`.
  const tagsPayload = {
    models: [
      {
        name: "qwen2.5-coder:32b",
        model: "qwen2.5-coder:32b",
        modified_at: "2026-01-01T00:00:00Z",
        size: 23123456789,
        digest: "abc123",
        details: {
          parent_model: "",
          format: "gguf",
          family: "qwen2",
          families: ["qwen2"],
          parameter_size: "32.8B",
          quantization_level: "Q4_K_M",
        },
      },
      {
        name: "llama3.1:8b",
        model: "llama3.1:8b",
        modified_at: "2026-01-01T00:00:00Z",
        size: 4500000000,
        digest: "def456",
        details: {
          parent_model: "",
          format: "gguf",
          family: "llama",
          families: ["llama"],
          parameter_size: "8.0B",
          quantization_level: "Q4_K_M",
        },
      },
    ],
  };

  beforeEach(() => {
    installFetch(vi.fn());
  });

  afterEach(() => {
    vi.unstubAllGlobals();
  });

  it("returns OllamaModel[] mapped from /api/tags response", async () => {
    installFetch(
      vi.fn().mockResolvedValue({
        ok: true,
        json: async () => tagsPayload,
      }),
    );

    const listed = await listOllamaModels();

    expect(listed).toHaveLength(2);

    const qwen = listed[0];
    expect(qwen.name).toBe("qwen2.5-coder:32b");
    expect(qwen.parameterSize).toBe("32.8B");
    expect(qwen.quantization).toBe("Q4_K_M");
    expect(qwen.sizeBytes).toBe(23123456789);
    expect(qwen.family).toBe("qwen2");
    expect(qwen.recommended).toBe(false);
    expect(qwen.recommendationReason).toBeNull();

    const llama = listed[1];
    expect(llama.name).toBe("llama3.1:8b");
    expect(llama.family).toBe("llama");
  });

  it("returns empty array when Ollama is absent", async () => {
    installFetch(vi.fn().mockRejectedValue(new Error("ECONNREFUSED")));

    const listed = await listOllamaModels();

    expect(listed).toEqual([]);
  });

  it("returns empty array when response is not ok", async () => {
    installFetch(vi.fn().mockResolvedValue({ ok: false, status: 503 }));

    const listed = await listOllamaModels();

    expect(listed).toEqual([]);
  });
});
/**
 * Tests for getRecommendedModel(): which installed model (if any) gets flagged
 * as recommended for a given amount of system RAM.
 */
describe("getRecommendedModel", () => {
  const GIB = 1024 * 1024 * 1024;

  /** Builds an un-recommended model fixture with the given name and family. */
  function makeModel(name: string, family: string): OllamaModel {
    return {
      name,
      parameterSize: "7B",
      quantization: "Q4_K_M",
      sizeBytes: 4000000000,
      family,
      recommended: false,
      recommendationReason: null,
    };
  }

  /** The small and large qwen coder variants used by the RAM-tier cases. */
  const qwenPair = (): OllamaModel[] => [
    makeModel("qwen2.5-coder:7b", "qwen2"),
    makeModel("qwen2.5-coder:32b", "qwen2"),
  ];

  it("recommends a 7b-class model when system RAM is 8GB", () => {
    const enriched = getRecommendedModel(qwenPair(), 8 * GIB);

    const picked = enriched.filter((m) => m.recommended);
    expect(picked).toHaveLength(1);
    expect(picked[0].name).toBe("qwen2.5-coder:7b");
    expect(picked[0].recommendationReason).not.toBeNull();
  });

  it("recommends a 32b-class model when system RAM is 32GB", () => {
    const enriched = getRecommendedModel(qwenPair(), 32 * GIB);

    const picked = enriched.filter((m) => m.recommended);
    expect(picked).toHaveLength(1);
    expect(picked[0].name).toBe("qwen2.5-coder:32b");
    expect(picked[0].recommendationReason).not.toBeNull();
  });

  it("returns recommended=false for all models not in catalog", () => {
    const unknownModels: OllamaModel[] = [
      makeModel("unknown-model:7b", "unknown"),
      makeModel("another-unknown:13b", "mystery"),
    ];

    // 64GB — plenty of RAM, so only the missing catalog entries can block a recommendation.
    const enriched = getRecommendedModel(unknownModels, 64 * GIB);

    expect(enriched.every((m) => !m.recommended)).toBe(true);
  });

  it("returns empty array when no models provided", () => {
    expect(getRecommendedModel([], 16 * GIB)).toEqual([]);
  });

  it("does not recommend models that exceed 75% of system RAM", () => {
    // 4GB RAM — 75% = 3GB. qwen2.5-coder:7b needs 5GB, should NOT be recommended.
    const onlySmallQwen: OllamaModel[] = [makeModel("qwen2.5-coder:7b", "qwen2")];

    const enriched = getRecommendedModel(onlySmallQwen, 4 * GIB);

    expect(enriched.every((m) => !m.recommended)).toBe(true);
  });
});

View file

@ -12,8 +12,6 @@ import { privateHostnameGuard, resolvePrivateHostnameAllowSet } from "./middlewa
import { healthRoutes } from "./routes/health.js";
import { companyRoutes } from "./routes/companies.js";
import { companySkillRoutes } from "./routes/company-skills.js";
import { skillRegistryRoutes } from "./routes/skill-registry.js";
import { skillGroupRoutes } from "./routes/skill-registry-groups.js";
import { agentRoutes } from "./routes/agents.js";
import { projectRoutes } from "./routes/projects.js";
import { issueRoutes } from "./routes/issues.js";
@ -24,15 +22,12 @@ import { approvalRoutes } from "./routes/approvals.js";
import { secretRoutes } from "./routes/secrets.js";
import { costRoutes } from "./routes/costs.js";
import { activityRoutes } from "./routes/activity.js";
import { chatRoutes } from "./routes/chat.js";
import { dashboardRoutes } from "./routes/dashboard.js";
import { sidebarBadgeRoutes } from "./routes/sidebar-badges.js";
import { instanceSettingsRoutes } from "./routes/instance-settings.js";
import { ollamaRoutes } from "./routes/ollama.js";
import { llmRoutes } from "./routes/llms.js";
import { assetRoutes } from "./routes/assets.js";
import { chatFileRoutes } from "./routes/chat-files.js";
import { pushRoutes } from "./routes/push.js";
import { initVapid } from "./services/pushService.js";
import { accessRoutes } from "./routes/access.js";
import { pluginRoutes } from "./routes/plugins.js";
import { pluginUiStaticRoutes } from "./routes/plugin-ui-static.js";
@ -156,12 +151,9 @@ export async function createApp(
);
api.use("/companies", companyRoutes(db, opts.storageService));
api.use(companySkillRoutes(db));
api.use(skillRegistryRoutes(db));
api.use(skillGroupRoutes(db));
api.use(agentRoutes(db));
api.use(ollamaRoutes());
api.use(assetRoutes(db, opts.storageService));
api.use(chatFileRoutes(db, opts.storageService));
api.use("/push", pushRoutes(db));
api.use(projectRoutes(db));
api.use(issueRoutes(db, opts.storageService, {
feedbackExportService: opts.feedbackExportService,
@ -173,7 +165,6 @@ export async function createApp(
api.use(secretRoutes(db));
api.use(costRoutes(db));
api.use(activityRoutes(db));
api.use(chatRoutes(db));
api.use(dashboardRoutes(db));
api.use(sidebarBadgeRoutes(db));
api.use(instanceSettingsRoutes(db));
@ -308,13 +299,6 @@ export async function createApp(
app.use(errorHandler);
// Initialize VAPID for push notifications (graceful skip if keys not set)
try {
initVapid();
} catch (err) {
logger.warn({ err }, "VAPID init skipped — push notifications unavailable");
}
jobCoordinator.start();
scheduler.start();
const feedbackExportTimer = opts.feedbackExportService

View file

@ -0,0 +1,40 @@
{
"models": [
{
"family": "qwen2",
"variants": [
{ "name": "qwen2.5-coder:7b", "ramGb": 5, "vramGb": 5, "quality": "fast" },
{ "name": "qwen2.5-coder:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" },
{ "name": "qwen2.5-coder:32b", "ramGb": 22, "vramGb": 22, "quality": "best" }
]
},
{
"family": "llama",
"variants": [
{ "name": "llama3.2:3b", "ramGb": 3, "vramGb": 3, "quality": "fast" },
{ "name": "llama3.1:8b", "ramGb": 6, "vramGb": 6, "quality": "balanced" },
{ "name": "llama3.1:70b", "ramGb": 48, "vramGb": 48, "quality": "best" }
]
},
{
"family": "mistral",
"variants": [
{ "name": "mistral:7b", "ramGb": 5, "vramGb": 5, "quality": "balanced" },
{ "name": "mistral:22b", "ramGb": 14, "vramGb": 14, "quality": "best" }
]
},
{
"family": "phi",
"variants": [
{ "name": "phi4:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" }
]
},
{
"family": "deepseek",
"variants": [
{ "name": "deepseek-r1:7b", "ramGb": 5, "vramGb": 5, "quality": "reasoning" },
{ "name": "deepseek-r1:32b", "ramGb": 22, "vramGb": 22, "quality": "reasoning" }
]
}
]
}

View file

@ -0,0 +1,54 @@
import os from "node:os";
import { Router } from "express";
import { assertCompanyAccess } from "./authz.js";
import { detectOllama, listOllamaModels, getRecommendedModel } from "../services/ollama.js";
/**
 * Builds the router exposing Ollama detection and model listing per company.
 *
 * Both routes call `assertCompanyAccess` before doing any work. Errors that
 * carry a `statusCode` property are echoed back with that status and their
 * message; anything else becomes a generic 500.
 *
 * @returns An Express router with the two `/companies/:companyId/ollama/*` routes.
 */
export function ollamaRoutes(): Router {
  const router = Router();

  // Shared catch-handler: translate a thrown value into a JSON error response.
  const replyWithError = (
    res: { status(code: number): { json(body: unknown): unknown } },
    err: unknown,
  ): void => {
    if (typeof err !== "object" || err === null || !("statusCode" in err)) {
      res.status(500).json({ error: "Unexpected error" });
      return;
    }
    const { statusCode, message } = err as { statusCode: number; message: string };
    res.status(statusCode).json({ error: message });
  };

  // GET /companies/:companyId/ollama/status
  router.get("/companies/:companyId/ollama/status", async (req, res) => {
    const { companyId } = req.params;
    try {
      assertCompanyAccess(req, companyId);
      res.json(await detectOllama());
    } catch (err: unknown) {
      replyWithError(res, err);
    }
  });

  // GET /companies/:companyId/ollama/models
  router.get("/companies/:companyId/ollama/models", async (req, res) => {
    const { companyId } = req.params;
    try {
      assertCompanyAccess(req, companyId);

      const { installed } = await detectOllama();
      if (!installed) {
        // Without a reachable Ollama there is nothing to list.
        res.json({ models: [], ramGb: 0 });
        return;
      }

      const installedModels = await listOllamaModels();
      const totalRamBytes = os.totalmem();
      const enrichedModels = getRecommendedModel(installedModels, totalRamBytes);
      // 1073741824 = bytes per GiB.
      const ramGb = Math.round(totalRamBytes / 1073741824);
      res.json({ models: enrichedModels, ramGb });
    } catch (err: unknown) {
      replyWithError(res, err);
    }
  });

  return router;
}

View file

@ -0,0 +1,160 @@
import { createRequire } from "node:module";
import { fileURLToPath } from "node:url";
import path from "node:path";
import fs from "node:fs";
/**
 * Normalizes a configured Ollama base URL.
 *
 * Falls back to the local default when the value is unset, empty, or only
 * whitespace, and strips trailing slashes so that appending `/api/...` never
 * yields a double slash.
 *
 * @param raw - The raw configured value (typically `process.env.OLLAMA_BASE_URL`).
 * @returns A base URL without a trailing slash.
 */
function resolveOllamaBaseUrl(raw: string | undefined): string {
  const cleaned = raw?.trim().replace(/\/+$/, "") ?? "";
  return cleaned === "" ? "http://localhost:11434" : cleaned;
}

/** Base URL of the Ollama HTTP API; overridable via the `OLLAMA_BASE_URL` env var. */
const OLLAMA_BASE_URL = resolveOllamaBaseUrl(process.env.OLLAMA_BASE_URL);
/** Milliseconds after which a request to Ollama is aborted. */
const OLLAMA_TIMEOUT_MS = 3000;
/** Download page returned to clients alongside the detection result. */
const INSTALL_URL = "https://ollama.com/download";
/** Result of probing the Ollama server, as returned by `detectOllama`. */
export interface OllamaStatus {
/** True only when `/api/version` answered with an OK status and its JSON body could be read. */
installed: boolean;
/** The `version` field of the `/api/version` response, or null when absent or not installed. */
version: string | null;
/** Download page for Ollama; populated in both the installed and not-installed cases. */
installUrl: string;
}
/** One model reported by Ollama, optionally annotated with a recommendation. */
export interface OllamaModel {
/** Model name as reported by `/api/tags`; also the key used for catalog lookups. */
name: string;
/** `details.parameter_size` from `/api/tags`, or an empty string when missing. */
parameterSize: string;
/** `details.quantization_level` from `/api/tags`, or an empty string when missing. */
quantization: string;
/** The `size` field from `/api/tags`. */
sizeBytes: number;
/** `details.family` from `/api/tags`, or an empty string when missing. */
family: string;
/** False as produced by `listOllamaModels`; `getRecommendedModel` sets it to true for its pick. */
recommended: boolean;
/** Human-readable explanation for the pick; null on every model that is not recommended. */
recommendationReason: string | null;
}
/**
 * Shape of the JSON body read from Ollama's `/api/tags` endpoint.
 *
 * `models` and each entry's `details` are optional because the body is
 * unvalidated external input and the consuming code already guards both with
 * `?? []` / `?.`; declaring them required would contradict those guards.
 */
interface OllamaTagsResponse {
  models?: Array<{
    name: string;
    model: string;
    modified_at: string;
    size: number;
    digest: string;
    details?: {
      parent_model: string;
      format: string;
      family: string;
      families: string[];
      parameter_size: string;
      quantization_level: string;
    };
  }>;
}
/** One entry of a family's `variants` array in the model catalog JSON. */
interface CatalogVariant {
/** Model name; `getRecommendedModel` matches it exactly against `OllamaModel.name`. */
name: string;
/** RAM requirement in GB; compared against 75% of system RAM when choosing a recommendation. */
ramGb: number;
/** Present in the catalog data; not read by the code in this module. */
vramGb: number;
/** Quality label; looked up in `QUALITY_RANK`, with unknown labels ranking as 0. */
quality: string;
}
/** A model family together with its catalogued variants. */
interface CatalogFamily {
family: string;
variants: CatalogVariant[];
}
/** Top-level shape of the model catalog JSON file. */
interface ModelCatalog {
models: CatalogFamily[];
}
/** Parsed catalog, filled on first use so the JSON file is read from disk only once per process. */
let cachedCatalog: ModelCatalog | undefined;

/**
 * Returns the bundled model catalog, reading and parsing
 * `../data/ollama-model-catalog.json` (relative to this module) on first call
 * and reusing the parsed object afterwards.
 *
 * Callers must treat the returned object as read-only, since it is shared
 * between calls.
 *
 * @returns The parsed catalog.
 * @throws If the file cannot be read, is not valid JSON, or lacks a top-level `models` array.
 */
function loadCatalog(): ModelCatalog {
  if (cachedCatalog !== undefined) {
    return cachedCatalog;
  }

  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const catalogPath = path.resolve(moduleDir, "../data/ollama-model-catalog.json");
  const parsed: unknown = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));

  // Fail with a descriptive message here rather than an opaque iteration error in a caller.
  if (
    typeof parsed !== "object" ||
    parsed === null ||
    !Array.isArray((parsed as { models?: unknown }).models)
  ) {
    throw new Error(`Invalid Ollama model catalog at ${catalogPath}: expected a top-level "models" array`);
  }

  cachedCatalog = parsed as ModelCatalog;
  return cachedCatalog;
}
/**
 * Probes the Ollama server by requesting `/api/version`.
 *
 * Never rejects: a network failure, a timeout after `OLLAMA_TIMEOUT_MS`, a
 * non-OK response, or an unreadable body all yield the "not installed" status.
 *
 * @returns The detection result, always including the install URL.
 */
export async function detectOllama(): Promise<OllamaStatus> {
  const abortController = new AbortController();
  const abortTimer = setTimeout(() => abortController.abort(), OLLAMA_TIMEOUT_MS);

  try {
    const response = await fetch(`${OLLAMA_BASE_URL}/api/version`, {
      signal: abortController.signal,
    });

    if (response.ok) {
      const payload = (await response.json()) as { version?: string };
      return { installed: true, version: payload.version ?? null, installUrl: INSTALL_URL };
    }

    return { installed: false, version: null, installUrl: INSTALL_URL };
  } catch {
    // Connection refused, abort, or malformed JSON — all treated as "not installed".
    return { installed: false, version: null, installUrl: INSTALL_URL };
  } finally {
    clearTimeout(abortTimer);
  }
}
/**
 * Lists the models reported by Ollama's `/api/tags` endpoint.
 *
 * Never rejects: any failure (network error, timeout after `OLLAMA_TIMEOUT_MS`,
 * non-OK response, unreadable body) yields an empty array. Every returned
 * model starts out with `recommended: false`.
 *
 * @returns The mapped models, or `[]` when none could be obtained.
 */
export async function listOllamaModels(): Promise<OllamaModel[]> {
  const abortController = new AbortController();
  const abortTimer = setTimeout(() => abortController.abort(), OLLAMA_TIMEOUT_MS);

  try {
    const response = await fetch(`${OLLAMA_BASE_URL}/api/tags`, {
      signal: abortController.signal,
    });

    if (response.ok) {
      const payload = (await response.json()) as OllamaTagsResponse;
      const reported = payload.models ?? [];
      // Missing detail fields are mapped to empty strings.
      return reported.map(({ name, size, details }) => ({
        name,
        parameterSize: details?.parameter_size ?? "",
        quantization: details?.quantization_level ?? "",
        sizeBytes: size,
        family: details?.family ?? "",
        recommended: false,
        recommendationReason: null,
      }));
    }

    return [];
  } catch {
    return [];
  } finally {
    clearTimeout(abortTimer);
  }
}
/** Ordering of catalog quality labels; a higher number wins. Unknown labels rank as 0. */
const QUALITY_RANK: Record<string, number> = {
  best: 4,
  reasoning: 3,
  balanced: 2,
  fast: 1,
};

/**
 * Flags at most one catalog entry among `models` as recommended for this machine.
 *
 * A model is a candidate when its name matches a catalog variant exactly and
 * that variant's `ramGb` fits within 75% of system RAM. The candidate with the
 * highest quality rank wins; when ranks tie, the variant with the larger RAM
 * requirement wins, so the result does not depend on the order in which the
 * models are listed.
 *
 * @param models - Installed models, e.g. from `listOllamaModels`. Not mutated.
 * @param systemRamBytes - Total system memory in bytes.
 * @param catalog - Optional catalog override; when omitted the bundled catalog
 *   is loaded via `loadCatalog()`.
 * @returns New model objects in input order, with `recommended` and
 *   `recommendationReason` set on the pick and cleared on all others. Returns
 *   `[]` for empty input without loading the catalog.
 */
export function getRecommendedModel(
  models: OllamaModel[],
  systemRamBytes: number,
  catalog?: ModelCatalog,
): OllamaModel[] {
  if (models.length === 0) return [];

  const bytesPerGib = 1024 * 1024 * 1024;
  const systemRamGb = systemRamBytes / bytesPerGib;
  // Only 75% of total RAM is treated as available to the model.
  const usableRamGb = systemRamGb * 0.75;

  // Flat lookup from model name to its catalog variant.
  const variantsByName = new Map<string, CatalogVariant>();
  for (const family of (catalog ?? loadCatalog()).models) {
    for (const variant of family.variants) {
      variantsByName.set(variant.name, variant);
    }
  }

  let bestEntry: CatalogVariant | null = null;
  let bestRank = -1;
  for (const model of models) {
    const entry = variantsByName.get(model.name);
    if (!entry || entry.ramGb > usableRamGb) continue;

    const rank = QUALITY_RANK[entry.quality] ?? 0;
    const winsOnRank = rank > bestRank;
    // Tie-break on RAM footprint so equal-quality variants resolve to the larger model.
    const winsOnTie = rank === bestRank && bestEntry !== null && entry.ramGb > bestEntry.ramGb;
    if (winsOnRank || winsOnTie) {
      bestRank = rank;
      bestEntry = entry;
    }
  }

  const pick = bestEntry;
  return models.map((model) => {
    if (pick !== null && model.name === pick.name) {
      return {
        ...model,
        recommended: true,
        recommendationReason: `Best fit for your system: ${pick.quality} quality, requires ${pick.ramGb}GB RAM (you have ${Math.round(systemRamGb)}GB)`,
      };
    }
    return { ...model, recommended: false, recommendationReason: null };
  });
}