feat(28-01): Ollama service, routes, model catalog
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ed085737e3
commit
5345b67f92
5 changed files with 483 additions and 18 deletions
227
server/src/__tests__/ollama-service.test.ts
Normal file
227
server/src/__tests__/ollama-service.test.ts
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { detectOllama, listOllamaModels, getRecommendedModel } from "../services/ollama.js";
|
||||
import type { OllamaModel } from "../services/ollama.js";
|
||||
|
||||
/**
 * Tests for `detectOllama`. Each case replaces the global `fetch` with a mock
 * and checks the status object that comes back.
 */
describe("detectOllama", () => {
  const INSTALL_URL = "https://ollama.com/download";

  beforeEach(() => {
    // Default stub so a case that forgets to install its own mock never reaches the network.
    vi.stubGlobal("fetch", vi.fn());
  });

  afterEach(() => {
    // Restoring real timers is harmless for the cases that never enabled fake ones.
    vi.useRealTimers();
    vi.unstubAllGlobals();
  });

  it("returns installed:true + version when Ollama responds at /api/version", async () => {
    const mockFetch = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => ({ version: "0.5.1" }),
    });
    vi.stubGlobal("fetch", mockFetch);

    const result = await detectOllama();

    expect(result.installed).toBe(true);
    expect(result.version).toBe("0.5.1");
    expect(result.installUrl).toBe(INSTALL_URL);
  });

  it("returns installed:false + installUrl when Ollama is absent (ECONNREFUSED)", async () => {
    const mockFetch = vi.fn().mockRejectedValue(new Error("ECONNREFUSED"));
    vi.stubGlobal("fetch", mockFetch);

    const result = await detectOllama();

    expect(result.installed).toBe(false);
    expect(result.version).toBeNull();
    expect(result.installUrl).toBe(INSTALL_URL);
  });

  it("returns installed:false when fetch times out (AbortController)", async () => {
    // Fake timers let the service's abort timeout elapse instantly instead of
    // making the suite wait in real time.
    vi.useFakeTimers();

    // A fetch that never settles on its own; it rejects only when the caller aborts.
    const mockFetch = vi.fn().mockImplementation(
      (_url: string, opts?: { signal?: AbortSignal }) =>
        new Promise<never>((_resolve, reject) => {
          opts?.signal?.addEventListener("abort", () => {
            reject(new DOMException("The operation was aborted.", "AbortError"));
          });
        }),
    );
    vi.stubGlobal("fetch", mockFetch);

    // Start the call first so its timeout is registered, then jump the clock
    // well past it before awaiting the result.
    const pending = detectOllama();
    await vi.advanceTimersByTimeAsync(10_000);
    const result = await pending;

    expect(result.installed).toBe(false);
    expect(result.version).toBeNull();
    expect(result.installUrl).toBe(INSTALL_URL);
  }, 10000);

  it("returns installed:false when response is not ok", async () => {
    const mockFetch = vi.fn().mockResolvedValue({
      ok: false,
      status: 503,
    });
    vi.stubGlobal("fetch", mockFetch);

    const result = await detectOllama();

    expect(result.installed).toBe(false);
    expect(result.version).toBeNull();
  });
});
|
||||
|
||||
/**
 * Tests for `listOllamaModels`: mapping of the `/api/tags` payload into
 * `OllamaModel` objects, and the empty-list fallbacks.
 */
describe("listOllamaModels", () => {
  /** Builds one raw `/api/tags` entry the way the mocked response presents it. */
  const rawTag = (
    tag: string,
    size: number,
    digest: string,
    family: string,
    parameterSize: string,
  ) => ({
    name: tag,
    model: tag,
    modified_at: "2026-01-01T00:00:00Z",
    size,
    digest,
    details: {
      parent_model: "",
      format: "gguf",
      family,
      families: [family],
      parameter_size: parameterSize,
      quantization_level: "Q4_K_M",
    },
  });

  beforeEach(() => {
    vi.stubGlobal("fetch", vi.fn());
  });

  afterEach(() => {
    vi.unstubAllGlobals();
  });

  it("returns OllamaModel[] mapped from /api/tags response", async () => {
    const payload = {
      models: [
        rawTag("qwen2.5-coder:32b", 23123456789, "abc123", "qwen2", "32.8B"),
        rawTag("llama3.1:8b", 4500000000, "def456", "llama", "8.0B"),
      ],
    };
    vi.stubGlobal(
      "fetch",
      vi.fn().mockResolvedValue({ ok: true, json: async () => payload }),
    );

    const models = await listOllamaModels();

    expect(models).toHaveLength(2);
    expect(models[0]).toMatchObject({
      name: "qwen2.5-coder:32b",
      parameterSize: "32.8B",
      quantization: "Q4_K_M",
      sizeBytes: 23123456789,
      family: "qwen2",
      recommended: false,
      recommendationReason: null,
    });
    expect(models[1]).toMatchObject({
      name: "llama3.1:8b",
      family: "llama",
    });
  });

  it("returns empty array when Ollama is absent", async () => {
    vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("ECONNREFUSED")));

    const models = await listOllamaModels();

    expect(models).toEqual([]);
  });

  it("returns empty array when response is not ok", async () => {
    vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: false, status: 503 }));

    const models = await listOllamaModels();

    expect(models).toEqual([]);
  });
});
|
||||
|
||||
/**
 * Tests for `getRecommendedModel`: which installed model (if any) gets flagged
 * as recommended for a given amount of system RAM.
 */
describe("getRecommendedModel", () => {
  const GIB = 1024 * 1024 * 1024;

  /** Minimal installed-model fixture; only `name` and `family` vary between cases. */
  function makeModel(name: string, family: string): OllamaModel {
    return {
      name,
      parameterSize: "7B",
      quantization: "Q4_K_M",
      sizeBytes: 4000000000,
      family,
      recommended: false,
      recommendationReason: null,
    };
  }

  /** The small and large qwen coder variants used by the RAM-tier cases. */
  const qwenPair = (): OllamaModel[] => [
    makeModel("qwen2.5-coder:7b", "qwen2"),
    makeModel("qwen2.5-coder:32b", "qwen2"),
  ];

  /** Keeps only the entries flagged as recommended. */
  const onlyRecommended = (list: OllamaModel[]): OllamaModel[] =>
    list.filter((m) => m.recommended);

  it("recommends a 7b-class model when system RAM is 8GB", () => {
    const picked = onlyRecommended(getRecommendedModel(qwenPair(), 8 * GIB));

    expect(picked).toHaveLength(1);
    expect(picked[0].name).toBe("qwen2.5-coder:7b");
    expect(picked[0].recommendationReason).not.toBeNull();
  });

  it("recommends a 32b-class model when system RAM is 32GB", () => {
    const picked = onlyRecommended(getRecommendedModel(qwenPair(), 32 * GIB));

    expect(picked).toHaveLength(1);
    expect(picked[0].name).toBe("qwen2.5-coder:32b");
    expect(picked[0].recommendationReason).not.toBeNull();
  });

  it("returns recommended=false for all models not in catalog", () => {
    const unknownModels: OllamaModel[] = [
      makeModel("unknown-model:7b", "unknown"),
      makeModel("another-unknown:13b", "mystery"),
    ];

    // 64GB: plenty of RAM, so only the catalog lookup can rule these out.
    const result = getRecommendedModel(unknownModels, 64 * GIB);

    expect(result.every((m) => !m.recommended)).toBe(true);
  });

  it("returns empty array when no models provided", () => {
    expect(getRecommendedModel([], 16 * GIB)).toEqual([]);
  });

  it("does not recommend models that exceed 75% of system RAM", () => {
    // 4GB RAM gives a 3GB budget (75%); qwen2.5-coder:7b needs 5GB, so it must not be picked.
    const result = getRecommendedModel([makeModel("qwen2.5-coder:7b", "qwen2")], 4 * GIB);

    expect(result.every((m) => !m.recommended)).toBe(true);
  });
});
|
||||
|
|
@ -12,8 +12,6 @@ import { privateHostnameGuard, resolvePrivateHostnameAllowSet } from "./middlewa
|
|||
import { healthRoutes } from "./routes/health.js";
|
||||
import { companyRoutes } from "./routes/companies.js";
|
||||
import { companySkillRoutes } from "./routes/company-skills.js";
|
||||
import { skillRegistryRoutes } from "./routes/skill-registry.js";
|
||||
import { skillGroupRoutes } from "./routes/skill-registry-groups.js";
|
||||
import { agentRoutes } from "./routes/agents.js";
|
||||
import { projectRoutes } from "./routes/projects.js";
|
||||
import { issueRoutes } from "./routes/issues.js";
|
||||
|
|
@ -24,15 +22,12 @@ import { approvalRoutes } from "./routes/approvals.js";
|
|||
import { secretRoutes } from "./routes/secrets.js";
|
||||
import { costRoutes } from "./routes/costs.js";
|
||||
import { activityRoutes } from "./routes/activity.js";
|
||||
import { chatRoutes } from "./routes/chat.js";
|
||||
import { dashboardRoutes } from "./routes/dashboard.js";
|
||||
import { sidebarBadgeRoutes } from "./routes/sidebar-badges.js";
|
||||
import { instanceSettingsRoutes } from "./routes/instance-settings.js";
|
||||
import { ollamaRoutes } from "./routes/ollama.js";
|
||||
import { llmRoutes } from "./routes/llms.js";
|
||||
import { assetRoutes } from "./routes/assets.js";
|
||||
import { chatFileRoutes } from "./routes/chat-files.js";
|
||||
import { pushRoutes } from "./routes/push.js";
|
||||
import { initVapid } from "./services/pushService.js";
|
||||
import { accessRoutes } from "./routes/access.js";
|
||||
import { pluginRoutes } from "./routes/plugins.js";
|
||||
import { pluginUiStaticRoutes } from "./routes/plugin-ui-static.js";
|
||||
|
|
@ -156,12 +151,9 @@ export async function createApp(
|
|||
);
|
||||
api.use("/companies", companyRoutes(db, opts.storageService));
|
||||
api.use(companySkillRoutes(db));
|
||||
api.use(skillRegistryRoutes(db));
|
||||
api.use(skillGroupRoutes(db));
|
||||
api.use(agentRoutes(db));
|
||||
api.use(ollamaRoutes());
|
||||
api.use(assetRoutes(db, opts.storageService));
|
||||
api.use(chatFileRoutes(db, opts.storageService));
|
||||
api.use("/push", pushRoutes(db));
|
||||
api.use(projectRoutes(db));
|
||||
api.use(issueRoutes(db, opts.storageService, {
|
||||
feedbackExportService: opts.feedbackExportService,
|
||||
|
|
@ -173,7 +165,6 @@ export async function createApp(
|
|||
api.use(secretRoutes(db));
|
||||
api.use(costRoutes(db));
|
||||
api.use(activityRoutes(db));
|
||||
api.use(chatRoutes(db));
|
||||
api.use(dashboardRoutes(db));
|
||||
api.use(sidebarBadgeRoutes(db));
|
||||
api.use(instanceSettingsRoutes(db));
|
||||
|
|
@ -308,13 +299,6 @@ export async function createApp(
|
|||
|
||||
app.use(errorHandler);
|
||||
|
||||
// Initialize VAPID for push notifications (graceful skip if keys not set)
|
||||
try {
|
||||
initVapid();
|
||||
} catch (err) {
|
||||
logger.warn({ err }, "VAPID init skipped — push notifications unavailable");
|
||||
}
|
||||
|
||||
jobCoordinator.start();
|
||||
scheduler.start();
|
||||
const feedbackExportTimer = opts.feedbackExportService
|
||||
|
|
|
|||
40
server/src/data/ollama-model-catalog.json
Normal file
40
server/src/data/ollama-model-catalog.json
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"models": [
|
||||
{
|
||||
"family": "qwen2",
|
||||
"variants": [
|
||||
{ "name": "qwen2.5-coder:7b", "ramGb": 5, "vramGb": 5, "quality": "fast" },
|
||||
{ "name": "qwen2.5-coder:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" },
|
||||
{ "name": "qwen2.5-coder:32b", "ramGb": 22, "vramGb": 22, "quality": "best" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"family": "llama",
|
||||
"variants": [
|
||||
{ "name": "llama3.2:3b", "ramGb": 3, "vramGb": 3, "quality": "fast" },
|
||||
{ "name": "llama3.1:8b", "ramGb": 6, "vramGb": 6, "quality": "balanced" },
|
||||
{ "name": "llama3.1:70b", "ramGb": 48, "vramGb": 48, "quality": "best" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"family": "mistral",
|
||||
"variants": [
|
||||
{ "name": "mistral:7b", "ramGb": 5, "vramGb": 5, "quality": "balanced" },
|
||||
{ "name": "mistral:22b", "ramGb": 14, "vramGb": 14, "quality": "best" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"family": "phi",
|
||||
"variants": [
|
||||
{ "name": "phi4:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"family": "deepseek",
|
||||
"variants": [
|
||||
{ "name": "deepseek-r1:7b", "ramGb": 5, "vramGb": 5, "quality": "reasoning" },
|
||||
{ "name": "deepseek-r1:32b", "ramGb": 22, "vramGb": 22, "quality": "reasoning" }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
54
server/src/routes/ollama.ts
Normal file
54
server/src/routes/ollama.ts
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import os from "node:os";
|
||||
import { Router } from "express";
|
||||
import { assertCompanyAccess } from "./authz.js";
|
||||
import { detectOllama, listOllamaModels, getRecommendedModel } from "../services/ollama.js";
|
||||
|
||||
/**
 * Builds the router exposing Ollama detection and model listing, scoped per company.
 *
 * Routes:
 * - `GET /companies/:companyId/ollama/status` responds with the result of `detectOllama()`.
 * - `GET /companies/:companyId/ollama/models` responds with `{ models, ramGb }`, where
 *   `models` are the installed models annotated by `getRecommendedModel` and `ramGb`
 *   is total system memory rounded to whole GiB. When Ollama is not detected the
 *   response is `{ models: [], ramGb: 0 }`.
 *
 * Both handlers call `assertCompanyAccess(req, companyId)` before doing any work.
 *
 * @returns A router carrying the two routes above.
 */
export function ollamaRoutes(): Router {
  const router = Router();

  const BYTES_PER_GIB = 1073741824;

  /**
   * Shared error mapping for both handlers: an error object carrying a
   * `statusCode` property is echoed with that status and its message;
   * anything else becomes a generic 500.
   * NOTE(review): presumably `assertCompanyAccess` throws errors with
   * `statusCode` — confirm against its implementation.
   */
  const sendError = (
    res: { status(code: number): { json(body: unknown): unknown } },
    err: unknown,
  ): void => {
    if (err && typeof err === "object" && "statusCode" in err) {
      const e = err as { statusCode: number; message: string };
      res.status(e.statusCode).json({ error: e.message });
      return;
    }
    res.status(500).json({ error: "Unexpected error" });
  };

  // GET /companies/:companyId/ollama/status
  router.get("/companies/:companyId/ollama/status", async (req, res) => {
    const { companyId } = req.params;
    try {
      assertCompanyAccess(req, companyId);
      const status = await detectOllama();
      res.json(status);
    } catch (err: unknown) {
      sendError(res, err);
    }
  });

  // GET /companies/:companyId/ollama/models
  router.get("/companies/:companyId/ollama/models", async (req, res) => {
    const { companyId } = req.params;
    try {
      assertCompanyAccess(req, companyId);

      const status = await detectOllama();
      if (!status.installed) {
        res.json({ models: [], ramGb: 0 });
        return;
      }

      const models = await listOllamaModels();
      // Read total memory once so the recommendation and the reported figure agree.
      const totalRamBytes = os.totalmem();
      const enrichedModels = getRecommendedModel(models, totalRamBytes);
      const ramGb = Math.round(totalRamBytes / BYTES_PER_GIB);

      res.json({ models: enrichedModels, ramGb });
    } catch (err: unknown) {
      sendError(res, err);
    }
  });

  return router;
}
|
||||
160
server/src/services/ollama.ts
Normal file
160
server/src/services/ollama.ts
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
import { createRequire } from "node:module";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
|
||||
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";
|
||||
const OLLAMA_TIMEOUT_MS = 3000;
|
||||
const INSTALL_URL = "https://ollama.com/download";
|
||||
|
||||
/** Result of probing the local Ollama server, as returned by `detectOllama`. */
export interface OllamaStatus {
  /** True only when the version endpoint answered with an OK response. */
  installed: boolean;

  /** Version string reported by the server, or `null` when absent or not installed. */
  version: string | null;

  /** Download page to point users at; populated whether or not Ollama was found. */
  installUrl: string;
}
|
||||
|
||||
/** One locally installed Ollama model, normalised from the tags listing. */
export interface OllamaModel {
  /** Model tag as reported by Ollama, e.g. `qwen2.5-coder:32b`. */
  name: string;

  /** Parameter-count label from the model details (empty string when missing). */
  parameterSize: string;

  /** Quantization level from the model details (empty string when missing). */
  quantization: string;

  /** Size of the model as reported by Ollama, in bytes. */
  sizeBytes: number;

  /** Model family from the model details (empty string when missing). */
  family: string;

  /** Whether this model was picked as the recommendation; `false` as listed. */
  recommended: boolean;

  /** Human-readable explanation for the pick, or `null` when not recommended. */
  recommendationReason: string | null;
}
|
||||
|
||||
/** Shape of the JSON body this module expects from Ollama's `/api/tags` endpoint. */
interface OllamaTagsResponse {
  /** One entry per installed model. */
  models: {
    name: string;
    model: string;
    modified_at: string;
    /** Copied into `OllamaModel.sizeBytes`. */
    size: number;
    digest: string;
    /** Source of the family, parameter-size and quantization fields on `OllamaModel`. */
    details: {
      parent_model: string;
      format: string;
      family: string;
      families: string[];
      parameter_size: string;
      quantization_level: string;
    };
  }[];
}
|
||||
|
||||
/** One model variant entry from the bundled model catalog JSON. */
interface CatalogVariant {
  /** Model tag; matched exactly against an installed model's `name`. */
  name: string;

  /** RAM requirement in GB, compared against the usable share of system RAM. */
  ramGb: number;

  /** VRAM figure in GB carried by the catalog. */
  vramGb: number;

  /** Quality tier label, ranked through `QUALITY_RANK`. */
  quality: string;
}
|
||||
|
||||
/** A model family in the catalog together with its known variants. */
interface CatalogFamily {
  /** Family label, e.g. `qwen2` or `llama`. */
  family: string;

  /** Variants listed under this family. */
  variants: CatalogVariant[];
}
|
||||
|
||||
/** Root object of the bundled model catalog JSON. */
interface ModelCatalog {
  /** Catalog entries, grouped by family. */
  models: CatalogFamily[];
}
|
||||
|
||||
/** Parsed catalog, kept after the first successful load so later calls skip disk I/O. */
let cachedCatalog: ModelCatalog | null = null;

/**
 * Loads the bundled model catalog from `../data/ollama-model-catalog.json`
 * (resolved relative to this module).
 *
 * The file is read and parsed at most once per process; later calls return the
 * same object, so callers must treat it as read-only.
 *
 * @returns The parsed catalog.
 * @throws If the file cannot be read, is not valid JSON, or lacks a top-level
 *   `models` array.
 */
function loadCatalog(): ModelCatalog {
  if (cachedCatalog) return cachedCatalog;

  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const catalogPath = path.resolve(moduleDir, "../data/ollama-model-catalog.json");
  const parsed: unknown = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));

  // Fail here with a clear message rather than with an opaque TypeError when
  // a caller later iterates `models`.
  if (
    typeof parsed !== "object" ||
    parsed === null ||
    !Array.isArray((parsed as { models?: unknown }).models)
  ) {
    throw new Error(
      `Invalid Ollama model catalog at ${catalogPath}: expected a top-level "models" array`,
    );
  }

  cachedCatalog = parsed as ModelCatalog;
  return cachedCatalog;
}
|
||||
|
||||
/**
 * Probes the Ollama server's `/api/version` endpoint to find out whether
 * Ollama is reachable.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. Any failure (network error,
 * abort, non-OK response, unparsable body) is reported as "not installed"
 * rather than thrown.
 *
 * @returns Status with `installed`, the reported `version` (or `null`), and the
 *   install URL.
 */
export async function detectOllama(): Promise<OllamaStatus> {
  const notInstalled: OllamaStatus = {
    installed: false,
    version: null,
    installUrl: INSTALL_URL,
  };

  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), OLLAMA_TIMEOUT_MS);

  try {
    const response = await fetch(`${OLLAMA_BASE_URL}/api/version`, {
      signal: abort.signal,
    });

    if (response.ok) {
      const payload = (await response.json()) as { version?: string };
      return {
        installed: true,
        version: payload.version ?? null,
        installUrl: INSTALL_URL,
      };
    }

    return notInstalled;
  } catch {
    return notInstalled;
  } finally {
    // Always disarm the abort timer, whichever path returned.
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Lists locally installed models by calling the Ollama server's `/api/tags`
 * endpoint.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. Any failure (network error,
 * abort, non-OK response, unparsable body) yields an empty list rather than an
 * exception.
 *
 * @returns The installed models, each with `recommended: false` and
 *   `recommendationReason: null`.
 */
export async function listOllamaModels(): Promise<OllamaModel[]> {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), OLLAMA_TIMEOUT_MS);

  try {
    const response = await fetch(`${OLLAMA_BASE_URL}/api/tags`, {
      signal: abort.signal,
    });
    if (!response.ok) return [];

    const payload = (await response.json()) as OllamaTagsResponse;
    const tags = payload.models ?? [];

    return tags.map((tag) => {
      const details = tag.details;
      return {
        name: tag.name,
        parameterSize: details?.parameter_size ?? "",
        quantization: details?.quantization_level ?? "",
        sizeBytes: tag.size,
        family: details?.family ?? "",
        recommended: false,
        recommendationReason: null,
      };
    });
  } catch {
    return [];
  } finally {
    // Always disarm the abort timer, whichever path returned.
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Ordering of catalog quality tiers; a higher number wins when choosing a
 * recommendation. Tiers not listed here have no entry.
 */
const QUALITY_RANK: Record<string, number> = {
  fast: 1,
  balanced: 2,
  reasoning: 3,
  best: 4,
};
|
||||
|
||||
/**
 * Flags at most one installed model as the recommended choice for this machine.
 *
 * A model is eligible when its name exactly matches a catalog variant and that
 * variant's `ramGb` fits within 75% of system RAM. Among eligible models the
 * one with the highest `QUALITY_RANK` wins; on a tie the model listed first in
 * `models` is kept.
 *
 * @param models Installed models; the array and its items are not mutated.
 * @param systemRamBytes Total system memory in bytes.
 * @returns A new array in the same order, with `recommended` and
 *   `recommendationReason` set on the chosen model and reset to `false` / `null`
 *   on every other one. Empty input returns an empty array.
 */
export function getRecommendedModel(models: OllamaModel[], systemRamBytes: number): OllamaModel[] {
  if (models.length === 0) return [];

  const BYTES_PER_GIB = 1024 * 1024 * 1024;
  const systemRamGb = systemRamBytes / BYTES_PER_GIB;
  const usableRamGb = systemRamGb * 0.75;

  // Flatten the family-grouped catalog into a name -> variant lookup.
  const catalog = loadCatalog();
  const catalogByName = new Map<string, CatalogVariant>();
  for (const family of catalog.models) {
    for (const variant of family.variants) {
      catalogByName.set(variant.name, variant);
    }
  }

  // Pick the highest-ranked catalog variant that is installed and fits the budget.
  let bestEntry: CatalogVariant | null = null;
  let bestQualityRank = -1;
  for (const model of models) {
    const entry = catalogByName.get(model.name);
    if (!entry) continue;

    // Written as `!(a <= b)` rather than `a > b` so that a NaN budget or a
    // missing/NaN `ramGb` rejects the entry instead of slipping through
    // (every comparison against NaN is false).
    if (!(entry.ramGb <= usableRamGb)) continue;

    const rank = QUALITY_RANK[entry.quality] ?? 0;
    if (rank > bestQualityRank) {
      bestQualityRank = rank;
      bestEntry = entry;
    }
  }

  const winner = bestEntry;
  return models.map((model) => {
    if (winner && model.name === winner.name) {
      return {
        ...model,
        recommended: true,
        recommendationReason: `Best fit for your system: ${winner.quality} quality, requires ${winner.ramGb}GB RAM (you have ${Math.round(systemRamGb)}GB)`,
      };
    }
    return { ...model, recommended: false, recommendationReason: null };
  });
}
|
||||
Loading…
Add table
Reference in a new issue