nexus/server/src/services/ollama.ts
Nexus Dev a9817a9659 feat(30-01): hardware detection, nexus-settings, extended model catalog
- Add hardwareService with Apple Silicon / GPU / cpu_only tier detection
- Add 3s Promise.race timeout for si.graphics() with cpu_only fallback
- Add nexusSettingsService with Zod validation and file-backed persistence
- Extend ollama-model-catalog.json with tier arrays on every variant
- Add qwen3:8b family to catalog
- Update getRecommendedModel to accept optional hardwareTier parameter
- All 13 unit tests pass (TDD green)
2026-04-04 03:55:49 +00:00

204 lines
5.6 KiB
TypeScript

import { fileURLToPath } from "node:url";
import path from "node:path";
import fs from "node:fs";
// Base URL of the Ollama HTTP API. Taken from the OLLAMA_BASE_URL environment
// variable when it is set (non-nullish), otherwise the localhost default below.
// Request paths such as `/api/version` are appended directly to this value.
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";
// Milliseconds after which each Ollama request in this module is aborted.
const OLLAMA_TIMEOUT_MS = 3000;
// Download page returned to callers in `OllamaStatus.installUrl`.
const INSTALL_URL = "https://ollama.com/download";
/**
 * Result of probing the Ollama server, as returned by `detectOllama`.
 */
export interface OllamaStatus {
/** True only when the `/api/version` request completed with an OK response. */
installed: boolean;
/** Version reported in the response body; null when absent or when the probe failed. */
version: string | null;
/** Ollama download page; populated on every result, installed or not. */
installUrl: string;
}
/**
 * A model entry as produced by `listOllamaModels`, carrying the
 * recommendation fields that `getRecommendedModel` fills in.
 */
export interface OllamaModel {
/** Model name copied from the `/api/tags` entry; also matched against catalog variant names. */
name: string;
/** `details.parameter_size` from the tags entry, or "" when missing. */
parameterSize: string;
/** `details.quantization_level` from the tags entry, or "" when missing. */
quantization: string;
/** The tags entry's `size` field, passed through unchanged. */
sizeBytes: number;
/** `details.family` from the tags entry, or "" when missing. */
family: string;
/** Always false from `listOllamaModels`; `getRecommendedModel` sets it to true on its pick. */
recommended: boolean;
/** Human-readable explanation on the recommended entry; null on every other entry. */
recommendationReason: string | null;
}
/**
 * Shape this module assumes for the JSON body of `GET /api/tags`.
 *
 * The body is cast to this type without runtime validation. Of these fields,
 * `listOllamaModels` reads only `name`, `size`, and the `details` members
 * `parameter_size`, `quantization_level` and `family`.
 */
interface OllamaTagsResponse {
models: Array<{
name: string;
model: string;
modified_at: string;
size: number;
digest: string;
details: {
parent_model: string;
format: string;
family: string;
families: string[];
parameter_size: string;
quantization_level: string;
};
}>;
}
/**
 * Shape this module assumes for the JSON body of `GET /api/ps`.
 *
 * The body is cast to this type without runtime validation. Of these fields,
 * `getOllamaMemoryUsage` reads only `name`, `model` and `size_vram`.
 */
interface OllamaPsResponse {
models: Array<{
name: string;
model: string;
size: number;
digest: string;
details: {
parent_model: string;
format: string;
family: string;
families: string[];
parameter_size: string;
quantization_level: string;
};
expires_at: string;
// Value returned to callers of `getOllamaMemoryUsage` for the matching model.
size_vram: number;
}>;
}
/**
 * One model variant from the catalog JSON loaded by `loadCatalog`.
 */
interface CatalogVariant {
/** Compared for exact equality with `OllamaModel.name` when picking a recommendation. */
name: string;
/** RAM requirement in GB; checked against 75% of system RAM in `getRecommendedModel`. */
ramGb: number;
/** Not read by any code in this file. */
vramGb: number;
/** Quality label looked up in `QUALITY_RANK`; labels missing from that table rank as 0. */
quality: string;
/** Optional tier list; when present and a hardware tier is requested, it must include that tier. */
tier?: string[];
}
/**
 * A named group of catalog variants.
 */
interface CatalogFamily {
/** Family name; copied onto each variant when `getRecommendedModel` indexes the catalog. */
family: string;
/** Variants belonging to this family. */
variants: CatalogVariant[];
}
/**
 * Root object of the catalog JSON file as typed by `loadCatalog`.
 */
interface ModelCatalog {
/** Every model family listed in the catalog. */
models: CatalogFamily[];
}
/**
 * Reads and parses the model catalog that ships next to this module.
 *
 * The file is located relative to this module's own URL, read synchronously
 * on every call, and the parsed JSON is cast to `ModelCatalog` without any
 * runtime validation.
 *
 * @returns The parsed catalog.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function loadCatalog(): ModelCatalog {
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const catalogFile = path.resolve(moduleDir, "../data/ollama-model-catalog.json");
  return JSON.parse(fs.readFileSync(catalogFile, "utf-8")) as ModelCatalog;
}
/**
 * Probes the Ollama server by requesting `/api/version`.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. This function never
 * rejects: a network error, an abort, a non-OK status or an unreadable body
 * all produce a "not installed" result.
 *
 * @returns Installation status, the reported version (or null), and the
 *   download URL.
 */
export async function detectOllama(): Promise<OllamaStatus> {
  const notDetected: OllamaStatus = { installed: false, version: null, installUrl: INSTALL_URL };

  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), OLLAMA_TIMEOUT_MS);

  try {
    const response = await fetch(`${OLLAMA_BASE_URL}/api/version`, { signal: aborter.signal });
    if (response.ok) {
      const payload = (await response.json()) as { version?: string };
      return { installed: true, version: payload.version ?? null, installUrl: INSTALL_URL };
    }
    return notDetected;
  } catch {
    // Unreachable server, timeout abort, or malformed body: report as absent.
    return notDetected;
  } finally {
    // Always cancel the abort timer so it cannot fire after we have returned.
    clearTimeout(timer);
  }
}
/**
 * Lists the models reported by the Ollama server's `/api/tags` endpoint.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. This function never
 * rejects: any failure (network error, abort, non-OK status, unexpected
 * body) yields an empty list.
 *
 * @returns One `OllamaModel` per reported entry, each with `recommended`
 *   false and `recommendationReason` null; missing detail fields become "".
 */
export async function listOllamaModels(): Promise<OllamaModel[]> {
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), OLLAMA_TIMEOUT_MS);

  try {
    const response = await fetch(`${OLLAMA_BASE_URL}/api/tags`, { signal: aborter.signal });
    if (!response.ok) {
      return [];
    }

    const payload = (await response.json()) as OllamaTagsResponse;
    const reported = payload.models ?? [];
    return reported.map((entry) => {
      const details = entry.details;
      return {
        name: entry.name,
        parameterSize: details?.parameter_size ?? "",
        quantization: details?.quantization_level ?? "",
        sizeBytes: entry.size,
        family: details?.family ?? "",
        recommended: false,
        recommendationReason: null,
      };
    });
  } catch {
    // Best-effort listing: treat every failure as "no models".
    return [];
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Looks up the `size_vram` value that `/api/ps` reports for a model.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. This function never
 * rejects: every failure yields null.
 *
 * @param modelName - Name to match against each entry's `name` or `model`
 *   field; a null or empty name returns null without making a request.
 * @returns The matching entry's `size_vram`, or null when there is no match,
 *   the field is missing, or the request failed.
 */
export async function getOllamaMemoryUsage(modelName: string | null): Promise<number | null> {
  if (!modelName) {
    return null;
  }

  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), OLLAMA_TIMEOUT_MS);

  try {
    const response = await fetch(`${OLLAMA_BASE_URL}/api/ps`, { signal: aborter.signal });
    if (!response.ok) {
      return null;
    }

    const payload = (await response.json()) as OllamaPsResponse;
    const running = payload.models ?? [];
    const hit = running.find((entry) => entry.name === modelName || entry.model === modelName);
    if (hit === undefined) {
      return null;
    }
    return hit.size_vram ?? null;
  } catch {
    // Best-effort lookup: unreachable server or bad body means "unknown".
    return null;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Ordering of catalog quality labels, lowest to highest; a larger number
 * wins when `getRecommendedModel` compares candidates. Labels that are not
 * listed here are treated as rank 0 by that function.
 */
const QUALITY_RANK: Record<string, number> = {
  fast: 1,
  balanced: 2,
  reasoning: 3,
  best: 4,
};
/**
 * Marks at most one catalog-known model as the recommended choice.
 *
 * A model is eligible when its name matches a catalog variant exactly, the
 * variant's `ramGb` does not exceed 75% of system RAM, and, if both a
 * `hardwareTier` is given and the variant declares a `tier` list, that list
 * contains the tier. Among eligible models the highest `QUALITY_RANK` wins;
 * on a tie the model appearing first in `models` is kept.
 *
 * @param models - Models to annotate; the array and its items are not mutated.
 * @param systemRamBytes - Total system RAM in bytes.
 * @param hardwareTier - Optional tier used to filter variants that declare tiers.
 * @returns A new array of copies in the same order, with `recommended` and
 *   `recommendationReason` set on the pick and reset on all other entries.
 *   Returns an empty array for empty input without loading the catalog.
 */
export function getRecommendedModel(
  models: OllamaModel[],
  systemRamBytes: number,
  hardwareTier?: "gpu" | "apple_silicon" | "cpu_only",
): OllamaModel[] {
  if (models.length === 0) return [];

  type NamedVariant = CatalogVariant & { family: string };

  // Index every catalog variant by name, tagging it with its family.
  const variantsByName = new Map<string, NamedVariant>();
  loadCatalog().models.forEach((group) => {
    group.variants.forEach((variant) => {
      variantsByName.set(variant.name, { ...variant, family: group.family });
    });
  });

  const bytesPerGb = 1024 * 1024 * 1024;
  // Only three quarters of total RAM is considered available to a model.
  const ramBudgetGb = (systemRamBytes / bytesPerGb) * 0.75;

  const isEligible = (variant: NamedVariant): boolean => {
    if (variant.ramGb > ramBudgetGb) return false;
    // The tier filter applies only when both sides specify a tier.
    return !hardwareTier || !variant.tier || variant.tier.includes(hardwareTier);
  };

  // Strict ">" below keeps the earliest model when quality ranks tie.
  let pick: NamedVariant | null = null;
  let pickRank = -1;
  for (const { name } of models) {
    const candidate = variantsByName.get(name);
    if (!candidate || !isEligible(candidate)) continue;
    const candidateRank = QUALITY_RANK[candidate.quality] ?? 0;
    if (candidateRank > pickRank) {
      pick = candidate;
      pickRank = candidateRank;
    }
  }

  const chosen = pick;
  const reason =
    chosen === null
      ? null
      : `Best fit for your system: ${chosen.quality} quality, requires ${chosen.ramGb}GB RAM (you have ${Math.round(systemRamBytes / bytesPerGb)}GB)`;

  return models.map((model) =>
    chosen !== null && model.name === chosen.name
      ? { ...model, recommended: true, recommendationReason: reason }
      : { ...model, recommended: false, recommendationReason: null },
  );
}