/*
 * Commit notes:
 * - Add hardwareService with Apple Silicon / GPU / cpu_only tier detection
 * - Add 3s Promise.race timeout for si.graphics() with cpu_only fallback
 * - Add nexusSettingsService with Zod validation and file-backed persistence
 * - Extend ollama-model-catalog.json with tier arrays on every variant
 * - Add qwen3:8b family to catalog
 * - Update getRecommendedModel to accept optional hardwareTier parameter
 * - All 13 unit tests pass (TDD green)
 *
 * File: 204 lines, 5.6 KiB, TypeScript
 */
import { fileURLToPath } from "node:url";
|
|
import path from "node:path";
|
|
import fs from "node:fs";
|
|
|
|
/**
 * Base URL of the Ollama HTTP API.
 *
 * Read from the `OLLAMA_BASE_URL` environment variable. Surrounding whitespace
 * and trailing slashes are stripped so request paths can be appended as
 * `${OLLAMA_BASE_URL}/api/...` without producing a double slash. `||` (not
 * `??`) is deliberate: an unset, empty, or blank variable must fall back to
 * the local default instead of yielding an unusable empty base URL.
 */
const OLLAMA_BASE_URL =
  (process.env.OLLAMA_BASE_URL ?? "").trim().replace(/\/+$/, "") || "http://localhost:11434";

/** Abort any single Ollama HTTP request after this many milliseconds. */
const OLLAMA_TIMEOUT_MS = 3000;

/** Download page handed back to callers in `OllamaStatus.installUrl`. */
const INSTALL_URL = "https://ollama.com/download";

/**
 * Outcome of probing the Ollama server (see `detectOllama`).
 */
export interface OllamaStatus {
  /** `true` when the version endpoint answered with a successful response. */
  installed: boolean;
  /** Version string reported by the server, or `null` when unavailable. */
  version: string | null;
  /** Where the user can download Ollama; populated on every result. */
  installUrl: string;
}

/**
 * A locally available Ollama model as exposed by this module
 * (built in `listOllamaModels`, annotated in `getRecommendedModel`).
 */
export interface OllamaModel {
  /** Model name as reported by the server. */
  name: string;
  /** Reported parameter size; empty string when the server omits it. */
  parameterSize: string;
  /** Reported quantization level; empty string when the server omits it. */
  quantization: string;
  /** Model size in bytes as reported by the server. */
  sizeBytes: number;
  /** Reported model family; empty string when the server omits it. */
  family: string;
  /** Set to `true` by `getRecommendedModel` on the model it selects. */
  recommended: boolean;
  /** Human-readable explanation for the recommendation, otherwise `null`. */
  recommendationReason: string | null;
}

/**
 * Shape of the JSON body this module reads from `GET /api/tags`.
 *
 * `models` and each entry's `details` are declared optional because the
 * consumer in this file already guards against their absence
 * (`body.models ?? []`, `m.details?.…`); the types now say the same thing.
 */
interface OllamaTagsResponse {
  models?: Array<{
    name: string;
    model: string;
    modified_at: string;
    /** Mapped to `OllamaModel.sizeBytes`. */
    size: number;
    digest: string;
    details?: {
      parent_model: string;
      format: string;
      family: string;
      families: string[];
      parameter_size: string;
      quantization_level: string;
    };
  }>;
}

/**
 * Shape of the JSON body this module reads from `GET /api/ps`.
 *
 * `models` and `details` are declared optional: the consumer in this file
 * falls back to an empty list when `models` is missing and never reads
 * `details`.
 */
interface OllamaPsResponse {
  models?: Array<{
    /** Compared against the requested model name. */
    name: string;
    /** Also compared against the requested model name. */
    model: string;
    size: number;
    digest: string;
    details?: {
      parent_model: string;
      format: string;
      family: string;
      families: string[];
      parameter_size: string;
      quantization_level: string;
    };
    expires_at: string;
    /** Value returned by `getOllamaMemoryUsage` for the matching model. */
    size_vram: number;
  }>;
}

/**
 * One concrete model variant listed in the bundled model catalog.
 */
interface CatalogVariant {
  /** Model name; matched exactly against `OllamaModel.name`. */
  name: string;
  /** RAM requirement in GB, compared against the usable share of system RAM. */
  ramGb: number;
  /** VRAM figure in GB; not read by the code in this file. */
  vramGb: number;
  /** Quality label; ranked through `QUALITY_RANK` (unknown labels rank 0). */
  quality: string;
  /**
   * Hardware tiers this variant is suitable for. When omitted, the variant is
   * never excluded by the hardware-tier filter.
   */
  tier?: string[];
}

/**
 * A model family in the catalog together with its variants.
 */
interface CatalogFamily {
  /** Family identifier; copied onto every variant when the lookup map is built. */
  family: string;
  /** Variants that belong to this family. */
  variants: CatalogVariant[];
}

/**
 * Top-level structure of `ollama-model-catalog.json`.
 */
interface ModelCatalog {
  /** Every model family known to the catalog. */
  models: CatalogFamily[];
}

/**
 * Reads and parses the bundled model catalog from
 * `../data/ollama-model-catalog.json`, resolved relative to this module.
 *
 * The file is read synchronously on every call. The parsed value is checked
 * structurally before being returned, so a malformed catalog fails here with
 * a descriptive message instead of later, inside whatever iterates it.
 *
 * @returns The parsed catalog.
 * @throws If the file cannot be read, is not valid JSON, or does not contain
 *   a top-level `models` array whose entries each carry a `variants` array.
 */
function loadCatalog(): ModelCatalog {
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const catalogPath = path.resolve(moduleDir, "../data/ollama-model-catalog.json");
  const parsed: unknown = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));

  const families: unknown =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { models?: unknown }).models
      : undefined;

  const isFamily = (value: unknown): boolean =>
    typeof value === "object" &&
    value !== null &&
    Array.isArray((value as { variants?: unknown }).variants);

  if (!Array.isArray(families) || !families.every(isFamily)) {
    throw new Error(
      `Invalid Ollama model catalog at ${catalogPath}: expected { models: [{ family, variants: [...] }] }`,
    );
  }

  // Shape verified above; the cast only attaches the static type.
  return parsed as ModelCatalog;
}

/**
 * Probes the Ollama server's `/api/version` endpoint.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. This function never
 * rejects: network errors, timeouts, non-2xx responses, and unparsable bodies
 * all resolve to `installed: false`.
 *
 * @returns `installed: true` when the endpoint answers successfully with a
 *   JSON body. `version` is the reported version only when it is a string;
 *   any other value is reported as `null` so the declared type holds.
 */
export async function detectOllama(): Promise<OllamaStatus> {
  const unavailable: OllamaStatus = { installed: false, version: null, installUrl: INSTALL_URL };

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), OLLAMA_TIMEOUT_MS);
  try {
    const res = await fetch(`${OLLAMA_BASE_URL}/api/version`, { signal: controller.signal });
    if (!res.ok) return unavailable;

    // The body is untrusted JSON: inspect it instead of asserting a shape.
    const body: unknown = await res.json();
    const reported =
      typeof body === "object" && body !== null
        ? (body as { version?: unknown }).version
        : undefined;

    return {
      installed: true,
      version: typeof reported === "string" ? reported : null,
      installUrl: INSTALL_URL,
    };
  } catch {
    // Connection refused, abort on timeout, or invalid JSON.
    return unavailable;
  } finally {
    clearTimeout(timeout);
  }
}

/**
 * Lists the models known to the Ollama server via `/api/tags`.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. This function never
 * rejects: network errors, timeouts, non-2xx responses, and unparsable bodies
 * all resolve to an empty list.
 *
 * The response is treated as untrusted JSON. Entries that are not objects
 * with a string `name` are skipped individually (rather than discarding the
 * whole list), missing text fields become `""`, and a missing or non-numeric
 * `size` becomes `0` so `sizeBytes` is always a number.
 *
 * @returns One `OllamaModel` per valid entry, with `recommended` set to
 *   `false` and `recommendationReason` set to `null`.
 */
export async function listOllamaModels(): Promise<OllamaModel[]> {
  const textOrEmpty = (value: unknown): string => (typeof value === "string" ? value : "");

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), OLLAMA_TIMEOUT_MS);
  try {
    const res = await fetch(`${OLLAMA_BASE_URL}/api/tags`, { signal: controller.signal });
    if (!res.ok) return [];

    const body: unknown = await res.json();
    const entries: unknown =
      typeof body === "object" && body !== null ? (body as { models?: unknown }).models : undefined;
    if (!Array.isArray(entries)) return [];

    const result: OllamaModel[] = [];
    for (const entry of entries as unknown[]) {
      if (typeof entry !== "object" || entry === null) continue;

      const { name, size, details } = entry as {
        name?: unknown;
        size?: unknown;
        details?: unknown;
      };
      if (typeof name !== "string") continue;

      const info: Record<string, unknown> =
        typeof details === "object" && details !== null
          ? (details as Record<string, unknown>)
          : {};

      result.push({
        name,
        parameterSize: textOrEmpty(info.parameter_size),
        quantization: textOrEmpty(info.quantization_level),
        sizeBytes: typeof size === "number" && Number.isFinite(size) ? size : 0,
        family: textOrEmpty(info.family),
        recommended: false,
        recommendationReason: null,
      });
    }
    return result;
  } catch {
    // Connection refused, abort on timeout, or invalid JSON.
    return [];
  } finally {
    clearTimeout(timeout);
  }
}

/**
 * Looks up the `size_vram` value the Ollama server reports for a running
 * model via `/api/ps`.
 *
 * Names are compared after applying Ollama's implicit default tag: a
 * reference without a tag is treated as `<name>:latest`, so `"llama3"`
 * matches a running `"llama3:latest"`. Exact matches behave as before.
 *
 * The request is aborted after `OLLAMA_TIMEOUT_MS`. This function never
 * rejects.
 *
 * @param modelName Model to look up; `null` or an empty string short-circuits
 *   to `null` without contacting the server.
 * @returns The reported `size_vram` of the first matching entry, or `null`
 *   when the model is not listed, the value is not a number, or the request
 *   fails.
 */
export async function getOllamaMemoryUsage(modelName: string | null): Promise<number | null> {
  if (!modelName) return null;

  // Only the last path segment can carry a tag; a colon earlier in the
  // reference (e.g. a registry host with a port) is not a tag separator.
  const withDefaultTag = (ref: string): string => {
    const lastSegment = ref.slice(ref.lastIndexOf("/") + 1);
    return lastSegment.includes(":") ? ref : `${ref}:latest`;
  };
  const wanted = withDefaultTag(modelName);

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), OLLAMA_TIMEOUT_MS);
  try {
    const res = await fetch(`${OLLAMA_BASE_URL}/api/ps`, { signal: controller.signal });
    if (!res.ok) return null;

    const body: unknown = await res.json();
    const running: unknown =
      typeof body === "object" && body !== null ? (body as { models?: unknown }).models : undefined;
    if (!Array.isArray(running)) return null;

    for (const entry of running as unknown[]) {
      if (typeof entry !== "object" || entry === null) continue;

      const { name, model, size_vram: sizeVram } = entry as {
        name?: unknown;
        model?: unknown;
        size_vram?: unknown;
      };
      const isMatch = [name, model].some(
        (candidate) => typeof candidate === "string" && withDefaultTag(candidate) === wanted,
      );
      if (isMatch) {
        return typeof sizeVram === "number" ? sizeVram : null;
      }
    }
    return null;
  } catch {
    // Connection refused, abort on timeout, or invalid JSON.
    return null;
  } finally {
    clearTimeout(timeout);
  }
}

/**
 * Relative ranking of catalog quality labels; a higher number is preferred
 * when choosing a recommendation. Labels not listed here are looked up with a
 * `?? 0` fallback by the consumer, so they rank below every known label.
 */
const QUALITY_RANK: Readonly<Record<string, number>> = {
  best: 4,
  reasoning: 3,
  balanced: 2,
  fast: 1,
};

/**
 * Marks at most one of the given models as recommended for this machine.
 *
 * A model is a candidate when its name appears in the bundled catalog, its
 * catalog `ramGb` fits within 75% of system RAM, and — if `hardwareTier` is
 * given and the catalog entry declares `tier` — the entry lists that tier.
 * Among candidates the highest `QUALITY_RANK` wins; on a tie the model that
 * appears first in `models` is kept.
 *
 * @param models Installed models; not mutated.
 * @param systemRamBytes Total system RAM in bytes. A non-finite value
 *   (`NaN`, `±Infinity`) is treated as "unknown" and yields no
 *   recommendation; previously `NaN` slipped past the RAM check because
 *   `x > NaN` is always false.
 * @param hardwareTier Optional tier used to filter catalog entries.
 * @returns A new array of copies in the same order, each with `recommended`
 *   and `recommendationReason` reset, and the chosen model (if any) flagged.
 *   Returns `[]` for empty input.
 * @throws Whatever `loadCatalog` throws when the catalog cannot be loaded.
 */
export function getRecommendedModel(
  models: OllamaModel[],
  systemRamBytes: number,
  hardwareTier?: "gpu" | "apple_silicon" | "cpu_only",
): OllamaModel[] {
  if (models.length === 0) return [];

  type CatalogEntry = CatalogVariant & { family: string };

  const withoutRecommendation = (model: OllamaModel): OllamaModel => ({
    ...model,
    recommended: false,
    recommendationReason: null,
  });

  // Unknown RAM: no budget can be derived, so recommend nothing.
  if (!Number.isFinite(systemRamBytes)) {
    return models.map(withoutRecommendation);
  }

  const BYTES_PER_GIB = 1024 * 1024 * 1024;
  const totalRamGb = systemRamBytes / BYTES_PER_GIB;
  // Leave a quarter of RAM for the OS and other processes.
  const usableRamGb = totalRamGb * 0.75;

  // Flatten the catalog into a name -> entry lookup.
  const entriesByName = new Map<string, CatalogEntry>();
  for (const family of loadCatalog().models) {
    for (const variant of family.variants) {
      entriesByName.set(variant.name, { ...variant, family: family.family });
    }
  }

  // Pick the highest-ranked installed model that fits the budget and tier.
  let best: CatalogEntry | null = null;
  let bestRank = -1;
  for (const model of models) {
    const entry = entriesByName.get(model.name);
    if (!entry) continue;
    if (entry.ramGb > usableRamGb) continue;
    if (hardwareTier && entry.tier && !entry.tier.includes(hardwareTier)) continue;

    const rank = QUALITY_RANK[entry.quality] ?? 0;
    if (rank > bestRank) {
      bestRank = rank;
      best = entry;
    }
  }

  if (best === null) {
    return models.map(withoutRecommendation);
  }

  const chosenName = best.name;
  const reason = `Best fit for your system: ${best.quality} quality, requires ${best.ramGb}GB RAM (you have ${Math.round(totalRamGb)}GB)`;

  return models.map((model) =>
    model.name === chosenName
      ? { ...model, recommended: true, recommendationReason: reason }
      : withoutRecommendation(model),
  );
}
