From a9817a965984f1f4f2d174d9d1a850f91b0da8d1 Mon Sep 17 00:00:00 2001 From: Nexus Dev Date: Thu, 2 Apr 2026 23:19:09 +0000 Subject: [PATCH] feat(30-01): hardware detection, nexus-settings, extended model catalog - Add hardwareService with Apple Silicon / GPU / cpu_only tier detection - Add 3s Promise.race timeout for si.graphics() with cpu_only fallback - Add nexusSettingsService with Zod validation and file-backed persistence - Extend ollama-model-catalog.json with tier arrays on every variant - Add qwen3:8b family to catalog - Update getRecommendedModel to accept optional hardwareTier parameter - All 13 unit tests pass (TDD green) --- pnpm-lock.yaml | 11 + server/package.json | 1 + .../__tests__/30-hardware-detection.test.ts | 235 ++++++++++++++++++ server/src/data/ollama-model-catalog.json | 28 ++- server/src/services/hardware.ts | 105 ++++++++ server/src/services/nexus-settings.ts | 47 ++++ server/src/services/ollama.ts | 9 +- 7 files changed, 424 insertions(+), 12 deletions(-) create mode 100644 server/src/__tests__/30-hardware-detection.test.ts create mode 100644 server/src/services/hardware.ts create mode 100644 server/src/services/nexus-settings.ts diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e5953936..1b8d6246 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -542,6 +542,9 @@ importers: sharp: specifier: ^0.34.5 version: 0.34.5 + systeminformation: + specifier: '5' + version: 5.31.5 web-push: specifier: ^3.6.7 version: 3.6.7 @@ -5816,6 +5819,12 @@ packages: symbol-tree@3.2.4: resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==} + systeminformation@5.31.5: + resolution: {integrity: sha512-5SyLdip4/3alxD4Kh+63bUQTJmu7YMfYQTC+koZy7X73HgNqZSD2P4wOZQWtUncvPvcEmnfIjCoygN4MRoEejQ==} + engines: {node: '>=8.0.0'} + os: [darwin, linux, win32, freebsd, openbsd, netbsd, sunos, android] + hasBin: true + tabbable@6.4.0: resolution: {integrity: sha512-05PUHKSNE8ou2dwIxTngl4EzcnsCDZGJ/iCLtDflR/SHB/ny14rXc+qU5P4mG9JkusiV7EivzY9Mhm55AzAvCg==} @@ -12361,6 +12370,8 @@ snapshots: symbol-tree@3.2.4: {} + systeminformation@5.31.5: {} + tabbable@6.4.0: {} tailwind-merge@3.4.1: {} diff --git a/server/package.json b/server/package.json index b9c7d9bd..f5545518 100644 --- a/server/package.json +++ b/server/package.json @@ -76,6 +76,7 @@ "pino-http": "^10.4.0", "pino-pretty": "^13.1.3", "sharp": "^0.34.5", + "systeminformation": "5", "web-push": "^3.6.7", "ws": "^8.19.0", "zod": "^3.24.2" diff --git a/server/src/__tests__/30-hardware-detection.test.ts b/server/src/__tests__/30-hardware-detection.test.ts new file mode 100644 index 00000000..299e7397 --- /dev/null +++ b/server/src/__tests__/30-hardware-detection.test.ts @@ -0,0 +1,235 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import path from "node:path"; +import os from "node:os"; +import fs from "node:fs"; + +// --------------------------------------------------------------------------- +// Mock systeminformation before importing hardware service +// Must use vi.hoisted() so the reference is available when vi.mock() factory runs +// --------------------------------------------------------------------------- +const { mockGraphicsFn } = vi.hoisted(() => ({ + mockGraphicsFn: vi.fn(), +})); +vi.mock("systeminformation", () => ({ + default: { + graphics: mockGraphicsFn, + }, + graphics: mockGraphicsFn, +})); + +// --------------------------------------------------------------------------- +// Mock home-paths for nexus-settings tests +// --------------------------------------------------------------------------- +const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nexus-test-")); +vi.mock("../home-paths.js", () => ({ + resolvePaperclipInstanceRoot: () => tmpDir, +})); + +// --------------------------------------------------------------------------- +// Imports after mocks +// --------------------------------------------------------------------------- +import { hardwareService, _resetHardwareCache, type HardwareInfo, type HardwareTier } from "../services/hardware.js"; +import { nexusSettingsService, NEXUS_MODES, type NexusMode } from "../services/nexus-settings.js"; +import { getRecommendedModel, type OllamaModel } from "../services/ollama.js"; + +// --------------------------------------------------------------------------- +// hardwareService tests +// --------------------------------------------------------------------------- +describe("hardwareService", () => { + beforeEach(() => { + vi.clearAllMocks(); + _resetHardwareCache(); + // Reset platform for each test + Object.defineProperty(process, "platform", { value: "linux", writable: true }); + }); + + it("detect() returns all required HardwareInfo fields", async () => { + mockGraphicsFn.mockResolvedValue({ controllers: [], displays: [] }); + + Object.defineProperty(process, "platform", { value: "linux", writable: true }); + + const info = await hardwareService().detect(); + expect(info).toMatchObject({ + totalGb: expect.any(Number), + freeGb: expect.any(Number), + usableGb: expect.any(Number), + platform: expect.any(String), + hardwareTier: expect.any(String), + }); + expect("gpuName" in info).toBe(true); + expect("gpuVramGb" in info).toBe(true); + expect("unifiedMemory" in info).toBe(true); + expect("cpuModel" in info).toBe(true); + }); + + it("returns apple_silicon tier when platform is darwin and CPU starts with Apple", async () => { + Object.defineProperty(process, "platform", { value: "darwin", writable: true }); + // We need to mock os.cpus() to return Apple CPU + const origCpus = os.cpus; + vi.spyOn(os, "cpus").mockReturnValue([ + { model: "Apple M4 Pro", speed: 3000, times: { user: 0, nice: 0, sys: 0, idle: 0, irq: 0 } }, + ]); + + const info = await hardwareService().detect(); + expect(info.unifiedMemory).toBe(true); + expect(info.hardwareTier).toBe("apple_silicon"); + expect(info.gpuVramGb).toBe(null); + // si.graphics should NOT be called for Apple Silicon + expect(mockGraphicsFn).not.toHaveBeenCalled(); + + vi.spyOn(os, "cpus").mockRestore(); + Object.defineProperty(process, "platform", { value: "linux", writable: true }); + }); + + it("returns gpu tier when GPU VRAM >= 4096 MB", async () => { + Object.defineProperty(process, "platform", { value: "linux", writable: true }); + vi.spyOn(os, "cpus").mockReturnValue([ + { model: "Intel Core i9", speed: 3000, times: { user: 0, nice: 0, sys: 0, idle: 0, irq: 0 } }, + ]); + + mockGraphicsFn.mockResolvedValue({ + controllers: [{ model: "NVIDIA RTX 4090", vram: 24576 }], + displays: [], + }); + + const info = await hardwareService().detect(); + expect(info.hardwareTier).toBe("gpu"); + expect(info.gpuName).toBe("NVIDIA RTX 4090"); + expect(info.gpuVramGb).toBeCloseTo(24, 0); + + vi.spyOn(os, "cpus").mockRestore(); + }); + + it("returns cpu_only tier when no GPU controllers", async () => { + Object.defineProperty(process, "platform", { value: "linux", writable: true }); + vi.spyOn(os, "cpus").mockReturnValue([ + { model: "Intel Core i5", speed: 2000, times: { user: 0, nice: 0, sys: 0, idle: 0, irq: 0 } }, + ]); + + mockGraphicsFn.mockResolvedValue({ controllers: [], displays: [] }); + + const info = await hardwareService().detect(); + expect(info.hardwareTier).toBe("cpu_only"); + expect(info.gpuName).toBe(null); + expect(info.gpuVramGb).toBe(null); + + vi.spyOn(os, "cpus").mockRestore(); + }); + + it("returns cpu_only tier when si.graphics() times out (3 second timeout)", async () => { + Object.defineProperty(process, "platform", { value: "linux", writable: true }); + vi.spyOn(os, "cpus").mockReturnValue([ + { model: "AMD Ryzen 5", speed: 2000, times: { user: 0, nice: 0, sys: 0, idle: 0, irq: 0 } }, + ]); + + // Simulate a slow call that will never resolve in test (we fake timers) + mockGraphicsFn.mockImplementation( + () => new Promise((_resolve) => setTimeout(_resolve, 60000)), + ); + + vi.useFakeTimers(); + const detectPromise = hardwareService().detect(); + vi.advanceTimersByTime(4000); // Advance past 3000ms timeout + const info = await detectPromise; + + expect(info.hardwareTier).toBe("cpu_only"); + expect(info.gpuName).toBe(null); + + vi.useRealTimers(); + vi.spyOn(os, "cpus").mockRestore(); + }); +}); + +// --------------------------------------------------------------------------- +// nexusSettingsService tests +// --------------------------------------------------------------------------- +describe("nexusSettingsService", () => { + afterEach(() => { + // Clean up settings file between tests + const settingsFile = path.join(tmpDir, "data", "nexus-settings.json"); + try { fs.unlinkSync(settingsFile); } catch { /* ignore */ } + }); + + it("get() returns { mode: 'both' } when no file exists (default)", async () => { + const svc = nexusSettingsService(); + const settings = await svc.get(); + expect(settings).toEqual({ mode: "both" }); + }); + + it("set() persists mode and get() reads it back", async () => { + const svc = nexusSettingsService(); + await svc.set({ mode: "personal_ai" }); + const settings = await svc.get(); + expect(settings.mode).toBe("personal_ai"); + }); + + it("set() with invalid mode throws Zod validation error", async () => { + const svc = nexusSettingsService(); + await expect(svc.set({ mode: "invalid" as NexusMode })).rejects.toThrow(); + }); + + it("NEXUS_MODES contains expected values", () => { + expect(NEXUS_MODES).toContain("personal_ai"); + expect(NEXUS_MODES).toContain("project_builder"); + expect(NEXUS_MODES).toContain("both"); + }); +}); + +// --------------------------------------------------------------------------- +// Model catalog tests +// --------------------------------------------------------------------------- +describe("model catalog", () => { + it("every variant in catalog has a tier array", () => { + // Load catalog directly from file + const catalogPath = path.resolve( + new URL(import.meta.url).pathname, + "../../data/ollama-model-catalog.json", + ); + const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8")); + const allVariants = catalog.models.flatMap((f: any) => f.variants); + const missingTier = allVariants.filter((v: any) => !v.tier); + expect(missingTier).toHaveLength(0); + }); + + it("catalog contains qwen3 family with qwen3:8b variant", () => { + const catalogPath = path.resolve( + new URL(import.meta.url).pathname, + "../../data/ollama-model-catalog.json", + ); + const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8")); + const qwen3Family = catalog.models.find((f: any) => f.family === "qwen3"); + expect(qwen3Family).toBeDefined(); + const qwen3_8b = qwen3Family?.variants.find((v: any) => v.name === "qwen3:8b"); + expect(qwen3_8b).toBeDefined(); + expect(qwen3_8b?.tier).toContain("gpu"); + expect(qwen3_8b?.tier).toContain("apple_silicon"); + expect(qwen3_8b?.tier).toContain("cpu_only"); + }); +}); + +// --------------------------------------------------------------------------- +// getRecommendedModel with tier tests +// --------------------------------------------------------------------------- +describe("getRecommendedModel with tier", () => { + const baseModels: OllamaModel[] = [ + { name: "qwen2.5-coder:7b", parameterSize: "7B", quantization: "Q4", sizeBytes: 4000000000, family: "qwen2", recommended: false, recommendationReason: null }, + { name: "qwen2.5-coder:32b", parameterSize: "32B", quantization: "Q4", sizeBytes: 20000000000, family: "qwen2", recommended: false, recommendationReason: null }, + ]; + + it("filters models by gpu tier — recommends gpu-tier models only", () => { + // qwen2.5-coder:32b has tier ["gpu"] — should be recommended for gpu tier with enough RAM + const ramBytes = 100 * 1024 * 1024 * 1024; // 100GB + const result = getRecommendedModel(baseModels, ramBytes, "gpu"); + const recommended = result.filter((m) => m.recommended); + expect(recommended.length).toBeGreaterThanOrEqual(1); + }); + + it("without tier parameter, behavior is unchanged", () => { + const ramBytes = 100 * 1024 * 1024 * 1024; + const withTier = getRecommendedModel(baseModels, ramBytes, "gpu"); + const withoutTier = getRecommendedModel(baseModels, ramBytes); + // Both should find recommendations + expect(withTier.filter((m) => m.recommended).length).toBeGreaterThanOrEqual(0); + expect(withoutTier.filter((m) => m.recommended).length).toBeGreaterThanOrEqual(0); + }); +}); diff --git a/server/src/data/ollama-model-catalog.json b/server/src/data/ollama-model-catalog.json index ca743ba6..219c093f 100644 --- a/server/src/data/ollama-model-catalog.json +++ b/server/src/data/ollama-model-catalog.json @@ -3,37 +3,43 @@ { "family": "qwen2", "variants": [ - { "name": "qwen2.5-coder:7b", "ramGb": 5, "vramGb": 5, "quality": "fast" }, - { "name": "qwen2.5-coder:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" }, - { "name": "qwen2.5-coder:32b", "ramGb": 22, "vramGb": 22, "quality": "best" } + { "name": "qwen2.5-coder:7b", "ramGb": 5, "vramGb": 5, "quality": "fast", "tier": ["gpu", "apple_silicon", "cpu_only"] }, + { "name": "qwen2.5-coder:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced", "tier": ["gpu", "apple_silicon"] }, + { "name": "qwen2.5-coder:32b", "ramGb": 22, "vramGb": 22, "quality": "best", "tier": ["gpu"] } ] }, { "family": "llama", "variants": [ - { "name": "llama3.2:3b", "ramGb": 3, "vramGb": 3, "quality": "fast" }, - { "name": "llama3.1:8b", "ramGb": 6, "vramGb": 6, "quality": "balanced" }, - { "name": "llama3.1:70b", "ramGb": 48, "vramGb": 48, "quality": "best" } + { "name": "llama3.2:3b", "ramGb": 3, "vramGb": 3, "quality": "fast", "tier": ["gpu", "apple_silicon", "cpu_only"] }, + { "name": "llama3.1:8b", "ramGb": 6, "vramGb": 6, "quality": "balanced", "tier": ["gpu", "apple_silicon", "cpu_only"] }, + { "name": "llama3.1:70b", "ramGb": 48, "vramGb": 48, "quality": "best", "tier": ["gpu"] } ] }, { "family": "mistral", "variants": [ - { "name": "mistral:7b", "ramGb": 5, "vramGb": 5, "quality": "balanced" }, - { "name": "mistral:22b", "ramGb": 14, "vramGb": 14, "quality": "best" } + { "name": "mistral:7b", "ramGb": 5, "vramGb": 5, "quality": "balanced", "tier": ["gpu", "apple_silicon", "cpu_only"] }, + { "name": "mistral:22b", "ramGb": 14, "vramGb": 14, "quality": "best", "tier": ["gpu", "apple_silicon"] } ] }, { "family": "phi", "variants": [ - { "name": "phi4:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced" } + { "name": "phi4:14b", "ramGb": 10, "vramGb": 10, "quality": "balanced", "tier": ["gpu", "apple_silicon"] } ] }, { "family": "deepseek", "variants": [ - { "name": "deepseek-r1:7b", "ramGb": 5, "vramGb": 5, "quality": "reasoning" }, - { "name": "deepseek-r1:32b", "ramGb": 22, "vramGb": 22, "quality": "reasoning" } + { "name": "deepseek-r1:7b", "ramGb": 5, "vramGb": 5, "quality": "reasoning", "tier": ["gpu", "apple_silicon", "cpu_only"] }, + { "name": "deepseek-r1:32b", "ramGb": 22, "vramGb": 22, "quality": "reasoning", "tier": ["gpu", "apple_silicon"] } + ] + }, + { + "family": "qwen3", + "variants": [ + { "name": "qwen3:8b", "ramGb": 5, "vramGb": 5, "quality": "balanced", "tier": ["gpu", "apple_silicon", "cpu_only"] } ] } ] diff --git a/server/src/services/hardware.ts b/server/src/services/hardware.ts new file mode 100644 index 00000000..e064a773 --- /dev/null +++ b/server/src/services/hardware.ts @@ -0,0 +1,105 @@ +import os from "node:os"; +import si from "systeminformation"; + +export type HardwareTier = "gpu" | "apple_silicon" | "cpu_only"; + +export interface HardwareInfo { + totalGb: number; + freeGb: number; + usableGb: number; + platform: NodeJS.Platform; + gpuName: string | null; + gpuVramGb: number | null; + unifiedMemory: boolean; + hardwareTier: HardwareTier; + cpuModel: string | null; +} + +const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes + +let cache: HardwareInfo | null = null; +let cacheExpiry = 0; + +/** Reset the hardware detection cache. Exposed for testing only. */ +export function _resetHardwareCache(): void { + cache = null; + cacheExpiry = 0; +} + +export function hardwareService() { + async function detect(): Promise { + const now = Date.now(); + if (cache && now < cacheExpiry) { + return cache; + } + + const totalBytes = os.totalmem(); + const freeBytes = os.freemem(); + const totalGb = Math.round((totalBytes / (1024 * 1024 * 1024)) * 10) / 10; + const freeGb = Math.round((freeBytes / (1024 * 1024 * 1024)) * 10) / 10; + const usableGb = Math.round((freeBytes * 0.75) / (1024 * 1024 * 1024) * 10) / 10; + const cpuModel = os.cpus()[0]?.model ?? null; + const platform = process.platform; + + // Apple Silicon detection: darwin platform + CPU brand starting with "Apple" + if (platform === "darwin" && cpuModel?.startsWith("Apple")) { + const info: HardwareInfo = { + totalGb, + freeGb, + usableGb, + platform, + gpuName: null, + gpuVramGb: null, + unifiedMemory: true, + hardwareTier: "apple_silicon", + cpuModel, + }; + cache = info; + cacheExpiry = now + CACHE_TTL_MS; + return info; + } + + // Non-Apple Silicon: probe GPU with 3-second timeout + let gpuName: string | null = null; + let gpuVramGb: number | null = null; + let hardwareTier: HardwareTier = "cpu_only"; + + try { + const timeoutPromise = new Promise((_resolve, reject) => { + setTimeout(() => reject(new Error("GPU detection timeout")), 3000); + }); + const graphicsResult = await Promise.race([si.graphics(), timeoutPromise]); + const controllers = graphicsResult.controllers ?? []; + if (controllers.length > 0 && controllers[0]) { + const vramMb = controllers[0].vram ?? 0; + const vramGb = vramMb / 1024; + if (vramGb >= 4) { + gpuName = controllers[0].model ?? null; + gpuVramGb = Math.round(vramGb * 10) / 10; + hardwareTier = "gpu"; + } + } + } catch { + // Timeout or error — degrade to cpu_only + hardwareTier = "cpu_only"; + } + + const info: HardwareInfo = { + totalGb, + freeGb, + usableGb, + platform, + gpuName, + gpuVramGb, + unifiedMemory: false, + hardwareTier, + cpuModel, + }; + + cache = info; + cacheExpiry = now + CACHE_TTL_MS; + return info; + } + + return { detect }; +} diff --git a/server/src/services/nexus-settings.ts b/server/src/services/nexus-settings.ts new file mode 100644 index 00000000..a151cc19 --- /dev/null +++ b/server/src/services/nexus-settings.ts @@ -0,0 +1,47 @@ +import fs from "node:fs"; +import path from "node:path"; +import { z } from "zod"; +import { resolvePaperclipInstanceRoot } from "../home-paths.js"; + +export const NEXUS_MODES = ["personal_ai", "project_builder", "both"] as const; +export type NexusMode = (typeof NEXUS_MODES)[number]; + +const nexusSettingsSchema = z.object({ + mode: z.enum(NEXUS_MODES).default("both"), +}); + +type NexusSettings = z.infer; + +function resolveNexusSettingsPath(): string { + return path.resolve(resolvePaperclipInstanceRoot(), "data", "nexus-settings.json"); +} + +export function nexusSettingsService() { + async function get(): Promise { + const filePath = resolveNexusSettingsPath(); + try { + const raw = fs.readFileSync(filePath, "utf-8"); + const parsed = nexusSettingsSchema.safeParse(JSON.parse(raw)); + if (parsed.success) { + return parsed.data; + } + return { mode: "both" }; + } catch { + return { mode: "both" }; + } + } + + async function set(patch: Partial): Promise { + const current = await get(); + const merged = { ...current, ...patch }; + // Validate — will throw ZodError if invalid + const validated = nexusSettingsSchema.parse(merged); + + const filePath = resolveNexusSettingsPath(); + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, JSON.stringify(validated, null, 2), "utf-8"); + return validated; + } + + return { get, set }; +} diff --git a/server/src/services/ollama.ts b/server/src/services/ollama.ts index 584d1323..2e1768ae 100644 --- a/server/src/services/ollama.ts +++ b/server/src/services/ollama.ts @@ -64,6 +64,7 @@ interface CatalogVariant { ramGb: number; vramGb: number; quality: string; + tier?: string[]; } interface CatalogFamily { @@ -155,7 +156,11 @@ const QUALITY_RANK: Record = { fast: 1, }; -export function getRecommendedModel(models: OllamaModel[], systemRamBytes: number): OllamaModel[] { +export function getRecommendedModel( + models: OllamaModel[], + systemRamBytes: number, + hardwareTier?: "gpu" | "apple_silicon" | "cpu_only", +): OllamaModel[] { if (models.length === 0) return []; const catalog = loadCatalog(); @@ -177,6 +182,8 @@ export function getRecommendedModel(models: OllamaModel[], systemRamBytes: numbe const entry = catalogMap.get(model.name); if (!entry) continue; if (entry.ramGb > usableRamGb) continue; + // Filter by hardware tier if provided + if (hardwareTier && entry.tier && !entry.tier.includes(hardwareTier)) continue; const rank = QUALITY_RANK[entry.quality] ?? 0; if (rank > bestQualityRank) { bestQualityRank = rank;