/**
 * Multi-source skill fetcher with on-disk caching.
 *
 * Provenance: this module was added as
 * `server/src/services/skill-registry-fetcher.ts` by commit
 * 9c2569ebb00bb716781cc717192d5b036ed8eb33
 * ("feat(09-02): implement multi-source skill fetcher with file caching").
 *
 * Overview:
 *  - `SkillSourceConfig` describes a source; `BUILT_IN_SOURCES` lists three.
 *  - `fetchAllSources()` walks `anthropic-marketplace` and `github-tree` sources.
 *  - `parseSkillFrontmatter()` extracts `name` / `description` from SKILL.md.
 *  - A version id is `${skillId}@${sha}`; a version already present in the
 *    registry DB is not downloaded again.
 *  - SKILL.md is written to `<resolveSkillCacheDir(skillId, sha)>/SKILL.md`.
 *  - Rows are inserted into `skills`, `skillVersions` and `skillFiles`.
 */
import crypto from "node:crypto";
import { mkdir, writeFile } from "node:fs/promises";
import { existsSync } from "node:fs";
import path from "node:path";
import { eq } from "drizzle-orm";
import { getSkillRegistryDb, type SkillRegistryDb } from "./skill-registry-db.js";
import { skills, skillVersions, skillFiles } from "./skill-registry-schema.js";
import {
  fetchText,
  fetchJson,
  resolveGitHubCommitSha,
  resolveRawGitHubUrl,
} from "./github-skill-helpers.js";
import { resolveSkillCacheDir } from "../home-paths.js";

// ---------------------------------------------------------------------------
// Source config
// ---------------------------------------------------------------------------

/** One remote repository that skills can be pulled from. */
export type SkillSourceConfig = {
  /** Stable identifier; becomes the first segment of every skill id. */
  id: string;
  /** Selects which handler in this module walks the repository. */
  type: "anthropic-marketplace" | "github-tree";
  owner: string;
  repo: string;
  /** Branch, tag or other ref that is resolved to a commit SHA. */
  ref: string;
  /** Human-readable name of the source. */
  label: string;
};

/** Sources used by `fetchAllSources()` when the caller passes none. */
export const BUILT_IN_SOURCES: SkillSourceConfig[] = [
  {
    id: "anthropic-official",
    type: "anthropic-marketplace",
    owner: "anthropics",
    repo: "skills",
    ref: "main",
    label: "Anthropic Official",
  },
  {
    id: "schwepps-skills",
    type: "github-tree",
    owner: "schwepps",
    repo: "skills",
    ref: "main",
    label: "Schwepps Community",
  },
  {
    id: "daymade-skills",
    type: "github-tree",
    owner: "daymade",
    repo: "claude-code-skills",
    ref: "main",
    label: "Daymade Community",
  },
];

// ---------------------------------------------------------------------------
// Frontmatter parsing
// ---------------------------------------------------------------------------

/**
 * Matches a frontmatter block at the very top of a document (an optional BOM
 * and leading blank lines are tolerated). Deliberately NOT multiline: a `---`
 * horizontal rule further down the document must not be read as frontmatter.
 * The closing fence has to be a line consisting of `---` only.
 */
const FRONTMATTER_BLOCK =
  /^\uFEFF?(?:[ \t]*\r?\n)*---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/;

/** A value that is only a YAML block-scalar indicator (`>`, `|`, `>-`, `|+`, ...). */
const BLOCK_SCALAR_INDICATOR = /^([>|])[+-]?$/;

/** Remove one pair of matching single or double quotes around a value. */
function stripMatchingQuotes(value: string): string {
  const first = value.charAt(0);
  const quoted = value.length >= 2 && (first === '"' || first === "'") && value.endsWith(first);
  return quoted ? value.slice(1, -1).trim() : value;
}

/**
 * Read one top-level `key: value` entry from the lines of a frontmatter block.
 *
 * Only the line that carries the key is inspected for an inline value, so an
 * empty `name:` can never swallow the following line. When the inline value is
 * a block-scalar indicator, the indented lines that follow are collected
 * instead (joined with newlines for `|`, with spaces for `>`); this is a
 * simplification of YAML folding, not a full implementation.
 *
 * @returns the trimmed value, or `undefined` when the key is absent or empty.
 */
function readFrontmatterField(lines: readonly string[], key: string): string | undefined {
  const prefix = `${key}:`;
  const index = lines.findIndex((line) => line.startsWith(prefix));
  if (index === -1) return undefined;

  const inline = (lines[index] ?? "").slice(prefix.length).trim();
  const blockScalar = BLOCK_SCALAR_INDICATOR.exec(inline);
  if (!blockScalar) {
    return stripMatchingQuotes(inline) || undefined;
  }

  const continuation: string[] = [];
  for (const line of lines.slice(index + 1)) {
    const isBlank = line.trim() === "";
    const isIndented = line.startsWith(" ") || line.startsWith("\t");
    // The first non-blank line back at column 0 is the next key.
    if (!isBlank && !isIndented) break;
    if (!isBlank) continuation.push(line.trim());
  }

  const joined = continuation.join(blockScalar[1] === "|" ? "\n" : " ");
  return joined || undefined;
}

/**
 * Parse YAML frontmatter from a SKILL.md string.
 * Only extracts the `name` and `description` fields.
 *
 * @param markdown - full contents of a SKILL.md file.
 * @returns `{}` when the document does not start with a frontmatter block;
 *   otherwise an object whose fields are `undefined` when missing or empty.
 */
export function parseSkillFrontmatter(markdown: string): {
  name?: string;
  description?: string;
} {
  const match = FRONTMATTER_BLOCK.exec(markdown);
  if (!match) return {};

  const lines = (match[1] ?? "").split(/\r?\n/);

  return {
    name: readFrontmatterField(lines, "name"),
    description: readFrontmatterField(lines, "description"),
  };
}

/**
 * Convert a path to a URL-safe skill slug, using its last non-empty segment
 * (the directory name of the skill).
 * e.g. "My Skill Name" → "my-skill-name"
 *
 * @returns the slug; may be the empty string when the segment contains no
 *   character in `[a-z0-9-]` after lower-casing (for example ".").
 */
export function slugFromPath(sourcePath: string): string {
  const parts = sourcePath.split("/").filter(Boolean);
  const segment = parts[parts.length - 1] ?? sourcePath;
  return segment
    .toLowerCase()
    .replace(/[^a-z0-9-]+/g, "-")
    .replace(/^-+|-+$/g, "");
}

// ---------------------------------------------------------------------------
// Core fetch helpers
// ---------------------------------------------------------------------------

type GitHubTreeEntry = {
  path: string;
  type: string;
  size?: number;
};

type GitHubTreeResponse = {
  tree: GitHubTreeEntry[];
};

/** Narrow an unknown JSON value to a plain (non-array) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Collect the skill directory paths listed in a parsed marketplace.json.
 *
 * Accepted shapes:
 *  - `{ skills: [{ path: "dir" }, ...] }`
 *  - `{ skills: ["dir", ...] }`
 *  - `{ plugins: [{ skills: [...] }, ...] }` with either entry form.
 *    NOTE(review): the nested `plugins[].skills` form is supported on the
 *    assumption that published manifests use it; confirm against the live file.
 *
 * Malformed entries are skipped so one bad entry cannot abort the whole
 * source. A leading "./" and trailing "/" are trimmed, and duplicates are
 * dropped while keeping first-seen order.
 */
function collectMarketplaceSkillPaths(manifest: Record<string, unknown>): string[] {
  const found = new Set<string>();

  const addEntries = (entries: unknown): void => {
    if (!Array.isArray(entries)) return;
    for (const entry of entries as unknown[]) {
      const raw = typeof entry === "string" ? entry : isRecord(entry) ? entry.path : undefined;
      if (typeof raw !== "string") continue;
      const normalized = raw
        .trim()
        .replace(/^(?:\.\/)+/, "")
        .replace(/\/+$/, "");
      if (normalized !== "") found.add(normalized);
    }
  };

  addEntries(manifest.skills);
  if (Array.isArray(manifest.plugins)) {
    for (const plugin of manifest.plugins as unknown[]) {
      if (isRecord(plugin)) addEntries(plugin.skills);
    }
  }

  return [...found];
}

/**
 * Insert a skill row, or refresh `name`, `description` and `updatedAt` when a
 * row with the same id already exists. Other columns of an existing row are
 * left untouched.
 */
async function upsertSkill(
  db: SkillRegistryDb,
  opts: {
    skillId: string;
    sourceId: string;
    name: string;
    description: string | undefined;
    sourceUrl: string;
  },
): Promise<void> {
  const now = Date.now();
  const description = opts.description ?? null;

  await db
    .insert(skills)
    .values({
      id: opts.skillId,
      sourceId: opts.sourceId,
      name: opts.name,
      description,
      sourceUrl: opts.sourceUrl,
      activeVersionId: null,
      removedAt: null,
      createdAt: now,
      updatedAt: now,
    })
    .onConflictDoUpdate({
      target: skills.id,
      set: {
        name: opts.name,
        description,
        updatedAt: now,
      },
    });
}

/**
 * Check whether a version row with this id already exists in the DB.
 *
 * @returns `true` if already present (the download can be skipped).
 */
async function versionExists(db: SkillRegistryDb, versionId: string): Promise<boolean> {
  const existing = await db
    .select({ id: skillVersions.id })
    .from(skillVersions)
    .where(eq(skillVersions.id, versionId));
  return existing.length > 0;
}

/**
 * Cache SKILL.md to disk and insert the matching version and file rows.
 *
 * Does nothing when the version `${skillId}@${sha}` is already recorded.
 * `skillMdUrl` is accepted for the callers' convenience; this function does
 * not read or persist it.
 */
async function cacheSkillVersion(
  db: SkillRegistryDb,
  opts: {
    skillId: string;
    sha: string;
    skillMdContent: string;
    skillMdUrl: string;
  },
): Promise<void> {
  const versionId = `${opts.skillId}@${opts.sha}`;

  // Idempotency check: skip if this version is already cached.
  if (await versionExists(db, versionId)) {
    return;
  }

  const cacheDir = resolveSkillCacheDir(opts.skillId, opts.sha);
  await mkdir(cacheDir, { recursive: true });
  await writeFile(path.join(cacheDir, "SKILL.md"), opts.skillMdContent, "utf-8");

  // The version row is written only after the file is on disk.
  await db.insert(skillVersions).values({
    id: versionId,
    skillId: opts.skillId,
    version: opts.sha,
    fetchedAt: Date.now(),
    cacheDir,
  });

  await db.insert(skillFiles).values({
    id: crypto.randomUUID(),
    versionId,
    path: "SKILL.md",
    kind: "skill",
    sizeBytes: Buffer.byteLength(opts.skillMdContent, "utf-8"),
  });
}

/**
 * Bring one skill directory of a source up to date in the registry.
 *
 * @param skillDir - repository-relative directory that contains SKILL.md.
 * @returns `true` when the skill is counted (already cached at `sha`, or newly
 *   downloaded and stored); `false` when it is skipped because its slug is
 *   empty or its SKILL.md could not be fetched.
 */
async function syncSkill(
  db: SkillRegistryDb,
  source: SkillSourceConfig,
  sha: string,
  skillDir: string,
): Promise<boolean> {
  const slug = slugFromPath(skillDir);
  // An empty slug would yield the id "<source>/", which identifies nothing.
  if (slug === "") return false;

  const skillId = `${source.id}/${slug}`;

  // Idempotency check before downloading: skip if this version is cached.
  if (await versionExists(db, `${skillId}@${sha}`)) {
    return true;
  }

  // NOTE(review): the file is requested at `source.ref` while the row records
  // `sha`; if the ref moves between the two requests they can disagree.
  const skillMdUrl = resolveRawGitHubUrl(
    source.owner,
    source.repo,
    source.ref,
    `${skillDir}/SKILL.md`,
  );

  let skillMdContent: string;
  try {
    skillMdContent = await fetchText(skillMdUrl);
  } catch {
    // Best effort: a directory whose SKILL.md cannot be fetched is skipped.
    return false;
  }

  const { name, description } = parseSkillFrontmatter(skillMdContent);
  const sourceUrl = `https://github.com/${source.owner}/${source.repo}/tree/${source.ref}/${skillDir}`;

  await upsertSkill(db, {
    skillId,
    sourceId: source.id,
    name: name ?? slug,
    description,
    sourceUrl,
  });

  await cacheSkillVersion(db, {
    skillId,
    sha,
    skillMdContent,
    skillMdUrl,
  });

  return true;
}

// ---------------------------------------------------------------------------
// Source-type handlers
// ---------------------------------------------------------------------------

/**
 * Handle an `anthropic-marketplace` source: read
 * `.claude-plugin/marketplace.json` and sync every skill directory it lists.
 *
 * @returns the number of skills that are cached after the run.
 * @throws when the manifest cannot be fetched, is not valid JSON, or is not a
 *   JSON object; also when resolving the commit SHA fails.
 */
async function fetchAnthropicMarketplace(
  source: SkillSourceConfig,
  db: SkillRegistryDb,
): Promise<number> {
  const marketplaceUrl = resolveRawGitHubUrl(
    source.owner,
    source.repo,
    source.ref,
    ".claude-plugin/marketplace.json",
  );

  const marketplaceText = await fetchText(marketplaceUrl);
  const manifest: unknown = JSON.parse(marketplaceText);
  if (!isRecord(manifest)) {
    throw new Error("marketplace.json is not a JSON object");
  }

  const sha = await resolveGitHubCommitSha(source.owner, source.repo, source.ref);

  let fetched = 0;
  for (const skillDir of collectMarketplaceSkillPaths(manifest)) {
    if (await syncSkill(db, source, sha, skillDir)) {
      fetched++;
    }
  }

  return fetched;
}

/**
 * Handle a `github-tree` source: list the repository tree recursively and
 * sync every directory that contains a file named exactly `SKILL.md`.
 * A SKILL.md at the repository root is ignored.
 *
 * @returns the number of skills that are cached after the run.
 * @throws when the tree request or resolving the commit SHA fails.
 */
async function fetchGitHubTree(
  source: SkillSourceConfig,
  db: SkillRegistryDb,
): Promise<number> {
  const treeUrl = `https://api.github.com/repos/${source.owner}/${source.repo}/git/trees/${encodeURIComponent(source.ref)}?recursive=1`;
  // NOTE(review): the explicit type argument assumes fetchJson is generic.
  const treeResponse = await fetchJson<GitHubTreeResponse>(treeUrl);

  const sha = await resolveGitHubCommitSha(source.owner, source.repo, source.ref);

  // Compare the basename: a suffix test would also accept "MYSKILL.md".
  const skillDirs = (treeResponse.tree ?? [])
    .filter((entry) => entry.type === "blob" && path.posix.basename(entry.path) === "SKILL.md")
    .map((entry) => path.posix.dirname(entry.path))
    .filter((dir) => dir !== "" && dir !== ".");

  let fetched = 0;
  for (const skillDir of skillDirs) {
    if (await syncSkill(db, source, sha, skillDir)) {
      fetched++;
    }
  }

  return fetched;
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

export type FetchAllSourcesResult = {
  /** Skills cached after the run, summed over all sources. */
  fetched: number;
  /** One message per source that failed or had an unknown type. */
  errors: string[];
};

/**
 * Fetch skills from all configured sources and populate the registry DB.
 * Uses BUILT_IN_SOURCES if no sources are provided.
 *
 * Sources are processed one after another. A failure in one source is
 * recorded in `errors` and does not stop the remaining sources.
 */
export async function fetchAllSources(
  sources: SkillSourceConfig[] = BUILT_IN_SOURCES,
): Promise<FetchAllSourcesResult> {
  const db = await getSkillRegistryDb();
  let fetched = 0;
  const errors: string[] = [];

  for (const source of sources) {
    try {
      switch (source.type) {
        case "anthropic-marketplace":
          fetched += await fetchAnthropicMarketplace(source, db);
          break;
        case "github-tree":
          fetched += await fetchGitHubTree(source, db);
          break;
        default:
          // Reachable only for configs that bypass the type checker.
          errors.push(`Unknown source type for ${source.id}`);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      errors.push(`Source ${source.id}: ${message}`);
    }
  }

  return { fetched, errors };
}