feat(09-02): implement multi-source skill fetcher with file caching

- SkillSourceConfig type + BUILT_IN_SOURCES (3 sources: anthropic, schwepps, daymade)
- fetchAllSources() fetches from anthropic-marketplace and github-tree source types
- parseSkillFrontmatter() extracts name/description from SKILL.md YAML blocks
- Idempotency: checks version exists before fetching, skips re-download on same SHA
- Caches SKILL.md to skills/cache/<skill-id>/<sha>/SKILL.md on disk
- Inserts skills, skill_versions, and skill_files rows into registry.db
- All 7 tests passing (TDD GREEN)
This commit is contained in:
Mikkel Georgsen 2026-04-01 01:06:24 +02:00
parent b79fc29d1a
commit 52fa55293d

View file

@ -0,0 +1,377 @@
import crypto from "node:crypto";
import { mkdir, writeFile } from "node:fs/promises";
import { existsSync } from "node:fs";
import path from "node:path";
import { eq } from "drizzle-orm";
import { getSkillRegistryDb, type SkillRegistryDb } from "./skill-registry-db.js";
import { skills, skillVersions, skillFiles } from "./skill-registry-schema.js";
import {
fetchText,
fetchJson,
resolveGitHubCommitSha,
resolveRawGitHubUrl,
} from "./github-skill-helpers.js";
import { resolveSkillCacheDir } from "../home-paths.js";
// ---------------------------------------------------------------------------
// Source config
// ---------------------------------------------------------------------------
/**
 * Describes one remote GitHub repository that skills are fetched from.
 */
export type SkillSourceConfig = {
/** Source identifier; prefixes every skill id (`<id>/<slug>`) and is stored as the skill's `sourceId`. */
id: string;
/** Selects which fetch handler `fetchAllSources()` dispatches to. */
type: "anthropic-marketplace" | "github-tree";
/** GitHub account segment of the repository URL. */
owner: string;
/** Repository name segment of the repository URL. */
repo: string;
/** Git ref used when building raw-file, tree and source URLs, and when resolving the commit SHA. */
ref: string;
/** NOTE(review): presumably a human-readable display name; it is not read in this part of the file — confirm. */
label: string;
};
/**
 * Default list of skill sources.
 * `fetchAllSources()` falls back to this list when it is called without arguments.
 */
export const BUILT_IN_SOURCES: SkillSourceConfig[] = [
// Read through the repository's `.claude-plugin/marketplace.json` manifest.
{
id: "anthropic-official",
type: "anthropic-marketplace",
owner: "anthropics",
repo: "skills",
ref: "main",
label: "Anthropic Official",
},
// Discovered by scanning the repository's recursive git tree for SKILL.md files.
{
id: "schwepps-skills",
type: "github-tree",
owner: "schwepps",
repo: "skills",
ref: "main",
label: "Schwepps Community",
},
// Discovered by scanning the repository's recursive git tree for SKILL.md files.
{
id: "daymade-skills",
type: "github-tree",
owner: "daymade",
repo: "claude-code-skills",
ref: "main",
label: "Daymade Community",
},
];
// ---------------------------------------------------------------------------
// Frontmatter parsing
// ---------------------------------------------------------------------------
/**
 * Parse the YAML frontmatter block at the top of a SKILL.md string.
 *
 * Only the top-level `name` and `description` keys are extracted, and only
 * single-line scalar values are understood (no block scalars, no nesting).
 * A pair of matching surrounding quotes is removed from the value.
 *
 * @param markdown - Full text of a SKILL.md file.
 * @returns The extracted fields. A field is `undefined` when it is missing or
 *   empty; an empty object is returned when the document has no frontmatter.
 */
export function parseSkillFrontmatter(markdown: string): {
  name?: string;
  description?: string;
} {
  // Anchored to the start of the document (no `m` flag) so that a `---`
  // horizontal rule further down the body is never mistaken for frontmatter.
  // An optional BOM and leading blank lines are tolerated, and both fences
  // must sit on a line of their own.
  const match =
    /^\uFEFF?(?:[ \t]*\r?\n)*---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/.exec(markdown);
  if (!match) return {};
  const block = match[1] ?? "";

  // Normalise a captured scalar: trim it, drop one pair of matching quotes,
  // and map an empty result to `undefined`.
  const cleanScalar = (raw: string | undefined): string | undefined => {
    const trimmed = raw?.trim() ?? "";
    const quoted = /^(["'])(.*)\1$/.exec(trimmed);
    const value = quoted ? (quoted[2] ?? "").trim() : trimmed;
    return value.length > 0 ? value : undefined;
  };

  // `[ \t]*` rather than `\s*`: `\s` also matches newlines, which would let an
  // empty `name:` swallow the following line as its value.
  const nameMatch = /^name:[ \t]*(.*)$/m.exec(block);
  const descMatch = /^description:[ \t]*(.*)$/m.exec(block);

  return {
    name: cleanScalar(nameMatch?.[1]),
    description: cleanScalar(descMatch?.[1]),
  };
}
/**
 * Derive a URL-safe slug from the final segment of a slash-separated path.
 * e.g. "skills/My Skill Name" -> "my-skill-name"
 *
 * @param sourcePath - Slash-separated path whose last non-empty segment names the skill.
 * @returns The lower-cased segment with every run of characters outside
 *   `a-z`, `0-9` and `-` replaced by a single hyphen, and leading/trailing
 *   hyphens removed.
 */
export function slugFromPath(sourcePath: string): string {
  // The skill's directory name is the last non-empty segment; when the path
  // has no such segment the raw input is slugified instead.
  const nonEmptySegments = sourcePath.split("/").filter((part) => part !== "");
  const leaf = nonEmptySegments.pop() ?? sourcePath;

  const lowered = leaf.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9-]+/g, "-");
  return hyphenated.replace(/^-+|-+$/g, "");
}
// ---------------------------------------------------------------------------
// Core fetch helpers
// ---------------------------------------------------------------------------
/** One element of the `tree` array in a git/trees API response. */
type GitHubTreeEntry = {
// Path of the entry inside the repository, e.g. "code-review/SKILL.md".
path: string;
// Entry kind; only entries whose type is "blob" are considered as SKILL.md files.
type: string;
// NOTE(review): presumably the blob size in bytes; not read in this part of the file — confirm.
size?: number;
};
/** Subset of the git/trees API response body that fetchGitHubTree() reads. */
type GitHubTreeResponse = {
// Entries of the tree; the request is made with `?recursive=1`.
tree: GitHubTreeEntry[];
};
/** Shape expected from the parsed `.claude-plugin/marketplace.json` manifest. */
type MarketplaceJson = {
// Each `path` is a directory in the repository that is expected to contain a SKILL.md.
skills: Array<{ path: string }>;
};
/**
 * Insert a row into `skills`, or refresh the existing row that has the same id.
 *
 * On an id conflict only `name`, `description` and `updatedAt` are
 * overwritten; every other column keeps its stored value.
 *
 * @param db - Registry database handle.
 * @param opts - Identity and metadata of the skill to write.
 * @returns A promise that resolves with no value once the statement has run.
 */
async function upsertSkill(
  db: SkillRegistryDb,
  opts: {
    skillId: string;
    sourceId: string;
    name: string;
    description: string | undefined;
    sourceUrl: string;
  },
): Promise<void> {
  const timestamp = Date.now();
  const { skillId, sourceId, name, sourceUrl } = opts;
  // A missing description is stored as NULL.
  const description = opts.description ?? null;

  await db
    .insert(skills)
    .values({
      id: skillId,
      sourceId,
      name,
      description,
      sourceUrl,
      activeVersionId: null,
      removedAt: null,
      createdAt: timestamp,
      updatedAt: timestamp,
    })
    .onConflictDoUpdate({
      target: skills.id,
      set: { name, description, updatedAt: timestamp },
    });
}
/**
 * Report whether `skill_versions` already holds a row with the given id.
 *
 * @param db - Registry database handle.
 * @param versionId - Version id to look up (callers build it as `<skillId>@<sha>`).
 * @returns `true` when at least one matching row exists, so the download can be skipped.
 */
async function versionExists(db: SkillRegistryDb, versionId: string): Promise<boolean> {
  const [firstMatch] = await db
    .select({ id: skillVersions.id })
    .from(skillVersions)
    .where(eq(skillVersions.id, versionId));
  return firstMatch !== undefined;
}
/**
 * Persist one fetched SKILL.md: write it into the skill's cache directory and
 * record it in `skill_versions` and `skill_files`.
 *
 * Does nothing when a version row with id `<skillId>@<sha>` already exists.
 * `opts.skillMdUrl` is accepted but not read by this function.
 *
 * @param db - Registry database handle.
 * @param opts - Skill id, commit SHA and SKILL.md content to cache.
 */
async function cacheSkillVersion(
  db: SkillRegistryDb,
  opts: {
    skillId: string;
    sha: string;
    skillMdContent: string;
    skillMdUrl: string;
  },
): Promise<void> {
  const { skillId, sha, skillMdContent } = opts;
  const versionId = `${skillId}@${sha}`;

  // Already recorded for this SHA: nothing to write.
  const alreadyCached = await versionExists(db, versionId);
  if (alreadyCached) return;

  // File first, then the rows that describe it.
  const cacheDir = resolveSkillCacheDir(skillId, sha);
  await mkdir(cacheDir, { recursive: true });
  await writeFile(path.join(cacheDir, "SKILL.md"), skillMdContent, "utf-8");

  const fetchedAt = Date.now();
  await db.insert(skillVersions).values({
    id: versionId,
    skillId,
    version: sha,
    fetchedAt,
    cacheDir,
  });

  await db.insert(skillFiles).values({
    id: crypto.randomUUID(),
    versionId,
    path: "SKILL.md",
    kind: "skill",
    // Size is measured in UTF-8 bytes, matching the encoding written to disk.
    sizeBytes: Buffer.byteLength(skillMdContent, "utf-8"),
  });
}
// ---------------------------------------------------------------------------
// Source-type handlers
// ---------------------------------------------------------------------------
/**
 * Fetch every skill listed in a repository's `.claude-plugin/marketplace.json`.
 *
 * For each listed path the SKILL.md is downloaded, its frontmatter parsed,
 * the skill row upserted and the file cached under the commit SHA that
 * `source.ref` currently resolves to. Skills whose `<skillId>@<sha>` version
 * is already recorded are not downloaded again.
 *
 * Failures while downloading or parsing the manifest, or while resolving the
 * SHA, propagate to the caller. A failure while downloading an individual
 * SKILL.md only skips that skill.
 *
 * NOTE(review): only a top-level `skills` array of `{ path }` objects is
 * read — confirm this matches the layout of the live marketplace.json.
 *
 * @param source - Source whose manifest is read.
 * @param db - Registry database handle.
 * @returns Number of skills processed, counting both newly cached skills and
 *   skills that were already cached for this SHA.
 */
async function fetchAnthropicMarketplace(
  source: SkillSourceConfig,
  db: SkillRegistryDb,
): Promise<number> {
  const marketplaceUrl = resolveRawGitHubUrl(
    source.owner,
    source.repo,
    source.ref,
    ".claude-plugin/marketplace.json",
  );
  const marketplaceText = await fetchText(marketplaceUrl);
  const marketplace: MarketplaceJson = JSON.parse(marketplaceText);
  const sha = await resolveGitHubCommitSha(source.owner, source.repo, source.ref);

  let fetched = 0;
  for (const entry of marketplace.skills ?? []) {
    // The manifest is external JSON, so the declared entry shape is not
    // guaranteed. One malformed entry must not abort the whole source.
    const rawPath: unknown = entry?.path;
    if (typeof rawPath !== "string") continue;

    // Drop a leading "./" and trailing slashes so the URLs built below do
    // not contain "./" or "//" segments.
    const skillPath = rawPath.replace(/^(?:\.\/)+/, "").replace(/\/+$/, "");
    const slug = slugFromPath(skillPath);
    // An empty slug would yield the id "<source>/", which names no skill.
    if (slug.length === 0) continue;

    const skillId = `${source.id}/${slug}`;

    // Idempotency check before downloading — skip if version already cached
    const versionId = `${skillId}@${sha}`;
    if (await versionExists(db, versionId)) {
      fetched++;
      continue;
    }

    const skillMdUrl = resolveRawGitHubUrl(
      source.owner,
      source.repo,
      source.ref,
      `${skillPath}/SKILL.md`,
    );
    let skillMdContent: string;
    try {
      skillMdContent = await fetchText(skillMdUrl);
    } catch {
      // Best effort: any download failure is treated as "this path has no
      // SKILL.md" and the entry is skipped.
      continue;
    }

    const { name, description } = parseSkillFrontmatter(skillMdContent);
    const sourceUrl = `https://github.com/${source.owner}/${source.repo}/tree/${source.ref}/${skillPath}`;

    await upsertSkill(db, {
      skillId,
      sourceId: source.id,
      // Fall back to the slug when the frontmatter has no usable name.
      name: name ?? slug,
      description,
      sourceUrl,
    });
    await cacheSkillVersion(db, {
      skillId,
      sha,
      skillMdContent,
      skillMdUrl,
    });
    fetched++;
  }
  return fetched;
}
/**
 * Discover and fetch every skill in a repository by scanning its recursive
 * git tree for files named exactly `SKILL.md`.
 *
 * The directory that contains each SKILL.md is taken as the skill directory
 * and its last segment becomes the slug. A SKILL.md at the repository root is
 * ignored. Skills whose `<skillId>@<sha>` version is already recorded are not
 * downloaded again.
 *
 * Failures while fetching the tree or resolving the SHA propagate to the
 * caller. A failure while downloading an individual SKILL.md only skips that
 * skill.
 *
 * NOTE(review): the Git Trees API can mark a recursive response as truncated
 * for very large repositories; that flag is not inspected here — confirm the
 * configured repositories stay below the limit.
 *
 * @param source - Source repository to scan.
 * @param db - Registry database handle.
 * @returns Number of skills processed, counting both newly cached skills and
 *   skills that were already cached for this SHA.
 */
async function fetchGitHubTree(
  source: SkillSourceConfig,
  db: SkillRegistryDb,
): Promise<number> {
  const treeUrl = `https://api.github.com/repos/${source.owner}/${source.repo}/git/trees/${encodeURIComponent(source.ref)}?recursive=1`;
  const treeResponse = await fetchJson<GitHubTreeResponse>(treeUrl);
  const sha = await resolveGitHubCommitSha(source.owner, source.repo, source.ref);

  // Match on the file name itself. A suffix test would also accept files such
  // as "docs/MY-SKILL.md" and register their directory as a skill.
  const skillMdEntries = (treeResponse.tree ?? []).filter(
    (entry) => entry.type === "blob" && path.posix.basename(entry.path) === "SKILL.md",
  );

  let fetched = 0;
  for (const entry of skillMdEntries) {
    // entry.path is like "code-review/SKILL.md" — dirname is the skill dir
    const skillDir = path.posix.dirname(entry.path);
    if (!skillDir || skillDir === ".") continue;

    const slug = slugFromPath(skillDir);
    // A directory name with no slug-safe characters would yield the id
    // "<source>/", which names no skill.
    if (slug.length === 0) continue;

    const skillId = `${source.id}/${slug}`;

    // Idempotency check before downloading — skip if version already cached
    const versionId = `${skillId}@${sha}`;
    if (await versionExists(db, versionId)) {
      fetched++;
      continue;
    }

    const skillMdUrl = resolveRawGitHubUrl(source.owner, source.repo, source.ref, entry.path);
    let skillMdContent: string;
    try {
      skillMdContent = await fetchText(skillMdUrl);
    } catch {
      // Best effort: a skill whose SKILL.md cannot be downloaded is skipped.
      continue;
    }

    const { name, description } = parseSkillFrontmatter(skillMdContent);
    const sourceUrl = `https://github.com/${source.owner}/${source.repo}/tree/${source.ref}/${skillDir}`;

    await upsertSkill(db, {
      skillId,
      sourceId: source.id,
      // Fall back to the slug when the frontmatter has no usable name.
      name: name ?? slug,
      description,
      sourceUrl,
    });
    await cacheSkillVersion(db, {
      skillId,
      sha,
      skillMdContent,
      skillMdUrl,
    });
    fetched++;
  }
  return fetched;
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/** Aggregate outcome of one `fetchAllSources()` run. */
export type FetchAllSourcesResult = {
// Sum of the per-source handler counts; handlers also count skills that were already cached.
fetched: number;
// One message per source that threw or had an unrecognised type; empty when every source succeeded.
errors: string[];
};
/**
 * Run the fetch handler for every configured source and fill the registry DB.
 *
 * Sources are processed one after another. An error thrown while handling a
 * source is recorded in `errors` and the remaining sources are still
 * processed. Opening the registry DB happens outside that protection.
 *
 * @param sources - Sources to fetch; defaults to `BUILT_IN_SOURCES`.
 * @returns The total handler count and the collected error messages.
 */
export async function fetchAllSources(
  sources: SkillSourceConfig[] = BUILT_IN_SOURCES,
): Promise<FetchAllSourcesResult> {
  const db = await getSkillRegistryDb();
  const result: FetchAllSourcesResult = { fetched: 0, errors: [] };

  for (const source of sources) {
    try {
      switch (source.type) {
        case "anthropic-marketplace":
          result.fetched += await fetchAnthropicMarketplace(source, db);
          break;
        case "github-tree":
          result.fetched += await fetchGitHubTree(source, db);
          break;
        default:
          // Reachable only when a config bypasses the type checker.
          result.errors.push(`Unknown source type for ${source.id}`);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      result.errors.push(`Source ${source.id}: ${message}`);
    }
  }

  return result;
}