feat(09-02): implement multi-source skill fetcher with file caching
- Add SkillSourceConfig type and BUILT_IN_SOURCES (3 sources: anthropic, schwepps, daymade)
- fetchAllSources() fetches from the anthropic-marketplace and github-tree source types
- parseSkillFrontmatter() extracts name/description from SKILL.md YAML frontmatter blocks
- Idempotency: checks whether the version already exists before fetching and skips re-download on the same SHA
- Caches SKILL.md on disk at skills/cache/<skill-id>/<sha>/SKILL.md
- Inserts skills, skill_versions, and skill_files rows into registry.db
- All 7 tests passing (TDD GREEN)
This commit is contained in:
parent
9950091e0d
commit
e443ea9411
1 changed files with 377 additions and 0 deletions
377
server/src/services/skill-registry-fetcher.ts
Normal file
377
server/src/services/skill-registry-fetcher.ts
Normal file
|
|
@ -0,0 +1,377 @@
|
|||
import crypto from "node:crypto";
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { existsSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import { eq } from "drizzle-orm";
|
||||
import { getSkillRegistryDb, type SkillRegistryDb } from "./skill-registry-db.js";
|
||||
import { skills, skillVersions, skillFiles } from "./skill-registry-schema.js";
|
||||
import {
|
||||
fetchText,
|
||||
fetchJson,
|
||||
resolveGitHubCommitSha,
|
||||
resolveRawGitHubUrl,
|
||||
} from "./github-skill-helpers.js";
|
||||
import { resolveSkillCacheDir } from "../home-paths.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Describes one upstream GitHub repository that skills are pulled from.
 */
export type SkillSourceConfig = {
  /** Source identifier; used as the prefix of every skill id (`<id>/<slug>`). */
  id: string;
  /**
   * Discovery strategy:
   * - `"anthropic-marketplace"`: read `.claude-plugin/marketplace.json`.
   * - `"github-tree"`: scan the recursive git tree for `SKILL.md` blobs.
   */
  type: "anthropic-marketplace" | "github-tree";
  /** GitHub account or organisation that owns the repository. */
  owner: string;
  /** Repository name. */
  repo: string;
  /** Git ref the skills are read from; it is resolved to a commit SHA. */
  ref: string;
  /** Human-readable display name of the source. */
  label: string;
};
|
||||
|
||||
/**
 * Sources that are fetched when the caller does not supply its own list.
 *
 * Each row is `[id, type, owner, repo, ref, label]`; the rows are expanded
 * into plain `SkillSourceConfig` objects in the same order.
 */
export const BUILT_IN_SOURCES: SkillSourceConfig[] = (
  [
    ["anthropic-official", "anthropic-marketplace", "anthropics", "skills", "main", "Anthropic Official"],
    ["schwepps-skills", "github-tree", "schwepps", "skills", "main", "Schwepps Community"],
    ["daymade-skills", "github-tree", "daymade", "claude-code-skills", "main", "Daymade Community"],
  ] as const
).map(([id, type, owner, repo, ref, label]) => ({ id, type, owner, repo, ref, label }));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Frontmatter parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Extract the `name` and `description` fields from the YAML frontmatter of a
 * SKILL.md document.
 *
 * This is a small line-based reader, not a YAML parser: only single-line
 * `key: value` entries are understood. A value wrapped in one matching pair of
 * single or double quotes has the quotes removed (escape sequences inside the
 * quotes are not interpreted).
 *
 * @param markdown - Full text of a SKILL.md file.
 * @returns An object with `name` and `description`; a field is `undefined`
 *   when it is missing or empty. Returns `{}` when the document does not start
 *   with a frontmatter block.
 */
export function parseSkillFrontmatter(markdown: string): {
  name?: string;
  description?: string;
} {
  // No `m` flag here: frontmatter must open the document (after an optional
  // BOM and blank lines). With `m`, any pair of `---` horizontal rules in the
  // body would be mistaken for frontmatter. The closing fence must also sit on
  // a line of its own.
  const fence =
    /^\uFEFF?(?:[ \t]*\r?\n)*---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/.exec(markdown);
  if (!fence) return {};

  const block = fence[1] ?? "";

  // `[ \t]*` instead of `\s*`: `\s` also matches newlines, which made an empty
  // `name:` line swallow the following line as its value.
  const namePattern = /^name:[ \t]*(.+)$/m;
  const descriptionPattern = /^description:[ \t]*(.+)$/m;

  const readField = (pattern: RegExp): string | undefined => {
    const raw = pattern.exec(block)?.[1]?.trim() ?? "";
    // Drop one pair of matching surrounding quotes, e.g. `"My Skill"`.
    const quoted = /^(["'])(.*)\1$/.exec(raw);
    const value = quoted ? (quoted[2] ?? "") : raw;
    return value.length > 0 ? value : undefined;
  };

  return {
    name: readField(namePattern),
    description: readField(descriptionPattern),
  };
}
|
||||
|
||||
/**
 * Derive a URL-safe slug from the final segment of a slash-separated path.
 *
 * The last non-empty segment is lower-cased, every run of characters outside
 * `a-z`, `0-9` and `-` becomes a single hyphen, and leading/trailing hyphens
 * are removed. Example: "skills/My Skill Name/" → "my-skill-name".
 *
 * @param sourcePath - Path of the skill directory, e.g. "skills/code-review".
 * @returns The slug; it may be empty when the segment has no usable characters.
 */
export function slugFromPath(sourcePath: string): string {
  const segments = sourcePath.split("/").filter((part) => part.length > 0);
  // Fall back to the raw input when the path has no non-empty segment.
  const leaf = segments.pop() ?? sourcePath;

  const lowered = leaf.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9-]+/g, "-");
  return hyphenated.replace(/^-+|-+$/g, "");
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Core fetch helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** One entry of a git tree listing, reduced to the fields this file reads. */
type GitHubTreeEntry = {
  /** Repository-relative path of the entry. */
  path: string;
  /** Entry kind; only `"blob"` entries are treated as files here. */
  type: string;
  /** Size reported for the entry, when present. */
  size?: number;
};

/** Response body of the git tree request, reduced to the entry list. */
type GitHubTreeResponse = {
  tree: Array<GitHubTreeEntry>;
};

/** Shape read from `.claude-plugin/marketplace.json`: a list of skill directories. */
type MarketplaceJson = {
  skills: { path: string }[];
};
|
||||
|
||||
/**
 * Insert a row into `skills`, or refresh it when a row with the same id
 * already exists.
 *
 * On insert the row starts with no active version and no removal timestamp.
 * On conflict only `name`, `description` and `updatedAt` are overwritten;
 * `sourceId`, `sourceUrl`, `createdAt` and the other columns keep their stored
 * values. Nothing is returned.
 *
 * @param db - Registry database handle.
 * @param opts - Skill identity and metadata; an undefined `description` is
 *   stored as `null`.
 */
async function upsertSkill(
  db: SkillRegistryDb,
  opts: {
    skillId: string;
    sourceId: string;
    name: string;
    description: string | undefined;
    sourceUrl: string;
  },
): Promise<void> {
  const timestamp = Date.now();

  // Columns that are written both on first insert and on every later refresh.
  const refreshed = {
    name: opts.name,
    description: opts.description ?? null,
    updatedAt: timestamp,
  };

  await db
    .insert(skills)
    .values({
      id: opts.skillId,
      sourceId: opts.sourceId,
      sourceUrl: opts.sourceUrl,
      activeVersionId: null,
      removedAt: null,
      createdAt: timestamp,
      ...refreshed,
    })
    .onConflictDoUpdate({ target: skills.id, set: refreshed });
}
|
||||
|
||||
/**
 * Report whether `skill_versions` already holds a row with the given id.
 *
 * Callers build the id as `<skillId>@<sha>`, so a `true` result means that
 * skill has already been recorded at that commit and can be skipped.
 *
 * @param db - Registry database handle.
 * @param versionId - Primary key to look up.
 * @returns `true` when at least one matching row exists.
 */
async function versionExists(db: SkillRegistryDb, versionId: string): Promise<boolean> {
  const [match] = await db
    .select({ id: skillVersions.id })
    .from(skillVersions)
    .where(eq(skillVersions.id, versionId));

  return match !== undefined;
}
|
||||
|
||||
/**
 * Persist one version of a skill: write its SKILL.md into the cache directory
 * and record the version and the file in the registry.
 *
 * Does nothing when a version row with id `<skillId>@<sha>` already exists.
 * Otherwise the steps run in this order: create the directory returned by
 * `resolveSkillCacheDir(skillId, sha)`, write `SKILL.md` into it, insert the
 * `skill_versions` row, then insert the `skill_files` row.
 *
 * @param db - Registry database handle.
 * @param opts - `skillId` and `sha` identify the version; `skillMdContent` is
 *   the file text. `skillMdUrl` is accepted but not read by this function.
 */
async function cacheSkillVersion(
  db: SkillRegistryDb,
  opts: {
    skillId: string;
    sha: string;
    skillMdContent: string;
    skillMdUrl: string;
  },
): Promise<void> {
  const { skillId, sha, skillMdContent } = opts;
  const versionId = `${skillId}@${sha}`;

  // Idempotency: an existing version row means this commit is already cached.
  const alreadyCached = await versionExists(db, versionId);
  if (alreadyCached) return;

  const cacheDir = resolveSkillCacheDir(skillId, sha);
  await mkdir(cacheDir, { recursive: true });
  await writeFile(path.join(cacheDir, "SKILL.md"), skillMdContent, "utf-8");

  const fetchedAt = Date.now();

  await db.insert(skillVersions).values({
    id: versionId,
    skillId,
    version: sha,
    fetchedAt,
    cacheDir,
  });

  // The file row stores the UTF-8 byte length, not the string length.
  await db.insert(skillFiles).values({
    id: crypto.randomUUID(),
    versionId,
    path: "SKILL.md",
    kind: "skill",
    sizeBytes: Buffer.byteLength(skillMdContent, "utf-8"),
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source-type handlers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fetch every skill listed in a repository's `.claude-plugin/marketplace.json`.
 *
 * For each listed skill directory the function downloads `<path>/SKILL.md`,
 * upserts the skill row and caches the file under the commit SHA that
 * `source.ref` currently resolves to. Skills already recorded at that SHA are
 * counted without being downloaded again.
 *
 * The manifest is external input, so it is validated instead of trusted:
 * entries without a string `path`, or whose path yields an empty slug, are
 * skipped rather than aborting the remaining skills.
 *
 * @param source - Source to read; `owner`, `repo`, `ref` and `id` are used.
 * @param db - Registry database handle.
 * @returns Number of skills that are cached for the resolved SHA after the
 *   run (newly fetched plus already present).
 * @throws If the manifest cannot be fetched or parsed, if its `skills` field
 *   is present but not an array, or if the SHA lookup or a database write fails.
 */
async function fetchAnthropicMarketplace(
  source: SkillSourceConfig,
  db: SkillRegistryDb,
): Promise<number> {
  const marketplaceUrl = resolveRawGitHubUrl(
    source.owner,
    source.repo,
    source.ref,
    ".claude-plugin/marketplace.json",
  );

  const marketplaceText = await fetchText(marketplaceUrl);
  const marketplace = JSON.parse(marketplaceText) as Partial<MarketplaceJson> | null;

  // A missing `skills` field means "no skills"; any other non-array value is a
  // malformed manifest and is reported with a readable message.
  const listed: unknown = marketplace?.skills ?? [];
  if (!Array.isArray(listed)) {
    throw new Error('marketplace.json "skills" field is not an array');
  }

  const sha = await resolveGitHubCommitSha(source.owner, source.repo, source.ref);

  let fetched = 0;

  for (const entry of listed as unknown[]) {
    const skillPath = (entry as Partial<MarketplaceJson["skills"][number]> | null | undefined)?.path;
    if (typeof skillPath !== "string") continue;

    const slug = slugFromPath(skillPath);
    // An empty slug would produce the id "<source>/", which identifies nothing.
    if (slug.length === 0) continue;

    const skillId = `${source.id}/${slug}`;

    // Idempotency check before downloading — skip if version already cached
    const versionId = `${skillId}@${sha}`;
    if (await versionExists(db, versionId)) {
      fetched++;
      continue;
    }

    // NOTE(review): the file is downloaded via `source.ref`, not the resolved
    // `sha`; if the ref moves between the two requests the cached content may
    // not match the recorded SHA — confirm whether the helper accepts a SHA.
    const skillMdUrl = resolveRawGitHubUrl(source.owner, source.repo, source.ref, `${skillPath}/SKILL.md`);
    let skillMdContent: string;
    try {
      skillMdContent = await fetchText(skillMdUrl);
    } catch {
      // Best effort: skip skills whose SKILL.md cannot be downloaded.
      continue;
    }

    const { name, description } = parseSkillFrontmatter(skillMdContent);
    const sourceUrl = `https://github.com/${source.owner}/${source.repo}/tree/${source.ref}/${skillPath}`;

    await upsertSkill(db, {
      skillId,
      sourceId: source.id,
      name: name ?? slug,
      description,
      sourceUrl,
    });

    await cacheSkillVersion(db, {
      skillId,
      sha,
      skillMdContent,
      skillMdUrl,
    });

    fetched++;
  }

  return fetched;
}
|
||||
|
||||
/**
 * Discover skills by scanning a repository's recursive git tree for files
 * named exactly `SKILL.md`, then fetch and cache each one.
 *
 * The directory containing a `SKILL.md` is the skill directory; its last path
 * segment becomes the slug. A `SKILL.md` at the repository root is ignored
 * because it has no skill directory. Skills already recorded at the commit
 * SHA that `source.ref` resolves to are counted without being downloaded.
 *
 * @param source - Source to read; `owner`, `repo`, `ref` and `id` are used.
 * @param db - Registry database handle.
 * @returns Number of skills that are cached for the resolved SHA after the
 *   run (newly fetched plus already present).
 * @throws If the tree request, the SHA lookup or a database write fails.
 */
async function fetchGitHubTree(
  source: SkillSourceConfig,
  db: SkillRegistryDb,
): Promise<number> {
  const treeUrl = `https://api.github.com/repos/${source.owner}/${source.repo}/git/trees/${encodeURIComponent(source.ref)}?recursive=1`;
  const treeResponse = await fetchJson<GitHubTreeResponse>(treeUrl);

  const sha = await resolveGitHubCommitSha(source.owner, source.repo, source.ref);

  // Compare the basename, not a suffix: `endsWith("SKILL.md")` also accepted
  // files such as "NOT_SKILL.md" or "docs/MY-SKILL.md".
  const skillMdEntries = (treeResponse.tree ?? []).filter(
    (entry) => entry.type === "blob" && path.posix.basename(entry.path) === "SKILL.md",
  );

  let fetched = 0;

  for (const entry of skillMdEntries) {
    // entry.path is like "code-review/SKILL.md" — dirname is the skill dir
    const skillDir = path.posix.dirname(entry.path);
    if (!skillDir || skillDir === ".") continue;

    const slug = slugFromPath(skillDir);
    const skillId = `${source.id}/${slug}`;

    // Idempotency check before downloading — skip if version already cached
    const versionId = `${skillId}@${sha}`;
    if (await versionExists(db, versionId)) {
      fetched++;
      continue;
    }

    // NOTE(review): the file is downloaded via `source.ref`, not the resolved
    // `sha`; if the ref moves between the requests the cached content may not
    // match the recorded SHA — confirm whether the helper accepts a SHA.
    const skillMdUrl = resolveRawGitHubUrl(source.owner, source.repo, source.ref, entry.path);
    let skillMdContent: string;
    try {
      skillMdContent = await fetchText(skillMdUrl);
    } catch {
      // Best effort: one failed download must not abort the whole source.
      continue;
    }

    const { name, description } = parseSkillFrontmatter(skillMdContent);
    const sourceUrl = `https://github.com/${source.owner}/${source.repo}/tree/${source.ref}/${skillDir}`;

    await upsertSkill(db, {
      skillId,
      sourceId: source.id,
      name: name ?? slug,
      description,
      sourceUrl,
    });

    await cacheSkillVersion(db, {
      skillId,
      sha,
      skillMdContent,
      skillMdUrl,
    });

    fetched++;
  }

  return fetched;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Outcome of a `fetchAllSources` run. */
export type FetchAllSourcesResult = {
  /** Sum of the per-source counts returned by the source handlers. */
  fetched: number;
  /** One message per source that failed or had an unknown type. */
  errors: Array<string>;
};
|
||||
|
||||
/**
 * Run every configured source in order and populate the registry database.
 *
 * Sources are processed one after another. A failure in one source is recorded
 * as `Source <id>: <message>` and does not stop the remaining sources.
 *
 * @param sources - Sources to fetch; defaults to `BUILT_IN_SOURCES`.
 * @returns The summed per-source counts and the collected error messages.
 */
export async function fetchAllSources(
  sources: SkillSourceConfig[] = BUILT_IN_SOURCES,
): Promise<FetchAllSourcesResult> {
  const registry = await getSkillRegistryDb();
  const outcome: FetchAllSourcesResult = { fetched: 0, errors: [] };

  for (const source of sources) {
    try {
      switch (source.type) {
        case "anthropic-marketplace":
          outcome.fetched += await fetchAnthropicMarketplace(source, registry);
          break;
        case "github-tree":
          outcome.fetched += await fetchGitHubTree(source, registry);
          break;
        default:
          // Reachable only when a config bypasses the type checker.
          outcome.errors.push(`Unknown source type for ${source.id}`);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      outcome.errors.push(`Source ${source.id}: ${message}`);
    }
  }

  return outcome;
}
|
||||
Loading…
Add table
Reference in a new issue