From a3537a86e38fb31fbea1be569ec263149d37d332 Mon Sep 17 00:00:00 2001
From: dotta
Date: Thu, 26 Mar 2026 18:22:15 -0500
Subject: [PATCH] Add filtered Paperclip commit exports

Add a repeatable --exclude-owner option that drops commits whose
repository owner matches (compared case-insensitively), and record the
excluded owners under `filters.excludedOwners` in the summary.

Add --output and --export-format (csv | json, default csv) to write the
filtered commit list to a file. Exported commits are ordered newest
first, then by repository name, then by sha.

Co-Authored-By: Paperclip
---
Review notes:
- writeExport's return type is spelled out as Promise<void>.
- escapeCsv also quotes fields that contain a carriage return.

 scripts/paperclip-commit-metrics.ts | 162 +++++++++++++++++++++++++++-
 1 file changed, 161 insertions(+), 1 deletion(-)

diff --git a/scripts/paperclip-commit-metrics.ts b/scripts/paperclip-commit-metrics.ts
index e23cff2f..5f989ca1 100644
--- a/scripts/paperclip-commit-metrics.ts
+++ b/scripts/paperclip-commit-metrics.ts
@@ -21,8 +21,11 @@ const PAPERCLIP_NAME = "paperclip";
 interface CliOptions {
   cacheFile: string;
   end: Date;
+  excludeOwners: string[];
+  exportFormat: "csv" | "json";
   includePrivate: boolean;
   json: boolean;
+  output: string | null;
   query: string;
   refreshSearch: boolean;
   refreshStats: boolean;
@@ -130,6 +133,9 @@ interface Summary {
     searchField: CliOptions["searchField"];
     start: string;
   };
+  filters: {
+    excludedOwners: string[];
+  };
   repos: {
     count: number;
     sample: string[];
@@ -159,7 +165,13 @@ async function main() {
   cache.updatedAt = new Date().toISOString();
   await saveCache(options.cacheFile, cache);
 
-  const summary = buildSummary(cache, options, sortedShas, fetchedThisRun);
+  const filteredShas = sortFilteredShas(cache, filterShas(cache, sortedShas, options));
+  const summary = buildSummary(cache, options, filteredShas, fetchedThisRun);
+
+  if (options.output) {
+    await writeExport(options.output, options.exportFormat, cache, filteredShas, summary);
+  }
+
   if (options.json) {
     console.log(JSON.stringify(summary, null, 2));
     return;
@@ -172,8 +184,11 @@ function parseArgs(argv: string[]): CliOptions {
   const options: CliOptions = {
     cacheFile: DEFAULT_CACHE_FILE,
     end: new Date(),
+    excludeOwners: [],
+    exportFormat: "csv",
     includePrivate: false,
     json: false,
+    output: null,
     query: DEFAULT_QUERY,
     refreshSearch: false,
     refreshStats: false,
@@ -193,12 +208,26 @@ function parseArgs(argv: string[]): CliOptions {
       case "--end":
         options.end = parseDateArg(requireValue(argv, ++index, arg), arg);
         break;
+      case "--exclude-owner":
+        options.excludeOwners.push(requireValue(argv, ++index, arg).toLowerCase());
+        break;
+      case "--export-format": {
+        const value = requireValue(argv, ++index, arg);
+        if (value !== "csv" && value !== "json") {
+          throw new Error(`Invalid --export-format value: ${value}`);
+        }
+        options.exportFormat = value;
+        break;
+      }
       case "--include-private":
         options.includePrivate = true;
         break;
       case "--json":
         options.json = true;
         break;
+      case "--output":
+        options.output = requireValue(argv, ++index, arg);
+        break;
       case "--query":
         options.query = requireValue(argv, ++index, arg);
         break;
@@ -288,10 +317,13 @@ Options:
   --query Commit search string (default: ${DEFAULT_QUERY})
   --search-field author-date | committer-date (default: ${DEFAULT_SEARCH_FIELD})
   --include-private Include repos visible to the current token
+  --exclude-owner Exclude repositories owned by this GitHub owner/org (repeatable)
   --cache-file Cache path (default: ${DEFAULT_CACHE_FILE})
   --skip-stats Skip additions/deletions enrichment
   --stats-fetch-limit Max uncached commit stats to fetch this run (default: ${DEFAULT_STATS_FETCH_LIMIT})
   --stats-concurrency Parallel commit stat requests (default: ${DEFAULT_STATS_CONCURRENCY})
+  --output Write the full filtered result set to a file
+  --export-format csv | json for --output exports (default: csv)
   --refresh-search Ignore cached search windows
   --refresh-stats Re-fetch cached commit stats
   --json Print JSON summary
@@ -443,6 +475,39 @@ function buildSearchQuery(options: CliOptions, start: Date, end: Date): string {
   return `${options.query} ${qualifiers.join(" ")}`.trim();
 }
 
+function filterShas(cache: CacheFile, shas: string[], options: CliOptions): string[] {
+  if (options.excludeOwners.length === 0) {
+    return shas;
+  }
+
+  const excludedOwners = new Set(options.excludeOwners);
+  return shas.filter((sha) => {
+    const commit = cache.commits[sha];
+    if (!commit) {
+      return false;
+    }
+    return !excludedOwners.has(getRepoOwner(commit.repositoryFullName));
+  });
+}
+
+function sortFilteredShas(cache: CacheFile, shas: string[]): string[] {
+  return [...shas].sort((leftSha, rightSha) => {
+    const left = cache.commits[leftSha];
+    const right = cache.commits[rightSha];
+    const leftTime = left?.committedAt ? Date.parse(left.committedAt) : 0;
+    const rightTime = right?.committedAt ? Date.parse(right.committedAt) : 0;
+    if (rightTime !== leftTime) {
+      return rightTime - leftTime;
+    }
+
+    const repoCompare = (left?.repositoryFullName ?? "").localeCompare(right?.repositoryFullName ?? "");
+    if (repoCompare !== 0) {
+      return repoCompare;
+    }
+    return leftSha.localeCompare(rightSha);
+  });
+}
+
 function formatQueryDate(value: Date): string {
   return value.toISOString().replace(".000Z", "Z");
 }
@@ -521,6 +586,10 @@ function normalizeOptional(value: string | null | undefined): string | null {
   return trimmed ? trimmed : null;
 }
 
+function getRepoOwner(repositoryFullName: string): string {
+  return repositoryFullName.split("/", 1)[0]?.toLowerCase() ?? "";
+}
+
 async function enrichCommitStats(
   client: GitHubClient,
   cache: CacheFile,
@@ -617,6 +686,9 @@ function buildSummary(cache: CacheFile, options: CliOptions, shas: string[], fet
       searchField: options.searchField,
       start: options.start.toISOString(),
     },
+    filters: {
+      excludedOwners: [...options.excludeOwners].sort(),
+    },
     repos: {
       count: repoNames.size,
       sample: repoSample,
@@ -635,6 +707,9 @@ function printSummary(summary: Summary) {
   console.log("Paperclip commit metrics");
   console.log(`Query: ${summary.detectedQuery}`);
   console.log(`Range: ${summary.range.start} -> ${summary.range.end} (${summary.range.searchField})`);
+  if (summary.filters.excludedOwners.length > 0) {
+    console.log(`Excluded owners: ${summary.filters.excludedOwners.join(", ")}`);
+  }
   console.log(`Commits: ${summary.totals.commits}`);
   console.log(`Distinct repos: ${summary.repos.count}`);
   console.log(`Distinct contributors: ${summary.contributors.count}`);
@@ -660,6 +735,91 @@ function printSummary(summary: Summary) {
   }
 }
 
+async function writeExport(
+  outputPath: string,
+  format: CliOptions["exportFormat"],
+  cache: CacheFile,
+  shas: string[],
+  summary: Summary,
+): Promise<void> {
+  await fs.mkdir(path.dirname(outputPath), { recursive: true });
+  if (format === "json") {
+    const report = {
+      summary,
+      commits: shas.map((sha) => buildExportRow(cache, sha)),
+    };
+    await fs.writeFile(outputPath, JSON.stringify(report, null, 2), "utf8");
+    return;
+  }
+
+  const header = [
+    "committedAt",
+    "repository",
+    "repositoryUrl",
+    "sha",
+    "commitUrl",
+    "authorLogin",
+    "authorName",
+    "authorEmail",
+    "contributors",
+    "additions",
+    "deletions",
+    "totalChanges",
+  ];
+  const rows = [header.join(",")];
+  for (const sha of shas) {
+    const row = buildExportRow(cache, sha);
+    rows.push(
+      [
+        row.committedAt,
+        row.repository,
+        row.repositoryUrl,
+        row.sha,
+        row.commitUrl,
+        row.authorLogin,
+        row.authorName,
+        row.authorEmail,
+        row.contributors,
+        String(row.additions),
+        String(row.deletions),
+        String(row.totalChanges),
+      ]
+        .map(escapeCsv)
+        .join(","),
+    );
+  }
+  await fs.writeFile(outputPath, `${rows.join("\n")}\n`, "utf8");
+}
+
+function buildExportRow(cache: CacheFile, sha: string) {
+  const commit = cache.commits[sha];
+  if (!commit) {
+    throw new Error(`Missing cached commit for sha ${sha}`);
+  }
+  const stats = cache.stats[sha];
+  return {
+    additions: stats?.additions ?? 0,
+    authorEmail: commit.authorEmail ?? "",
+    authorLogin: commit.authorLogin ?? "",
+    authorName: commit.authorName ?? "",
+    commitUrl: commit.htmlUrl,
+    committedAt: commit.committedAt ?? "",
+    contributors: commit.contributors.map((contributor) => contributor.login ?? contributor.displayName).join(" | "),
+    deletions: stats?.deletions ?? 0,
+    repository: commit.repositoryFullName,
+    repositoryUrl: commit.repositoryUrl,
+    sha: commit.sha,
+    totalChanges: stats?.total ?? 0,
+  };
+}
+
+function escapeCsv(value: string): string {
+  if (/[",\r\n]/.test(value)) {
+    return `"${value.replaceAll("\"", "\"\"")}"`;
+  }
+  return value;
+}
+
 function makeWindowKey(start: Date, end: Date): string {
   return `${start.toISOString()}..${end.toISOString()}`;
 }