|
| 1 | +import { createHash } from 'node:crypto'; |
| 2 | +import type { SourceAdapter, RawRecord, SignalWeight } from './adapter.js'; |
| 3 | + |
/** Settings needed to construct a {@link GitHubAdapter} for one repository. */
export interface GitHubAdapterConfig {
  /** API token (the `GITHUB_TOKEN` value); sent as a Bearer credential. */
  token: string;
  /** Owner (user or organisation) of the repository. */
  owner: string;
  /** Name of the repository. */
  repo: string;
  /** API root to talk to; `https://api.github.com` is used when omitted. */
  baseUrl?: string;
}

/** API root used when {@link GitHubAdapterConfig.baseUrl} is not supplied. */
const GITHUB_API = 'https://api.github.com';
| 12 | + |
/**
 * Phrases that announce a decision. Matched as case-insensitive substrings of
 * the record content. Shared by `trigger_phrase` and `trigger_phrase_multiple`
 * so the two signals always agree on the vocabulary.
 */
const TRIGGER_PHRASES: readonly string[] = [
  'we decided',
  'going with',
  "won't do",
  "won't fix",
  'not doing',
  'deprioritizing',
  'closing',
  'agreed to',
  'moving forward with',
  'final call',
  'decision:',
  'resolved:',
  'shipping',
  'locking in',
];

/** Hedging expressions, matched as case-insensitive substrings of the content. */
const HEDGE_PHRASES: readonly string[] = [
  'maybe',
  'probably',
  'might',
  'could be',
  'not sure',
  'i think',
  'possibly',
];

/** Label names (compared in lower case) that mark a record as a decision. */
const DECISIVE_LABELS: readonly string[] = [
  'decision',
  'rfc',
  'accepted',
  'approved',
];

/**
 * Matches a commitment/imperative opener at the start of ANY line of the
 * content (`m` flag), ignoring case (`i` flag). No `g` flag, so `test()` keeps
 * no state between calls and the regex can safely be shared.
 */
const IMPERATIVE_LINE_START =
  /^(we will|we are|we're going to|let's|ship|build|remove|add|create|close|cut)\b/im;

/** Number of distinct trigger phrases that occur in `content`. */
function countTriggerPhrases(content: string): number {
  const lower = content.toLowerCase();
  return TRIGGER_PHRASES.filter((phrase) => lower.includes(phrase)).length;
}

// GitHub-specific confidence signals.
// Each entry pairs a weight with a predicate over a raw record; negative
// weights belong to signals that argue against the record being a decision.
const GITHUB_SIGNALS: SignalWeight[] = [
  {
    // At least one trigger phrase is present.
    name: 'trigger_phrase',
    weight: 0.3,
    detect: (r) => countTriggerPhrases(r.content) >= 1,
  },
  {
    // Two or more different trigger phrases are present; fires in addition to
    // `trigger_phrase`, never instead of it.
    name: 'trigger_phrase_multiple',
    weight: 0.3,
    detect: (r) => countTriggerPhrases(r.content) >= 2,
  },
  {
    // Some line of the content opens with an imperative/commitment verb.
    name: 'imperative_verb',
    weight: 0.15,
    detect: (r) => IMPERATIVE_LINE_START.test(r.content),
  },
  {
    // The issue/PR is closed, or the PR was merged.
    name: 'state_is_decisive',
    weight: 0.35,
    detect: (r) => {
      const meta = r.metadata as
        | { state?: string; merged?: boolean }
        | undefined;
      return meta?.state === 'closed' || meta?.merged === true;
    },
  },
  {
    // The record is a merged pull request (also satisfies `state_is_decisive`).
    name: 'merged_pr',
    weight: 0.3,
    detect: (r) => {
      const meta = r.metadata as { merged?: boolean } | undefined;
      return meta?.merged === true;
    },
  },
  {
    // The record carries at least one decision-style label.
    name: 'decision_label',
    weight: 0.25,
    detect: (r) => {
      const meta = r.metadata as { labels?: string[] } | undefined;
      return (meta?.labels ?? []).some((label) =>
        DECISIVE_LABELS.includes(label.toLowerCase())
      );
    },
  },
  {
    // The record has a non-empty actor.
    name: 'explicit_actor',
    weight: 0.1,
    detect: (r) => r.actor != null && r.actor.length > 0,
  },
  {
    // The record is a comment rather than an issue/PR body.
    name: 'is_comment',
    weight: 0.2,
    detect: (r) => {
      const meta = r.metadata as { recordType?: string } | undefined;
      return meta?.recordType === 'comment';
    },
  },
  {
    // The content contains a question mark anywhere, or opens with a
    // question-style lead-in.
    name: 'question_framing',
    weight: -0.2,
    detect: (r) => {
      const lower = r.content.toLowerCase();
      return (
        lower.includes('?') ||
        lower.startsWith('should we') ||
        lower.startsWith('what if')
      );
    },
  },
  {
    // The content contains hedging language.
    name: 'hedge_language',
    weight: -0.15,
    detect: (r) => {
      const lower = r.content.toLowerCase();
      return HEDGE_PHRASES.some((hedge) => lower.includes(hedge));
    },
  },
];
| 148 | + |
/**
 * The fields of an issue-list item that this adapter reads. The same shape is
 * used for issues and pull requests.
 */
interface GitHubIssue {
  /** Identifier used to build the record's external id. */
  id: number;
  /** Issue/PR number; used to address the item's comments. */
  number: number;
  title: string;
  /** Description text; `null` when the item has none. */
  body: string | null;
  /** State string as returned by the API (the adapter compares it to `closed`). */
  state: string;
  /** Author; may be `null`. */
  user: { login: string } | null;
  labels: { name: string }[];
  /** Set when the item is a pull request; `merged_at` is non-null once merged. */
  pull_request?: { merged_at: string | null };
  /** Creation timestamp in a format `Date` can parse. */
  created_at: string;
  updated_at: string;
  /** Browser URL of the item. */
  html_url: string;
}
| 162 | + |
/** The fields of an issue/PR comment that this adapter reads. */
interface GitHubComment {
  /** Identifier used to build the record's external id. */
  id: number;
  /** Comment text; becomes the record content verbatim. */
  body: string;
  /** Author; may be `null`. */
  user: { login: string } | null;
  /** Creation timestamp in a format `Date` can parse. */
  created_at: string;
  updated_at: string;
}
| 170 | + |
/**
 * Source adapter that reads issues, pull requests and their comments from one
 * GitHub repository and turns each of them into a `RawRecord`.
 */
export class GitHubAdapter implements SourceAdapter {
  system = 'github';
  signalModel = GITHUB_SIGNALS;

  /** Page size requested from list endpoints; a shorter page is the last one. */
  private static readonly PAGE_SIZE = 100;

  private readonly token: string;
  private readonly owner: string;
  private readonly repo: string;
  private readonly baseUrl: string;

  /**
   * @param config Token, repository coordinates and optional API root. Trailing
   *   slashes on `baseUrl` are dropped so request paths never contain `//`.
   */
  constructor(config: GitHubAdapterConfig) {
    this.token = config.token;
    this.owner = config.owner;
    this.repo = config.repo;
    this.baseUrl = (config.baseUrl ?? GITHUB_API).replace(/\/+$/, '');
  }

  /**
   * Builds an adapter from `GITHUB_TOKEN`, `GITHUB_OWNER` and `GITHUB_REPO`.
   *
   * @returns The adapter, or `undefined` when any of the three variables is
   *   missing or empty.
   */
  static fromEnv(): GitHubAdapter | undefined {
    const token = process.env['GITHUB_TOKEN'];
    const owner = process.env['GITHUB_OWNER'];
    const repo = process.env['GITHUB_REPO'];
    if (!token || !owner || !repo) return undefined;
    return new GitHubAdapter({ token, owner, repo });
  }

  /** @returns Hex-encoded SHA-256 digest of the record's `raw_payload`. */
  hashRecord(record: RawRecord): string {
    return createHash('sha256').update(record.raw_payload).digest('hex');
  }

  /**
   * Fetches every issue and pull request returned by the issues endpoint for
   * `since` (all states), plus the comments of each.
   *
   * Records are emitted in API order: each issue/PR record is immediately
   * followed by the records of its comments.
   *
   * @param since Passed to the issues endpoint as its `since` filter.
   * @returns One record per issue/PR and one per comment.
   * @throws Error when the API answers with a non-success status, or is still
   *   rate limiting after the retry budget is used up.
   */
  async fetch(since: Date): Promise<RawRecord[]> {
    const records: RawRecord[] = [];
    const sinceISO = since.toISOString();

    // The issues endpoint returns pull requests too.
    for (let page = 1; ; page++) {
      const url = `${this.baseUrl}/repos/${this.owner}/${this.repo}/issues?since=${sinceISO}&state=all&per_page=${GitHubAdapter.PAGE_SIZE}&page=${page}`;
      const issues = await this.api<GitHubIssue[]>(url);

      for (const issue of issues) {
        records.push(this.issueToRecord(issue));

        // The payload carries a comment count; when it is reported as zero the
        // comments request (one API call per issue) is skipped. If the field
        // is absent the comments are fetched as before.
        const commentCount = (issue as GitHubIssue & { comments?: number })
          .comments;
        if (commentCount === 0) continue;

        const comments = await this.fetchComments(issue.number);
        for (const comment of comments) {
          records.push(this.commentToRecord(comment, issue));
        }
      }

      // A short (or empty) page means there is nothing further to request.
      if (issues.length < GitHubAdapter.PAGE_SIZE) break;
    }

    return records;
  }

  /** Builds the record for an issue or pull request. */
  private issueToRecord(issue: GitHubIssue): RawRecord {
    const isPR = !!issue.pull_request;
    const merged = issue.pull_request?.merged_at != null;

    return {
      external_id: `github-issue-${issue.id}`,
      content: this.formatIssueContent(issue),
      raw_payload: JSON.stringify(issue),
      actor: issue.user?.login,
      created_at: new Date(issue.created_at).getTime(),
      metadata: {
        recordType: isPR ? 'pull_request' : 'issue',
        number: issue.number,
        state: issue.state,
        merged,
        labels: issue.labels.map((l) => l.name),
        url: issue.html_url,
      },
    };
  }

  /**
   * Builds the record for a comment. The parent's number, title and state are
   * copied into both the payload and the metadata so the comment can be
   * interpreted without its parent.
   */
  private commentToRecord(comment: GitHubComment, issue: GitHubIssue): RawRecord {
    return {
      external_id: `github-comment-${comment.id}`,
      content: comment.body,
      raw_payload: JSON.stringify({
        ...comment,
        issueNumber: issue.number,
        issueTitle: issue.title,
      }),
      actor: comment.user?.login,
      created_at: new Date(comment.created_at).getTime(),
      metadata: {
        recordType: 'comment',
        issueNumber: issue.number,
        issueTitle: issue.title,
        state: issue.state,
      },
    };
  }

  /** Fetches all comments of one issue/PR, following pagination. */
  private async fetchComments(issueNumber: number): Promise<GitHubComment[]> {
    const all: GitHubComment[] = [];

    for (let page = 1; ; page++) {
      const url = `${this.baseUrl}/repos/${this.owner}/${this.repo}/issues/${issueNumber}/comments?per_page=${GitHubAdapter.PAGE_SIZE}&page=${page}`;
      const comments = await this.api<GitHubComment[]>(url);

      all.push(...comments);

      if (comments.length < GitHubAdapter.PAGE_SIZE) break;
    }

    return all;
  }

  /**
   * Renders an issue/PR as text: a `[Issue #n]`/`[PR #n]` title line, the body
   * (if any), the state, a merged marker and the label list, one per line.
   */
  private formatIssueContent(issue: GitHubIssue): string {
    const isPR = !!issue.pull_request;
    const type = isPR ? 'PR' : 'Issue';
    const parts = [`[${type} #${issue.number}] ${issue.title}`];
    if (issue.body) {
      parts.push(issue.body);
    }
    parts.push(`State: ${issue.state}`);
    if (issue.pull_request?.merged_at) {
      parts.push('Merged: yes');
    }
    if (issue.labels.length > 0) {
      parts.push(`Labels: ${issue.labels.map((l) => l.name).join(', ')}`);
    }
    return parts.join('\n');
  }

  /**
   * Reads a response header as a base-10 integer.
   *
   * @returns The number, or `undefined` when the header is absent or not
   *   numeric (so a malformed header can never turn a wait into `NaN`).
   */
  private static headerInt(headers: Headers, name: string): number | undefined {
    const raw = headers.get(name);
    if (raw === null) return undefined;
    const value = Number.parseInt(raw, 10);
    return Number.isNaN(value) ? undefined : value;
  }

  /**
   * Milliseconds until the rate-limit window resets according to
   * `X-RateLimit-Reset` (epoch seconds), clamped to 1s..120s. Falls back to
   * 60s (within the clamp) when the header is missing or unusable.
   */
  private static msUntilReset(headers: Headers): number {
    const resetSec = GitHubAdapter.headerInt(headers, 'X-RateLimit-Reset');
    const untilReset =
      resetSec !== undefined ? resetSec * 1000 - Date.now() : 60_000;
    return Math.max(1000, Math.min(untilReset, 120_000));
  }

  /**
   * Performs an authenticated GET and parses the JSON response.
   *
   * Rate-limited responses are retried up to three times. GitHub reports rate
   * limiting with status 429 *or* 403, so a 403 is treated as rate limiting
   * when `X-RateLimit-Remaining` is 0 or a `Retry-After` header is present;
   * any other 403 is an ordinary error. After a successful response with
   * fewer than 10 requests remaining, the call pauses before returning so the
   * following requests do not run into the limit.
   *
   * @throws Error on a non-success status, or when still rate limited after
   *   the last retry.
   */
  private async api<T>(url: string): Promise<T> {
    const maxRetries = 3;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const response = await fetch(url, {
        headers: {
          Accept: 'application/vnd.github.v3+json',
          Authorization: `Bearer ${this.token}`,
          'X-GitHub-Api-Version': '2022-11-28',
        },
      });

      const remaining = GitHubAdapter.headerInt(
        response.headers,
        'X-RateLimit-Remaining'
      );
      const retryAfterSec = GitHubAdapter.headerInt(
        response.headers,
        'retry-after'
      );

      const rateLimited =
        response.status === 429 ||
        (response.status === 403 &&
          (remaining === 0 || retryAfterSec !== undefined));

      if (rateLimited) {
        if (attempt >= maxRetries) {
          throw new Error('GitHub API rate limited after max retries');
        }
        // Wait exactly once, preferring the server's explicit instruction,
        // then the reset time, then a linear backoff.
        let waitMs: number;
        if (retryAfterSec !== undefined) {
          waitMs = Math.max(0, retryAfterSec) * 1000;
        } else if (remaining === 0) {
          waitMs = GitHubAdapter.msUntilReset(response.headers);
        } else {
          waitMs = (attempt + 1) * 5000;
        }
        console.warn(`[provenant] GitHub rate limited, waiting ${waitMs}ms...`);
        // Release the unread body before the retry.
        await response.body?.cancel();
        await sleep(waitMs);
        continue;
      }

      if (!response.ok) {
        const body = await response.text();
        throw new Error(`GitHub API error ${response.status}: ${body}`);
      }

      const data = (await response.json()) as T;

      if (remaining !== undefined && remaining < 10) {
        const waitMs = GitHubAdapter.msUntilReset(response.headers);
        console.warn(
          `[provenant] GitHub rate limit low (${remaining} remaining), waiting ${Math.round(waitMs / 1000)}s...`
        );
        await sleep(waitMs);
      }

      return data;
    }
    throw new Error('GitHub API request failed after retries');
  }
}
| 357 | + |
/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of `ms`
 * milliseconds fires. It never rejects.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
0 commit comments