Skip to content

Commit 4c38f8d

Browse files
author
StackMemory Bot (CLI)
committed
feat(provenant): add GitHub adapter for PR/issue ingestion
New SourceAdapter for GitHub repos: - Fetches issues + PRs via REST API with pagination - Fetches comments per issue - Rate limit handling (sleep when X-RateLimit-Remaining < 10) - Signal model: closed/merged PRs decisive, decision/rfc labels boost - fromEnv() reads GITHUB_TOKEN, GITHUB_OWNER, GITHUB_REPO
1 parent 20d980a commit 4c38f8d

2 files changed

Lines changed: 367 additions & 0 deletions

File tree

Lines changed: 360 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,360 @@
1+
import { createHash } from 'node:crypto';
2+
import type { SourceAdapter, RawRecord, SignalWeight } from './adapter.js';
3+
4+
/** Settings required to point a GitHubAdapter at one repository. */
export interface GitHubAdapterConfig {
  /** API token (GITHUB_TOKEN); sent as a Bearer credential on every request. */
  token: string;
  /** Owner (user or organisation) of the repository. */
  owner: string;
  /** Name of the repository. */
  repo: string;
  /** API root to call instead of the default https://api.github.com. */
  baseUrl?: string;
}
10+
11+
// Default API root; GitHubAdapter falls back to it when config.baseUrl is not provided.
const GITHUB_API = 'https://api.github.com';
12+
13+
// GitHub-specific confidence signals
14+
/**
 * Phrases that announce a decision. Matched as case-insensitive substrings.
 * Shared by `trigger_phrase` and `trigger_phrase_multiple` so the two signals
 * can never disagree about what counts as a trigger.
 */
const DECISION_PHRASES: readonly string[] = [
  'we decided',
  'going with',
  "won't do",
  "won't fix",
  'not doing',
  'deprioritizing',
  'closing',
  'agreed to',
  'moving forward with',
  'final call',
  'decision:',
  'resolved:',
  'shipping',
  'locking in',
];

/** Wording that signals uncertainty. Matched as case-insensitive substrings. */
const HEDGE_PHRASES: readonly string[] = [
  'maybe',
  'probably',
  'might',
  'could be',
  'not sure',
  'i think',
  'possibly',
];

/** Label names (compared lower-cased) that mark an issue/PR as a decision. */
const DECISIVE_LABELS: readonly string[] = ['decision', 'rfc', 'accepted', 'approved'];

/**
 * Matches an imperative / commitment opener at the start of ANY line
 * (`m` flag), case-insensitively. No `g` flag, so `.test()` is stateless and
 * the regex can safely be shared across calls.
 */
const IMPERATIVE_OPENER =
  /^(we will|we are|we're going to|let's|ship|build|remove|add|create|close|cut)\b/im;

/** Number of distinct decision phrases that occur in `content`. */
function countDecisionPhrases(content: string): number {
  const lower = content.toLowerCase();
  return DECISION_PHRASES.filter((phrase) => lower.includes(phrase)).length;
}

/**
 * GitHub-specific confidence signals.
 *
 * Each entry pairs a weight (negative weights are penalties) with a predicate
 * over a record. Predicates read `content`, `actor`, and the adapter-written
 * `metadata` fields (`state`, `merged`, `labels`, `recordType`).
 */
const GITHUB_SIGNALS: SignalWeight[] = [
  {
    // At least one decision phrase is present.
    name: 'trigger_phrase',
    weight: 0.3,
    detect: (r) => countDecisionPhrases(r.content) >= 1,
  },
  {
    // Two or more distinct decision phrases; stacks on top of trigger_phrase.
    name: 'trigger_phrase_multiple',
    weight: 0.3,
    detect: (r) => countDecisionPhrases(r.content) >= 2,
  },
  {
    name: 'imperative_verb',
    weight: 0.15,
    detect: (r) => IMPERATIVE_OPENER.test(r.content),
  },
  {
    // Closed issues/PRs and merged PRs are treated as settled.
    name: 'state_is_decisive',
    weight: 0.35,
    detect: (r) => {
      const meta = r.metadata as
        | { state?: string; merged?: boolean }
        | undefined;
      return meta?.state === 'closed' || meta?.merged === true;
    },
  },
  {
    // Extra boost for merged PRs, in addition to state_is_decisive.
    name: 'merged_pr',
    weight: 0.3,
    detect: (r) => {
      const meta = r.metadata as { merged?: boolean } | undefined;
      return meta?.merged === true;
    },
  },
  {
    name: 'decision_label',
    weight: 0.25,
    detect: (r) => {
      const meta = r.metadata as { labels?: string[] } | undefined;
      return (meta?.labels ?? []).some((label) =>
        DECISIVE_LABELS.includes(label.toLowerCase())
      );
    },
  },
  {
    name: 'explicit_actor',
    weight: 0.1,
    detect: (r) => r.actor != null && r.actor.length > 0,
  },
  {
    name: 'is_comment',
    weight: 0.2,
    detect: (r) => {
      const meta = r.metadata as { recordType?: string } | undefined;
      return meta?.recordType === 'comment';
    },
  },
  {
    // Penalty: any '?' in the text, or content that opens with a question stem.
    name: 'question_framing',
    weight: -0.2,
    detect: (r) => {
      const lower = r.content.toLowerCase();
      return (
        lower.includes('?') ||
        lower.startsWith('should we') ||
        lower.startsWith('what if')
      );
    },
  },
  {
    // Penalty: hedging language anywhere in the text.
    name: 'hedge_language',
    weight: -0.15,
    detect: (r) => {
      const lower = r.content.toLowerCase();
      return HEDGE_PHRASES.some((hedge) => lower.includes(hedge));
    },
  },
];
148+
149+
/**
 * The fields of an entry returned by the repository issues listing that this
 * adapter reads. The same listing also carries pull requests.
 */
interface GitHubIssue {
  /** Numeric id; used to build the record's `external_id`. */
  id: number;
  /** Issue/PR number; used to address the comments endpoint. */
  number: number;
  title: string;
  /** Description text; skipped when null. */
  body: string | null;
  state: string;
  /** Author; null when no user is attached. */
  user: { login: string } | null;
  labels: { name: string }[];
  /**
   * Present only on pull requests. A non-null `merged_at` is treated as
   * "merged".
   */
  pull_request?: { merged_at: string | null };
  /** Timestamp string; parsed with `new Date(...)`. */
  created_at: string;
  updated_at: string;
  html_url: string;
}
162+
163+
/** The fields of an issue/PR comment that this adapter reads. */
interface GitHubComment {
  /** Numeric id; used to build the record's `external_id`. */
  id: number;
  /** Comment text; becomes the record content. */
  body: string;
  /** Author; null when no user is attached. */
  user: { login: string } | null;
  /** Timestamp string; parsed with `new Date(...)`. */
  created_at: string;
  updated_at: string;
}
170+
171+
/**
 * Source adapter that ingests issues, pull requests and their comments from a
 * single GitHub repository through the REST API.
 */
export class GitHubAdapter implements SourceAdapter {
  system = 'github';
  signalModel = GITHUB_SIGNALS;

  /** Page size requested from list endpoints; a shorter page is the last one. */
  private static readonly PAGE_SIZE = 100;

  private readonly token: string;
  private readonly owner: string;
  private readonly repo: string;
  private readonly baseUrl: string;

  /**
   * @param config Credentials and repository coordinates. `baseUrl` falls back
   *   to the module default when omitted.
   */
  constructor(config: GitHubAdapterConfig) {
    this.token = config.token;
    this.owner = config.owner;
    this.repo = config.repo;
    this.baseUrl = config.baseUrl ?? GITHUB_API;
  }

  /**
   * Builds an adapter from GITHUB_TOKEN, GITHUB_OWNER and GITHUB_REPO.
   *
   * @returns The adapter, or `undefined` when any of the three variables is
   *   unset or empty.
   */
  static fromEnv(): GitHubAdapter | undefined {
    const token = process.env['GITHUB_TOKEN'];
    const owner = process.env['GITHUB_OWNER'];
    const repo = process.env['GITHUB_REPO'];
    if (!token || !owner || !repo) return undefined;
    return new GitHubAdapter({ token, owner, repo });
  }

  /** Returns the hex SHA-256 digest of the record's `raw_payload`. */
  hashRecord(record: RawRecord): string {
    return createHash('sha256').update(record.raw_payload).digest('hex');
  }

  /**
   * Fetches every issue and pull request matching `since`, each followed by
   * its comments.
   *
   * @param since Forwarded (as an ISO-8601 string) in the `since` query
   *   parameter of the issues listing.
   * @returns One record per issue/PR, immediately followed by one record per
   *   comment on it, in the order the API returned them.
   * @throws Error when a request fails or rate-limit retries are exhausted.
   */
  async fetch(since: Date): Promise<RawRecord[]> {
    const records: RawRecord[] = [];
    const sinceISO = since.toISOString();

    // The issues listing includes PRs. Page until a short (or empty) page.
    for (let page = 1; ; page++) {
      const url = `${this.repoUrl()}/issues?since=${sinceISO}&state=all&per_page=${GitHubAdapter.PAGE_SIZE}&page=${page}`;
      const issues = await this.api<GitHubIssue[]>(url);

      for (const issue of issues) {
        records.push(this.toIssueRecord(issue));

        // NOTE(review): all comments are fetched regardless of `since`;
        // confirm whether downstream de-duplication relies on that before
        // narrowing it.
        const comments = await this.fetchComments(issue.number);
        for (const comment of comments) {
          records.push(this.toCommentRecord(issue, comment));
        }
      }

      if (issues.length < GitHubAdapter.PAGE_SIZE) break;
    }

    return records;
  }

  /** Root URL of this repository's API resources. */
  private repoUrl(): string {
    return `${this.baseUrl}/repos/${this.owner}/${this.repo}`;
  }

  /** Converts an issue or pull request into a record. */
  private toIssueRecord(issue: GitHubIssue): RawRecord {
    const isPR = !!issue.pull_request;
    const merged = issue.pull_request?.merged_at != null;
    return {
      external_id: `github-issue-${issue.id}`,
      content: this.formatIssueContent(issue),
      raw_payload: JSON.stringify(issue),
      actor: issue.user?.login,
      created_at: new Date(issue.created_at).getTime(),
      metadata: {
        recordType: isPR ? 'pull_request' : 'issue',
        number: issue.number,
        state: issue.state,
        merged,
        labels: issue.labels.map((l) => l.name),
        url: issue.html_url,
      },
    };
  }

  /**
   * Converts a comment into a record. The parent's number, title and state
   * are copied into the metadata so the comment can be scored with that
   * context.
   */
  private toCommentRecord(issue: GitHubIssue, comment: GitHubComment): RawRecord {
    return {
      external_id: `github-comment-${comment.id}`,
      // Guard against a missing body so signal detectors always get a string.
      content: comment.body ?? '',
      raw_payload: JSON.stringify({
        ...comment,
        issueNumber: issue.number,
        issueTitle: issue.title,
      }),
      actor: comment.user?.login,
      created_at: new Date(comment.created_at).getTime(),
      metadata: {
        recordType: 'comment',
        issueNumber: issue.number,
        issueTitle: issue.title,
        state: issue.state,
      },
    };
  }

  /** Fetches all comments of one issue/PR, following pagination. */
  private async fetchComments(issueNumber: number): Promise<GitHubComment[]> {
    const all: GitHubComment[] = [];

    for (let page = 1; ; page++) {
      const url = `${this.repoUrl()}/issues/${issueNumber}/comments?per_page=${GitHubAdapter.PAGE_SIZE}&page=${page}`;
      const comments = await this.api<GitHubComment[]>(url);
      all.push(...comments);
      if (comments.length < GitHubAdapter.PAGE_SIZE) break;
    }

    return all;
  }

  /**
   * Renders an issue/PR as plain text: a header line, the body (if any), the
   * state, a merged marker, and the label list.
   */
  private formatIssueContent(issue: GitHubIssue): string {
    const isPR = !!issue.pull_request;
    const type = isPR ? 'PR' : 'Issue';
    const parts = [`[${type} #${issue.number}] ${issue.title}`];
    if (issue.body) {
      parts.push(issue.body);
    }
    parts.push(`State: ${issue.state}`);
    if (issue.pull_request?.merged_at) {
      parts.push('Merged: yes');
    }
    if (issue.labels.length > 0) {
      parts.push(`Labels: ${issue.labels.map((l) => l.name).join(', ')}`);
    }
    return parts.join('\n');
  }

  /**
   * Performs an authenticated GET and parses the JSON response.
   *
   * Rate limiting:
   * - A 429, or a 403 that carries `retry-after` or a zero
   *   `X-RateLimit-Remaining`, is treated as rate limiting and retried up to
   *   three times. The wait is `retry-after` if present, else the time until
   *   `X-RateLimit-Reset` (clamped to 1-120 s) when the quota is exhausted,
   *   else a linear back-off.
   * - Any other non-2xx status throws immediately, without sleeping first.
   * - After a successful response with fewer than 10 requests remaining, the
   *   call pauses (clamped to 1-120 s) before returning, to slow the caller
   *   down.
   *
   * @throws Error on a non-retryable HTTP error or when retries are exhausted.
   */
  private async api<T>(url: string): Promise<T> {
    const maxRetries = 3;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const response = await fetch(url, {
        headers: {
          Accept: 'application/vnd.github.v3+json',
          Authorization: `Bearer ${this.token}`,
          'X-GitHub-Api-Version': '2022-11-28',
        },
      });

      const remaining = GitHubAdapter.intHeader(response, 'X-RateLimit-Remaining');
      const retryAfterSec = GitHubAdapter.intHeader(response, 'retry-after');

      // GitHub reports rate limiting with 429 *or* 403.
      const rateLimited =
        response.status === 429 ||
        (response.status === 403 &&
          (remaining === 0 || retryAfterSec !== undefined));

      if (rateLimited) {
        if (attempt >= maxRetries) {
          throw new Error('GitHub API rate limited after max retries');
        }
        let waitMs: number;
        if (retryAfterSec !== undefined) {
          waitMs = retryAfterSec * 1000;
        } else if (remaining === 0) {
          waitMs = GitHubAdapter.msUntilReset(response);
        } else {
          waitMs = (attempt + 1) * 5000;
        }
        console.warn(`[provenant] GitHub rate limited, waiting ${waitMs}ms...`);
        await sleep(waitMs);
        continue;
      }

      if (!response.ok) {
        const body = await response.text();
        throw new Error(`GitHub API error ${response.status}: ${body}`);
      }

      // Pre-emptive throttle: only after a successful call, so error paths
      // never stall before throwing and a 429/403 never sleeps twice.
      if (remaining !== undefined && remaining < 10) {
        const waitMs = GitHubAdapter.msUntilReset(response);
        console.warn(
          `[provenant] GitHub rate limit low (${remaining} remaining), waiting ${Math.round(waitMs / 1000)}s...`
        );
        await sleep(waitMs);
      }

      return (await response.json()) as T;
    }
    throw new Error('GitHub API request failed after retries');
  }

  /**
   * Reads a header as a base-10 integer.
   *
   * @returns `undefined` when the header is absent or not numeric, so callers
   *   never compute a `NaN` delay.
   */
  private static intHeader(
    response: { headers: { get(name: string): string | null } },
    name: string
  ): number | undefined {
    const raw = response.headers.get(name);
    if (raw === null) return undefined;
    const value = Number.parseInt(raw, 10);
    return Number.isFinite(value) ? value : undefined;
  }

  /**
   * Milliseconds to wait for the rate-limit window to reset, read from
   * `X-RateLimit-Reset` (epoch seconds) and clamped to [1 s, 120 s]. Assumes
   * 60 s when the header is missing or unparseable.
   */
  private static msUntilReset(response: {
    headers: { get(name: string): string | null };
  }): number {
    const resetSec = GitHubAdapter.intHeader(response, 'X-RateLimit-Reset');
    const untilReset =
      resetSec !== undefined ? resetSec * 1000 - Date.now() : 60_000;
    return Math.max(1000, Math.min(untilReset, 120_000));
  }
}
357+
358+
/** Resolves after roughly `ms` milliseconds, using a timer. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(() => wake(), ms);
  });
}

packages/provenant/src/cli/registry.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import type { SourceAdapter } from '../adapters/adapter.js';
22
import { ManualAdapter } from '../adapters/manual.js';
33
import { LinearAdapter } from '../adapters/linear.js';
44
import { SlackAdapter } from '../adapters/slack.js';
5+
import { GitHubAdapter } from '../adapters/github.js';
56

67
const adapters = new Map<string, SourceAdapter>();
78

@@ -20,6 +21,12 @@ if (slack) {
2021
adapters.set('slack', slack);
2122
}
2223

24+
// Register GitHub when GITHUB_TOKEN, GITHUB_OWNER and GITHUB_REPO are all set
const github = GitHubAdapter.fromEnv();
if (github !== undefined) adapters.set('github', github);
29+
2330
/** Stores `adapter` in the registry map under its `system` name. */
export function registerAdapter(adapter: SourceAdapter): void {
  const { system } = adapter;
  adapters.set(system, adapter);
}

0 commit comments

Comments
 (0)