Skip to content

Commit 5c06cf3

Browse files
sjarmak and claude committed
feat: US-003 - Create sg-benchmarks mirrors for missing repos
Created 7 sg-benchmarks mirror repos on GitHub using orphan-commit approach: - kubernetes-client-go (v12.0.0), kubernetes-api (v0.35.1) - expressjs-express (v5.2.1), lodash (4.17.21) - prisma-prisma (7.4.1), grafana-loki (v3.6.6), grafana-mimir (mimir-3.0.3) All repos public, pinned to stable releases, SHAs recorded in configs/sg_mirror_revisions.json. SG indexing pending (auto-crawl). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c56aceb commit 5c06cf3

File tree

3 files changed

+183
-0
lines changed

3 files changed

+183
-0
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"_description": "Tracks sg-benchmarks mirror repos: original source, pinned tag, original commit SHA, and mirror commit SHA. Used by repo-set fixtures to reference exact revisions.",
3+
"_created": "2026-02-20",
4+
"mirrors": [
5+
{
6+
"mirror_repo": "sg-benchmarks/kubernetes-client-go",
7+
"source_repo": "kubernetes/client-go",
8+
"tag": "v12.0.0",
9+
"original_sha": "78d2af792babf2dd937ba2e2a8d99c753a5eda89",
10+
"mirror_sha": "8020fc4fcf89965904a5f43689f169d6e01d1e80",
11+
"indexed": false,
12+
"created_date": "2026-02-20"
13+
},
14+
{
15+
"mirror_repo": "sg-benchmarks/kubernetes-api",
16+
"source_repo": "kubernetes/api",
17+
"tag": "v0.35.1",
18+
"original_sha": "4aa217d625944fe92a602d2ed6d3d28cc2748f0c",
19+
"mirror_sha": "fa23dd302759dbb681c1a41f09d24190a38c1d58",
20+
"indexed": false,
21+
"created_date": "2026-02-20"
22+
},
23+
{
24+
"mirror_repo": "sg-benchmarks/expressjs-express",
25+
"source_repo": "expressjs/express",
26+
"tag": "v5.2.1",
27+
"original_sha": "dbac741a49a5a64336b70c06e85c2e2706e36336",
28+
"mirror_sha": "9de5890d0dc6128b7f7eb15469d76aa60dacc48f",
29+
"indexed": false,
30+
"created_date": "2026-02-20",
31+
"notes": "Clean-name mirror. Legacy mirror sg-benchmarks/expressjs--express--815f7993 also exists and is indexed."
32+
},
33+
{
34+
"mirror_repo": "sg-benchmarks/lodash",
35+
"source_repo": "lodash/lodash",
36+
"tag": "4.17.21",
37+
"original_sha": "f299b52f39486275a9e6483b60a410e06520c538",
38+
"mirror_sha": "1dd1ecfd7875372efa8dde5dede50f6d2d323703",
39+
"indexed": false,
40+
"created_date": "2026-02-20"
41+
},
42+
{
43+
"mirror_repo": "sg-benchmarks/prisma-prisma",
44+
"source_repo": "prisma/prisma",
45+
"tag": "7.4.1",
46+
"original_sha": "533e22aa500fa87d6ac254b28f54a4446a1616c1",
47+
"mirror_sha": "20117d718fb0db9c8a586276e2052f5b130f994b",
48+
"indexed": false,
49+
"created_date": "2026-02-20"
50+
},
51+
{
52+
"mirror_repo": "sg-benchmarks/grafana-loki",
53+
"source_repo": "grafana/loki",
54+
"tag": "v3.6.6",
55+
"original_sha": "fb202465c6e9fdda198e06d6588de8381ded79e7",
56+
"mirror_sha": "a3af38d4da899032d3ee46a30932d072d37e1b9c",
57+
"indexed": false,
58+
"created_date": "2026-02-20"
59+
},
60+
{
61+
"mirror_repo": "sg-benchmarks/grafana-mimir",
62+
"source_repo": "grafana/mimir",
63+
"tag": "mimir-3.0.3",
64+
"original_sha": "0b00ebf5d3766c766d319a8c23cd165f5c60a5e5",
65+
"mirror_sha": "cfaa8c9a705a2417822a5c6224a9fc3128c416c2",
66+
"indexed": false,
67+
"created_date": "2026-02-20"
68+
}
69+
]
70+
}

ralph-mcp-unique/progress.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,29 @@
5757
- sourcegraph_mirror is the critical field linking to sg-benchmarks mirrors
5858
- access_mode on each repo entry is the authoritative source; local_checkout_repos/mcp_only_repos are convenience arrays
5959
---
60+
[2026-02-20 19:50:26 UTC] Iteration 1 no story markers found
61+
[2026-02-20 19:50:26 UTC] Iteration 1 complete
62+
[2026-02-20 19:50:28 UTC] Iteration 2 started
63+
64+
## 2026-02-20 - US-003: Create sg-benchmarks mirrors for missing repos
65+
- Created 7 sg-benchmarks mirror repos on GitHub using orphan-commit approach:
66+
- sg-benchmarks/kubernetes-client-go (kubernetes/client-go @ v12.0.0)
67+
- sg-benchmarks/kubernetes-api (kubernetes/api @ v0.35.1)
68+
- sg-benchmarks/expressjs-express (expressjs/express @ v5.2.1)
69+
- sg-benchmarks/lodash (lodash/lodash @ 4.17.21)
70+
- sg-benchmarks/prisma-prisma (prisma/prisma @ 7.4.1)
71+
- sg-benchmarks/grafana-loki (grafana/loki @ v3.6.6)
72+
- sg-benchmarks/grafana-mimir (grafana/mimir @ mimir-3.0.3)
73+
- All repos public, main branch, orphan commit (no history, minimal push size)
74+
- Created `ralph-mcp-unique/configs/sg_mirror_revisions.json` tracking all SHAs
75+
- Created `ralph-mcp-unique/scripts/verify_sg_indexing.py` to check SG indexing status
76+
- Legacy express mirror `expressjs--express--815f7993` also exists (indexed)
77+
- **PENDING**: Sourcegraph indexing — repos created but not yet indexed. SG auto-crawls sg-benchmarks org; indexing expected within hours. Run verify_sg_indexing.py to check.
78+
- Files changed: `ralph-mcp-unique/configs/sg_mirror_revisions.json` (new), `ralph-mcp-unique/scripts/verify_sg_indexing.py` (new)
79+
- **Learnings for future iterations:**
80+
- `gh api repos/sg-benchmarks/<name>` can return 404 for a newly created repo until the GitHub API has propagated it, so it cannot be used as an immediate existence check
81+
- Orphan-commit approach works for all repo sizes (even 500MB+ repos like loki/mimir push fine)
82+
- Clean naming convention (org-repo) preferred over SWE-bench hash-suffix convention (org--repo--hash)
83+
- SG indexing delay is the bottleneck — create mirrors early, verify later
84+
- `.gitignore` has `ralph-*/` pattern; must use `git add -f` for new files under ralph-mcp-unique/
85+
---
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#!/usr/bin/env python3
2+
"""Verify that sg-benchmarks mirrors are indexed in Sourcegraph.
3+
4+
Run periodically after creating mirrors until all are indexed.
5+
Queries Sourcegraph search through the `src` CLI. There is no direct-HTTP fallback: if `src` cannot be run, the mirror is reported as not yet indexed.
6+
7+
Usage:
8+
python3 scripts/verify_sg_indexing.py [--update]
9+
10+
--update: Rewrite configs/sg_mirror_revisions.json, setting `indexed` to true for each mirror newly found to be indexed
11+
"""
12+
import json
13+
import subprocess
14+
import sys
15+
from pathlib import Path
16+
17+
REVISIONS_FILE = Path(__file__).parent.parent / "configs" / "sg_mirror_revisions.json"
18+
19+
20+
def _note_unverified(mirror_repo: str, reason: str) -> bool:
    """Print why indexing could not be verified for *mirror_repo*; return False."""
    print(f"  Note: {reason}. Cannot verify SG indexing for {mirror_repo}.")
    print(f"  Manually verify: search 'repo:^github.com/{mirror_repo}$' on Sourcegraph.")
    return False


def check_repo_indexed(mirror_repo: str) -> bool:
    """Report whether a Sourcegraph search finds any file in a mirror repo.

    Runs ``src search -json`` with a query restricted to
    ``github.com/<mirror_repo>`` and treats at least one entry under the
    ``"Results"`` key of the JSON output as evidence that the repo is indexed.

    Args:
        mirror_repo: Mirror name in ``owner/name`` form, e.g.
            ``sg-benchmarks/lodash``.

    Returns:
        True if the search returned at least one result. False if it returned
        none, or if the check could not be completed (``src`` missing or not
        runnable, timeout, non-zero exit status, unusable output). In the
        could-not-complete cases a note naming the actual cause is printed
        together with a manual-verification hint.
    """
    # NOTE(review): mirror_repo is interpolated into a regex unescaped, so the
    # "." in "github.com" (and any metacharacter in the name) matches loosely.
    query = f"repo:^github.com/{mirror_repo}$ count:1 file:."

    try:
        result = subprocess.run(
            ["src", "search", "-json", query],
            capture_output=True, text=True, timeout=30,
        )
    except FileNotFoundError:
        return _note_unverified(mirror_repo, "`src` CLI not available")
    except OSError as exc:
        # e.g. PermissionError when `src` exists but is not executable.
        return _note_unverified(mirror_repo, f"`src` CLI could not be run ({exc})")
    except subprocess.TimeoutExpired:
        return _note_unverified(mirror_repo, "`src search` timed out")

    if result.returncode != 0:
        return _note_unverified(
            mirror_repo, f"`src search` exited with status {result.returncode}"
        )

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        return _note_unverified(mirror_repo, "`src search` output was not valid JSON")

    if not isinstance(data, dict):
        return _note_unverified(mirror_repo, "`src search` output had an unexpected shape")

    # A present-but-null "Results" must count as "no results": dict.get's
    # default only applies to a missing key, and len(None) raises TypeError.
    return bool(data.get("Results"))
41+
42+
43+
def main():
    """Check every mirror listed in REVISIONS_FILE and report indexing status.

    Mirrors whose ``indexed`` field is already truthy are trusted and not
    re-checked; the rest are probed with ``check_repo_indexed``. With
    ``--update`` on the command line, newly indexed mirrors get
    ``indexed = True`` and the revisions file is rewritten.

    Exits with status 0 when every mirror is indexed, otherwise 1 (also 1
    when the revisions file is missing).
    """
    write_back = "--update" in sys.argv

    if not REVISIONS_FILE.exists():
        print(f"ERROR: {REVISIONS_FILE} not found")
        sys.exit(1)

    manifest = json.loads(REVISIONS_FILE.read_text())

    total = 0
    indexed_total = 0
    for entry in manifest["mirrors"]:
        name = entry["mirror_repo"]
        previously_indexed = entry.get("indexed", False)

        if previously_indexed:
            is_indexed, label = True, "INDEXED (previously verified)"
        elif check_repo_indexed(name):
            is_indexed, label = True, "INDEXED"
        else:
            is_indexed, label = False, "NOT YET INDEXED"

        total += 1
        if is_indexed:
            indexed_total += 1
        print(f"  {name}: {label}")

        # Only flip the flag for mirrors that became indexed during this run.
        if write_back and is_indexed and not previously_indexed:
            entry["indexed"] = True

    if write_back:
        REVISIONS_FILE.write_text(json.dumps(manifest, indent=2) + "\n")
        print(f"\nUpdated {REVISIONS_FILE}")

    everything_indexed = indexed_total == total
    summary = "ALL INDEXED" if everything_indexed else "SOME NOT YET INDEXED"
    print(f"\n{summary}")
    print(f"Indexed: {indexed_total}/{total}")

    sys.exit(0 if everything_indexed else 1)
84+
85+
86+
# Run the indexing check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)