-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathprompt_github.py
More file actions
83 lines (67 loc) · 2.43 KB
/
prompt_github.py
File metadata and controls
83 lines (67 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# /// script
# description = "Prompt with Markdown files"
# requires-python = ">=3.12, <3.13"
# dependencies = ["daft[openai]>=0.7.10", "numpy", "python-dotenv"]
# ///
from dotenv import load_dotenv
import daft
@daft.func()
def discover_github_urls(repo_url: str) -> list[str]:
    """
    Discover raw-content HTTP URLs for the files in a GitHub directory.

    Lists a single directory level (no recursion) through the GitHub
    "contents" API and builds a raw.githubusercontent.com URL for every
    entry whose type is "file".

    Args:
        repo_url: GitHub tree URL, e.g.
            "https://github.com/user/repo/tree/branch/path". The trailing
            "/path" is optional; without it the repository root is listed.
            The branch segment must not contain "/". Any "?query" or
            "#fragment" suffix is ignored.

    Returns:
        List of raw GitHub URLs for the files directly inside the directory.
        An empty list is returned when ``repo_url`` is not a tree URL, when
        the request or JSON decoding fails, or when the API response is not
        a directory listing.
    """
    import json
    import re
    import urllib.parse
    import urllib.request

    # Parse the GitHub URL to extract owner, repo, branch, and (optional) path
    # Example: https://github.com/LeCoupa/awesome-cheatsheets/tree/88e5be6e4b01edf6c36c8f78b246c8fba70aa058/languages
    pattern = r"github\.com/([^/]+)/([^/]+)/tree/([^/?#]+)(?:/([^?#]*))?"
    match = re.search(pattern, repo_url)
    if not match:
        return []

    owner, repo, branch = match.group(1, 2, 3)
    # Strip slashes so a trailing "/" in the input cannot produce "//" later.
    directory = (match.group(4) or "").strip("/")

    # Unquote first so input copied from a browser (already percent-encoded)
    # is not double-encoded, then quote so spaces, "#", "?" etc. are safe.
    quoted_dir = urllib.parse.quote(urllib.parse.unquote(directory))
    quoted_ref = urllib.parse.quote(urllib.parse.unquote(branch), safe="")

    # Use GitHub API to list files
    api_url = f"https://api.github.com/repos/{owner}/{repo}/contents"
    if quoted_dir:
        api_url += f"/{quoted_dir}"
    api_url += f"?ref={quoted_ref}"

    raw_prefix = f"https://raw.githubusercontent.com/{owner}/{repo}/{quoted_ref}/"
    if quoted_dir:
        raw_prefix += f"{quoted_dir}/"

    # Best-effort by design: any failure is reported and yields an empty list
    # rather than propagating out of the UDF.
    try:
        req = urllib.request.Request(api_url)
        req.add_header("User-Agent", "Mozilla/5.0")
        # A timeout keeps a stalled connection from blocking forever.
        with urllib.request.urlopen(req, timeout=30) as response:
            contents = json.loads(response.read().decode())

        # The API returns a JSON object (not a list) when the path is a
        # single file; that is not a directory listing.
        if not isinstance(contents, list):
            return []

        # Percent-encode each file name: names such as "C#.txt" would
        # otherwise be cut at "#" and point at the wrong resource.
        return [
            raw_prefix + urllib.parse.quote(item["name"], safe="")
            for item in contents
            if item["type"] == "file"
        ]
    except Exception as e:
        print(f"Error fetching GitHub contents: {e}")
        return []
if __name__ == "__main__":
    # Load environment variables from a local .env file, if present
    # (presumably the OpenAI API key needed by the disabled pipeline below).
    load_dotenv()

    # Disabled reference pipeline, kept for context.
    # NOTE(review): `file`, `col`, `lit` and `prompt` are not imported by the
    # imports visible in this file; they must be imported before re-enabling.
    #
    # # Discover Markdown files in a GitHub directory
    # df = daft.from_glob_path("https://github.com/LeCoupa/awesome-cheatsheets/tree/88e5be6e4b01edf6c36c8f78b246c8fba70aa058/languages/*.md")
    #
    #
    # df = (
    #     df
    #     # Create a daft.File column from the path
    #     .with_column("file", file(col("path")))
    #     # Prompt GPT-5-nano with markdown files as context
    #     .with_column(
    #         "response",
    #         prompt(
    #             [lit("What are in the contents of this file? \n"), col("file")],
    #             model="gpt-5-nano",
    #             provider="openai",
    #         )
    #     )
    # )
    # df.show(format="fancy", max_width=80)

    # discover_github_urls only recognises full ".../tree/<branch>/<path>"
    # URLs; a bare "owner/repo" slug never matches and always yields [].
    print(
        discover_github_urls(
            "https://github.com/LeCoupa/awesome-cheatsheets/tree/88e5be6e4b01edf6c36c8f78b246c8fba70aa058/languages"
        )
    )