Skip to content

Commit 0f34733

Browse files
committed
Add repository ingest as an analysis feature
1 parent 305b430 commit 0f34733

File tree

7 files changed

+8534
-12569
lines changed

7 files changed

+8534
-12569
lines changed

app/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from fastapi.staticfiles import StaticFiles
44
from fastapi.templating import Jinja2Templates
55
from pathlib import Path
6-
from app.routes import install, actions
6+
from app.routes import install, actions, smart_ingest_route
77
from app.services.actions_loader import actions_loader
88
from api_analytics.fastapi import Analytics
99
from fastapi_mcp import FastApiMCP
@@ -28,6 +28,7 @@
2828
# Include routers
2929
app.include_router(install.router)
3030
app.include_router(actions.router)
31+
app.include_router(smart_ingest_route.router)
3132

3233
@app.get("/favicon.ico", operation_id="get_favicon")
3334
async def favicon():

app/routes/smart_ingest_route.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""
2+
Simple route for smart ingest functionality.
3+
"""
4+
5+
from fastapi import APIRouter, HTTPException
6+
from pydantic import BaseModel
7+
from typing import Optional
8+
from app.services.smart_ingest import use_gitingest, smart_ingest
9+
10+
router = APIRouter(prefix="/api", tags=["smart_ingest"])
11+
12+
13+
class IngestRequest(BaseModel):
    """Request body for POST /api/ingest."""

    # URL of the repository to ingest.
    repo_url: str
15+
16+
17+
class IngestResponse(BaseModel):
    """Response body for POST /api/ingest."""

    # True when ingestion succeeded (failures are reported via HTTP 500 instead).
    success: bool
    # Size of the returned context, in characters (len of `message`).
    context_size: int
    # The ingested repository context itself.
    message: str
21+
22+
23+
class AnalyzeRequest(BaseModel):
    """Request body for POST /api/analyze."""

    # Repository context, typically produced by the /api/ingest endpoint.
    context: str
    # Instruction for the analysis; defaults to a general overview request.
    user_prompt: Optional[str] = "Analyze this repository and provide a comprehensive overview"
26+
27+
28+
class AnalyzeResponse(BaseModel):
    """Response body for POST /api/analyze."""

    # True when analysis succeeded (failures are reported via HTTP 500 instead).
    success: bool
    # The model's analysis text.
    analysis: str
31+
32+
33+
@router.post("/ingest", response_model=IngestResponse)
async def ingest_repository(request: IngestRequest):
    """
    Ingest a repository and return the context.

    Args:
        request: Contains the URL of the repository to ingest.

    Returns:
        IngestResponse carrying the context text and its size in characters.

    Raises:
        HTTPException: 500 if ingestion fails for any reason.
    """
    try:
        context = await use_gitingest(request.repo_url)
        return IngestResponse(
            success=True,
            context_size=len(context),  # character count, not tokens
            message=context
        )
    except Exception as e:
        # Chain the cause so the original traceback survives into server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
47+
48+
49+
@router.post("/analyze", response_model=AnalyzeResponse)
async def analyze_context(request: AnalyzeRequest):
    """
    Analyze the provided context using OpenAI.

    Args:
        request: Contains the previously ingested context and an optional
            user prompt directing the analysis.

    Returns:
        AnalyzeResponse with the model's analysis text.

    Raises:
        HTTPException: 500 if the OpenAI call fails.
    """
    try:
        # NOTE(review): smart_ingest is synchronous and blocks the event loop
        # for the duration of the API call — consider run_in_executor.
        result = smart_ingest(request.context, request.user_prompt)
        return AnalyzeResponse(
            success=True,
            analysis=result.get("response", "")
        )
    except Exception as e:
        # Chain the cause so the original traceback survives into server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
62+
63+
64+
# Keep the combined endpoint for backward compatibility
65+
# Keep the combined endpoint for backward compatibility
class SmartIngestRequest(BaseModel):
    """Request body for the combined POST /api/smart_ingest endpoint."""

    # URL of the repository to ingest and analyze.
    repo_url: str
    # Instruction for the analysis; defaults to a general overview request.
    user_prompt: Optional[str] = "Analyze this repository and provide a comprehensive overview"
68+
69+
70+
class SmartIngestResponse(BaseModel):
    """Response body for the combined POST /api/smart_ingest endpoint."""

    # True when both ingestion and analysis succeeded (failures become HTTP 500).
    success: bool
    # The model's analysis text.
    analysis: str
73+
74+
75+
@router.post("/smart_ingest", response_model=SmartIngestResponse)
async def analyze_repository(request: SmartIngestRequest):
    """
    Analyze a repository using smart ingest (combined endpoint).

    Kept for backward compatibility with callers that predate the split
    /ingest and /analyze endpoints.

    Args:
        request: Contains the repository URL and an optional user prompt.

    Returns:
        SmartIngestResponse with the model's analysis text.

    Raises:
        HTTPException: 500 if either ingestion or analysis fails.
    """
    try:
        # Step 1: Ingest the repository (async)
        context = await use_gitingest(request.repo_url)

        # Step 2: Send to OpenAI.
        # NOTE(review): smart_ingest is synchronous and blocks the event loop
        # for the duration of the API call — consider run_in_executor.
        result = smart_ingest(context, request.user_prompt)

        return SmartIngestResponse(
            success=True,
            analysis=result.get("response", "")
        )
    except Exception as e:
        # Chain the cause so the original traceback survives into server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e

app/services/smart_ingest.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""
2+
Functions for ingesting repositories and sending context to OpenAI API.
3+
"""
4+
5+
import httpx
6+
from typing import Optional, Dict, Any
7+
from dotenv import load_dotenv
8+
import os
9+
from gitingest import ingest_async
10+
11+
# Load environment variables from .env file
12+
load_dotenv()
13+
14+
15+
async def use_gitingest(url: str, context_size: int = 50000) -> str:
    """
    Ingest a repository using gitingest and trim to specified token size.

    Args:
        url: Repository URL to ingest
        context_size: Maximum context size in tokens (default ~50k tokens)

    Returns:
        String containing the repository summary, tree, and file contents,
        trimmed so the whole result — including the truncation marker —
        fits within the requested budget.
    """
    # Ingest the repository; gitingest returns (summary, tree, content).
    summary, tree, content = await ingest_async(
        url,
        max_file_size=512000,
        include_patterns=None,
        exclude_patterns=None
    )

    # Combine into a single context string.
    full_context = f"{summary}\n\n{tree}\n\n{content}"

    # Approximate token count: roughly 4 characters per token.
    max_chars = context_size * 4
    if len(full_context) > max_chars:
        # Reserve room for the marker so the result stays within max_chars.
        # (Previously the marker was appended after slicing, so the output
        # overshot the stated budget by the marker's length.)
        marker = "\n\n... (context truncated)"
        full_context = full_context[:max(0, max_chars - len(marker))] + marker

    return full_context
46+
47+
48+
def smart_ingest(
    context: str,
    user_prompt: str = "Analyze this repository and provide insights",
    api_key: Optional[str] = None
) -> Dict[str, Any]:
    """
    Send the ingested repository context to OpenAI API with a system prompt.

    Args:
        context: The "big fat context" from the use_gitingest function
        user_prompt: The user's question or request about the repository
        api_key: Optional OpenAI API key (defaults to env var OPENAI_API_KEY)

    Returns:
        Dictionary with keys: success, response, model, usage, finish_reason.

    Raises:
        ValueError: If no API key is provided or found in the environment.
        RuntimeError: If the API call fails (HTTP error status or any other
            failure while contacting OpenAI). RuntimeError subclasses
            Exception, so existing callers that catch Exception still work.
    """
    # Get API key from environment if not provided.
    if not api_key:
        api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables")

    # System prompt for repository analysis.
    system_prompt = """You are an expert code analyst and software architect.
You have been given the complete context of a repository including its structure and file contents.
Analyze the repository thoroughly and provide insights based on the user's request.
Focus on:
- Code quality and architecture
- Potential improvements
- Security considerations
- Documentation completeness
- Dependencies and technical debt
Be specific and provide actionable recommendations."""

    # Prepare messages for OpenAI: system instructions, then the user's
    # prompt with the full repository context appended.
    messages = [
        {
            "role": "system",
            "content": system_prompt
        },
        {
            "role": "user",
            "content": f"{user_prompt}\n\n{context}"
        }
    ]

    # OpenAI API endpoint
    url = "https://api.openai.com/v1/chat/completions"

    # Headers for the API request
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    # Request body
    data = {
        "model": "gpt-4o-mini",  # Using GPT-4o-mini for cost efficiency
        "messages": messages,
        "temperature": 0.3,  # Lower temperature for more focused analysis
        "max_tokens": 4096
    }

    try:
        # Make the API call (synchronous; callers on an event loop should
        # offload this to a thread).
        with httpx.Client(timeout=60.0) as client:
            response = client.post(url, json=data, headers=headers)
            response.raise_for_status()

            result = response.json()

            # Extract the first choice's message; default to empty values so
            # a malformed payload degrades gracefully rather than raising.
            choice = result.get("choices", [{}])[0]
            message = choice.get("message", {})

            return {
                "success": True,
                "response": message.get("content", ""),
                "model": result.get("model"),
                "usage": result.get("usage", {}),
                "finish_reason": choice.get("finish_reason")
            }

    except httpx.HTTPStatusError as e:
        error_detail = e.response.text if e.response else str(e)
        # Chain the cause so the original httpx traceback is preserved.
        raise RuntimeError(
            f"OpenAI API error: {e.response.status_code} - {error_detail}"
        ) from e
    except Exception as e:
        # Catch-all for transport errors, JSON decoding failures, etc.
        raise RuntimeError(f"Failed to send context to OpenAI: {str(e)}") from e

0 commit comments

Comments
 (0)