
Commit 8a6a71c

Copilot and Mte90 committed
Major code reorganization: modular structure with separate folders
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent 2264dfa commit 8a6a71c

20 files changed: +655 -481 lines changed

ai/__init__.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+"""
+AI and analysis modules.
+"""
+from .analyzer import (
+    analyze_local_path_background,
+    analyze_local_path_sync,
+    search_semantic,
+    call_coding_model,
+    llama_index_retrieve_documents,
+)
+
+__all__ = [
+    'analyze_local_path_background',
+    'analyze_local_path_sync',
+    'search_semantic',
+    'call_coding_model',
+    'llama_index_retrieve_documents',
+]
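
With these re-exports, callers can import the analysis API from the package root instead of reaching into ai.analyzer. A trivial sketch (the call site is illustrative, not part of this commit):

    # names exactly as listed in __all__ above
    from ai import search_semantic, llama_index_retrieve_documents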

analyzer.py renamed to ai/analyzer.py

Lines changed: 5 additions & 16 deletions
@@ -14,7 +14,7 @@
 from db import store_file, needs_reindex, set_project_metadata_batch, get_project_metadata
 from external_api import get_embedding_for_text, call_coding_api
 from llama_index.core import Document
-from logger import get_logger
+from utils.logger import get_logger
 from smart_chunker import smart_chunk
 import logging

@@ -578,18 +578,7 @@ def llama_index_retrieve_documents(query: str, database_path: str, top_k: int =
     """
     Return llama_index.core.Document objects for the top_k matching chunks using sqlite-vector.
     """
-    q_emb = get_embedding_for_text(query)
-    if not q_emb:
-        return []
-
-    rows = _search_vectors(database_path, q_emb, top_k=top_k)
-    docs: List[Document] = []
-    for r in rows:
-        fid = r.get("file_id")
-        path = r.get("path")
-        chunk_idx = r.get("chunk_index", 0)
-        score = r.get("score", 0.0)
-        chunk_text = _get_chunk_text(database_path, fid, chunk_idx) or ""
-        doc = Document(text=chunk_text, extra_info={"path": path, "file_id": fid, "chunk_index": chunk_idx, "score": score})
-        docs.append(doc)
-    return docs
+    from .llama_integration import llama_index_retrieve_documents as _llama_retrieve
+    return _llama_retrieve(query, database_path, top_k,
+                           search_func=_search_vectors,
+                           get_chunk_func=_get_chunk_text)

ai/llama_integration.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+"""
+LlamaIndex integration for document retrieval.
+"""
+from typing import List
+from llama_index.core import Document
+
+from external_api import get_embedding_for_text
+from utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def llama_index_retrieve_documents(query: str, database_path: str, top_k: int = 5,
+                                   search_func=None, get_chunk_func=None) -> List[Document]:
+    """
+    Return llama_index.core.Document objects for the top_k matching chunks using sqlite-vector.
+
+    Args:
+        query: Search query text
+        database_path: Path to project database
+        top_k: Number of results to return
+        search_func: Function to search vectors (injected from analyzer)
+        get_chunk_func: Function to get chunk text (injected from analyzer)
+
+    Returns:
+        List of Document objects with chunk text and metadata
+    """
+    if search_func is None or get_chunk_func is None:
+        raise ValueError("search_func and get_chunk_func must be provided")
+
+    q_emb = get_embedding_for_text(query)
+    if not q_emb:
+        return []
+
+    rows = search_func(database_path, q_emb, top_k=top_k)
+    docs: List[Document] = []
+    for r in rows:
+        fid = r.get("file_id")
+        path = r.get("path")
+        chunk_idx = r.get("chunk_index", 0)
+        score = r.get("score", 0.0)
+        chunk_text = get_chunk_func(database_path, fid, chunk_idx) or ""
+        doc = Document(text=chunk_text, extra_info={"path": path, "file_id": fid, "chunk_index": chunk_idx, "score": score})
+        docs.append(doc)
+    return docs
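
The search_func / get_chunk_func parameters are dependency injection: analyzer.py passes in its private _search_vectors and _get_chunk_text helpers, so this module never has to import the analyzer back. A minimal usage sketch with stand-in callables (the stub data is invented, and it assumes external_api.get_embedding_for_text can reach its embedding backend):

    from ai.llama_integration import llama_index_retrieve_documents

    def fake_search(db_path, embedding, top_k=5):
        # pretend the vector index matched one chunk
        return [{"file_id": 1, "path": "src/app.py", "chunk_index": 0, "score": 0.92}]

    def fake_chunk_text(db_path, file_id, chunk_index):
        return "def main(): ..."

    docs = llama_index_retrieve_documents("where is main defined?", "project.db", top_k=1,
                                          search_func=fake_search,
                                          get_chunk_func=fake_chunk_text)
    print(docs[0].extra_info["path"])  # src/app.py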

db/__init__.py

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
+"""
+Database operations module.
+"""
+from .operations import (
+    # Connection and initialization
+    init_db,
+    # File operations
+    store_file,
+    get_file_by_path,
+    needs_reindex,
+    list_files,
+    delete_file_by_path,
+    clear_project_data,
+    # Project registry operations
+    create_project,
+    get_project,
+    get_project_by_id,
+    list_projects,
+    update_project_status,
+    update_project_settings,
+    delete_project,
+    get_or_create_project,
+    # Metadata operations
+    set_project_metadata,
+    set_project_metadata_batch,
+    get_project_metadata,
+    get_project_stats,
+    # Chunk operations
+    insert_chunk_row_with_null_embedding,
+)
+
+from .models import (
+    CreateProjectRequest,
+    IndexProjectRequest,
+    QueryRequest,
+)
+
+__all__ = [
+    # Connection and initialization
+    'init_db',
+    # File operations
+    'store_file',
+    'get_file_by_path',
+    'needs_reindex',
+    'list_files',
+    'delete_file_by_path',
+    'clear_project_data',
+    # Project registry operations
+    'create_project',
+    'get_project',
+    'get_project_by_id',
+    'list_projects',
+    'update_project_status',
+    'update_project_settings',
+    'delete_project',
+    'get_or_create_project',
+    # Metadata operations
+    'set_project_metadata',
+    'set_project_metadata_batch',
+    'get_project_metadata',
+    'get_project_stats',
+    # Chunk operations
+    'insert_chunk_row_with_null_embedding',
+    # Models
+    'CreateProjectRequest',
+    'IndexProjectRequest',
+    'QueryRequest',
+]
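
Since the Pydantic request models and the low-level operations now share one import surface, a caller needs only the db package. A small sketch; the field names mirror their use in endpoints/project_endpoints.py below, while the example path is made up:

    from db import CreateProjectRequest, get_or_create_project

    req = CreateProjectRequest(path="/home/user/myproject", name="myproject")
    project = get_or_create_project(req.path, req.name)
    print(project["id"], project["database_path"])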

db/db_task.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+"""
+Database task class for queued write operations.
+"""
+import threading
+
+
+class _DBTask:
+    """Internal task class for queuing database write operations."""
+    def __init__(self, sql, params):
+        self.sql = sql
+        self.params = params
+        self.event = threading.Event()
+        self.rowid = None
+        self.exception = None
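
Each _DBTask pairs one SQL statement with a threading.Event so the submitting thread can block until a single writer thread has applied the write. A caller-side sketch, assuming a write_queue consumed by a worker such as DBWriter (the queue name and the SQL are illustrative, not from this commit):

    task = _DBTask("INSERT INTO files (path) VALUES (?)", ("src/main.py",))
    write_queue.put(task)            # hand off to the writer thread
    if task.event.wait(timeout=30):  # block until the worker signals completion
        if task.exception:
            raise task.exception     # re-raise worker-side failures in the caller
        print("inserted rowid:", task.rowid)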
File renamed without changes.

db.py renamed to db/operations.py

Lines changed: 3 additions & 9 deletions
@@ -3,12 +3,13 @@
 from typing import Any, Dict, List, Optional
 from functools import lru_cache

-from config import CFG  # config (keeps chunk_size etc if needed)
+from utils.config import CFG  # config (keeps chunk_size etc if needed)
 import atexit
 import threading
 import queue
-from logger import get_logger
+from utils.logger import get_logger
 from cache import project_cache, stats_cache, file_cache
+from .db_task import _DBTask

 _LOG = get_logger(__name__)

@@ -20,13 +21,6 @@
 _WRITERS = {}
 _WRITERS_LOCK = threading.Lock()

-class _DBTask:
-    def __init__(self, sql, params):
-        self.sql = sql
-        self.params = params
-        self.event = threading.Event()
-        self.rowid = None
-        self.exception = None

 class DBWriter:
     def __init__(self, database_path, timeout_seconds=30):
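
The hunk only shows the DBWriter signature; its body is unchanged and omitted from the diff. For orientation, a rough reimplementation of the queue-plus-single-writer pattern it implies (an illustrative sketch, not the project's actual code):

    import queue
    import sqlite3
    import threading

    class MiniDBWriter:
        # One daemon thread owns the SQLite connection; other threads
        # enqueue _DBTask objects and wait on task.event (see db/db_task.py).
        def __init__(self, database_path, timeout_seconds=30):
            self.tasks = queue.Queue()
            self.timeout_seconds = timeout_seconds
            threading.Thread(target=self._run, args=(database_path,), daemon=True).start()

        def _run(self, database_path):
            conn = sqlite3.connect(database_path)
            while True:
                task = self.tasks.get()
                try:
                    cur = conn.execute(task.sql, task.params)
                    conn.commit()
                    task.rowid = cur.lastrowid
                except Exception as e:
                    task.exception = e
                finally:
                    task.event.set()  # wake the waiting caller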

endpoints/__init__.py

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+"""
+API endpoints module.
+"""
+from .project_endpoints import router as project_router
+from .query_endpoints import router as query_router
+from .web_endpoints import router as web_router
+
+__all__ = ['project_router', 'query_router', 'web_router']
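
A sketch of how these re-exported routers would typically be mounted; the application module itself is not in this diff, so the wiring below is an assumption:

    from fastapi import FastAPI
    from endpoints import project_router, query_router, web_router

    app = FastAPI()
    app.include_router(project_router)  # already carries prefix="/api" and its tags
    app.include_router(query_router)
    app.include_router(web_router)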

endpoints/project_endpoints.py

Lines changed: 179 additions & 0 deletions
@@ -0,0 +1,179 @@
+"""
+Project management API endpoints.
+"""
+from fastapi import APIRouter, Request, BackgroundTasks
+from fastapi.responses import JSONResponse
+import os
+from datetime import datetime
+
+from db import (
+    get_project_by_id, list_projects,
+    update_project_status, delete_project, get_or_create_project,
+    CreateProjectRequest, IndexProjectRequest
+)
+from ai.analyzer import analyze_local_path_background
+from utils.logger import get_logger
+from utils.config import CFG
+from .rate_limiter import indexing_limiter
+
+logger = get_logger(__name__)
+router = APIRouter(prefix="/api", tags=["projects"])
+
+MAX_FILE_SIZE = int(CFG.get("max_file_size", 200000))
+
+
+def _get_client_ip(request: Request) -> str:
+    """Get client IP address from request."""
+    forwarded = request.headers.get("X-Forwarded-For")
+    if forwarded:
+        return forwarded.split(",")[0].strip()
+    return request.client.host if request.client else "unknown"
+
+
+@router.post("/projects", summary="Create or get a project")
+def api_create_project(request: CreateProjectRequest):
+    """
+    Create or get a project with per-project database.
+
+    - **path**: Absolute path to project directory (required)
+    - **name**: Optional project name (defaults to directory name)
+
+    Returns project metadata including:
+    - **id**: Unique project identifier
+    - **database_path**: Path to project's SQLite database
+    - **status**: Current project status
+    """
+    try:
+        project = get_or_create_project(request.path, request.name)
+        return JSONResponse(project)
+    except ValueError as e:
+        # ValueError is expected for invalid inputs, safe to show message
+        logger.warning(f"Validation error creating project: {e}")
+        return JSONResponse({"error": "Invalid project path"}, status_code=400)
+    except RuntimeError as e:
+        # RuntimeError may contain sensitive details, use generic message
+        logger.error(f"Runtime error creating project: {e}")
+        return JSONResponse({"error": "Database operation failed"}, status_code=500)
+    except Exception as e:
+        logger.exception(f"Unexpected error creating project: {e}")
+        return JSONResponse({"error": "Internal server error"}, status_code=500)
+
+
+@router.get("/projects", summary="List all projects")
+def api_list_projects():
+    """
+    List all registered projects.
+
+    Returns array of project objects with metadata:
+    - **id**: Unique project identifier
+    - **name**: Project name
+    - **path**: Project directory path
+    - **status**: Current status (created, indexing, ready, error)
+    - **last_indexed_at**: Last indexing timestamp
+    """
+    try:
+        projects = list_projects()
+        return JSONResponse(projects)
+    except Exception as e:
+        logger.exception(f"Error listing projects: {e}")
+        return JSONResponse({"error": "Failed to list projects"}, status_code=500)
+
+
+@router.get("/projects/{project_id}", summary="Get project by ID")
+def api_get_project(project_id: str):
+    """
+    Get project details by ID.
+
+    - **project_id**: Unique project identifier
+
+    Returns project metadata or 404 if not found.
+    """
+    try:
+        project = get_project_by_id(project_id)
+        if not project:
+            return JSONResponse({"error": "Project not found"}, status_code=404)
+        return JSONResponse(project)
+    except Exception as e:
+        logger.exception(f"Error getting project: {e}")
+        return JSONResponse({"error": "Failed to retrieve project"}, status_code=500)
+
+
+@router.delete("/projects/{project_id}", summary="Delete a project")
+def api_delete_project(project_id: str):
+    """
+    Delete a project and its database.
+
+    - **project_id**: Unique project identifier
+
+    Permanently removes the project and all indexed data.
+    Returns 404 if project not found.
+    """
+    try:
+        delete_project(project_id)
+        return JSONResponse({"success": True})
+    except ValueError as e:
+        logger.warning(f"Project not found for deletion: {e}")
+        return JSONResponse({"error": "Project not found"}, status_code=404)
+    except Exception as e:
+        logger.exception(f"Error deleting project: {e}")
+        return JSONResponse({"error": "Failed to delete project"}, status_code=500)
+
+
+@router.post("/projects/index", tags=["indexing"], summary="Index a project")
+def api_index_project(http_request: Request, request: IndexProjectRequest, background_tasks: BackgroundTasks):
+    """
+    Index or re-index a project in the background.
+
+    - **project_id**: Unique project identifier
+
+    Starts background indexing process:
+    - Scans project directory for code files
+    - Generates embeddings for semantic search
+    - Uses incremental indexing (skips unchanged files)
+
+    Rate limit: 10 requests per minute per IP.
+
+    Returns immediately with status "indexing".
+    Poll project status to check completion.
+    """
+    # Rate limiting for indexing operations (more strict)
+    client_ip = _get_client_ip(http_request)
+    allowed, retry_after = indexing_limiter.is_allowed(client_ip)
+    if not allowed:
+        return JSONResponse(
+            {"error": "Rate limit exceeded for indexing", "retry_after": retry_after},
+            status_code=429,
+            headers={"Retry-After": str(retry_after)}
+        )
+
+    try:
+        project = get_project_by_id(request.project_id)
+        if not project:
+            return JSONResponse({"error": "Project not found"}, status_code=404)
+
+        project_path = project["path"]
+        db_path = project["database_path"]
+
+        if not os.path.exists(project_path):
+            return JSONResponse({"error": "Project path does not exist"}, status_code=400)
+
+        # Update status to indexing
+        update_project_status(request.project_id, "indexing")
+
+        # Start background indexing
+        venv_path = CFG.get("venv_path")
+
+        def index_callback():
+            try:
+                analyze_local_path_background(project_path, db_path, venv_path, MAX_FILE_SIZE, CFG)
+                update_project_status(request.project_id, "ready", datetime.utcnow().isoformat())
+            except Exception as e:
+                update_project_status(request.project_id, "error")
+                raise
+
+        background_tasks.add_task(index_callback)
+
+        return JSONResponse({"status": "indexing", "project_id": request.project_id})
+    except Exception as e:
+        logger.exception(f"Error starting project indexing: {e}")
+        return JSONResponse({"error": "Failed to start indexing"}, status_code=500)
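
indexing_limiter is imported from the new endpoints/rate_limiter.py, which is not shown in this excerpt; the endpoint relies only on is_allowed(ip) returning an (allowed, retry_after) pair, with a documented budget of 10 requests per minute per IP. A sliding-window limiter meeting that contract could look like this (an illustrative sketch, not the committed implementation):

    import time
    from collections import defaultdict, deque

    class SlidingWindowLimiter:
        def __init__(self, max_requests=10, window_seconds=60):
            self.max_requests = max_requests
            self.window_seconds = window_seconds
            self.hits = defaultdict(deque)  # ip -> timestamps of recent requests

        def is_allowed(self, client_ip):
            now = time.monotonic()
            window = self.hits[client_ip]
            while window and now - window[0] > self.window_seconds:
                window.popleft()  # evict hits that fell out of the window
            if len(window) >= self.max_requests:
                retry_after = int(self.window_seconds - (now - window[0])) + 1
                return False, retry_after
            window.append(now)
            return True, 0

    indexing_limiter = SlidingWindowLimiter(max_requests=10, window_seconds=60)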
