Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 185 additions & 3 deletions backend/app/services/file_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
from typing import List, Dict, Any


def detect_package_manager(filename: str, content: str) -> str:
Expand Down Expand Up @@ -68,6 +70,60 @@ def local_name(tag: str) -> str:

return deps

def parse_maven_pom(text: str) -> List[Dict[str, Any]]:
    """Parse a Maven ``pom.xml`` and extract its non-test dependencies.

    Every ``<dependency>`` element in the document is considered, whatever
    its parent. Coordinates written as a single ``${property}`` placeholder
    are resolved against the POM's ``<properties>`` section and against
    ``project.groupId`` / ``project.version`` (taken from the project, or
    from ``<parent>`` when the project omits them). Placeholders that cannot
    be resolved from this file are returned unchanged.

    Args:
        text: Raw XML content of the POM.

    Returns:
        A list of ``{name, version, groupId, artifactId}`` dicts where
        ``name`` is ``groupId:artifactId``. ``version`` is ``None`` when the
        dependency declares no ``<version>``. Returns ``[]`` when the text is
        not well-formed XML.
    """
    try:
        root = ET.fromstring(text)
    except ET.ParseError:
        return []

    def local_name(tag: str) -> str:
        # ElementTree spells namespaced tags as "{uri}name"; keep only "name".
        return tag.split('}')[-1] if '}' in tag else tag

    def child_texts(elem: ET.Element) -> Dict[str, str]:
        # Map each direct child's local tag name to its stripped text.
        return {local_name(c.tag): (c.text or '').strip() for c in elem}

    # Collect the values a "${...}" placeholder may refer to.
    properties: Dict[str, str] = {}
    project_fields: Dict[str, str] = {}
    parent_fields: Dict[str, str] = {}
    for child in root:
        tag = local_name(child.tag)
        if tag == 'properties':
            properties.update(child_texts(child))
        elif tag == 'parent':
            parent_fields = child_texts(child)
        elif tag in ('groupId', 'version'):
            project_fields[tag] = (child.text or '').strip()
    for field in ('groupId', 'version'):
        # A project inherits groupId/version from its parent when it omits them.
        inherited = project_fields.get(field) or parent_fields.get(field)
        if inherited:
            properties.setdefault(f"project.{field}", inherited)

    def resolve(value: Any) -> Any:
        # Follow "${key}" references; `seen` stops self-referential chains.
        seen = set()
        while value and value.startswith('${') and value.endswith('}'):
            key = value[2:-1]
            if key in seen or key not in properties:
                break
            seen.add(key)
            value = properties[key]
        return value

    deps: List[Dict[str, Any]] = []
    for elem in root.iter():
        if local_name(elem.tag) != 'dependency':
            continue

        fields = child_texts(elem)

        # Skip test dependencies
        if fields.get('scope') == 'test':
            continue

        group_id = resolve(fields.get('groupId'))
        artifact_id = resolve(fields.get('artifactId'))
        if group_id and artifact_id:
            deps.append({
                # Maven convention: groupId:artifactId
                "name": f"{group_id}:{artifact_id}",
                "version": resolve(fields.get('version')),
                "groupId": group_id,
                "artifactId": artifact_id,
            })

    return deps


def parse_requirements(text: str, filename: str="") -> List[Dict[str, Any]]:
# Support two common formats:
# 1) pip-style requirements (lines with optional ==version)
Expand Down Expand Up @@ -122,20 +178,146 @@ def parse_packages_config(text: str) -> List[Dict[str, Any]]:
return deps


def get_latest_npm_version(package_name: str) -> str | None:
"""Get the latest version of an npm package."""
try:
url = f"https://registry.npmjs.org/{package_name}/latest"
with urllib.request.urlopen(url, timeout=5) as response:
data = json.loads(response.read())
return data.get("version")
except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, KeyError, Exception):
return None


def get_latest_pypi_version(package_name: str) -> str | None:
"""Get the latest version of a PyPI package."""
try:
url = f"https://pypi.org/pypi/{package_name}/json"
with urllib.request.urlopen(url, timeout=5) as response:
data = json.loads(response.read())
info = data.get("info", {})
version = info.get("version")
return version
except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, KeyError, Exception):
return None


def get_latest_maven_version(group_id: str, artifact_id: str) -> str | None:
"""Get the latest version of a Maven artifact."""
try:
# Maven Central search API
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&rows=1&wt=json"
with urllib.request.urlopen(url, timeout=5) as response:
data = json.loads(response.read())
docs = data.get("response", {}).get("docs", [])
if docs:
return docs[0].get("latestVersion")
except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, KeyError, Exception):
return None


def get_latest_nuget_version(package_name: str) -> str | None:
"""Get the latest version of a NuGet package."""
try:
url = f"https://api.nuget.org/v3-flatcontainer/{package_name.lower()}/index.json"
with urllib.request.urlopen(url, timeout=5) as response:
data = json.loads(response.read())
versions = data.get("versions", [])
if versions:
# Return the last (latest) version
return versions[-1]
except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, KeyError, Exception):
return None


def get_latest_go_version(module_path: str) -> str | None:
"""Get the latest version of a Go module."""
try:
# Go module proxy
url = f"https://proxy.golang.org/{module_path}/@latest"
with urllib.request.urlopen(url, timeout=5) as response:
data = json.loads(response.read())
return data.get("Version")
except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, KeyError, Exception):
return None


def enrich_dependencies_with_latest_versions(dependencies: List[Dict[str, Any]], ecosystem: str) -> List[Dict[str, Any]]:
    """Fill in missing dependency versions with the registry's latest version.

    Dependencies that already carry a truthy ``version`` are passed through
    untouched. For the others, the ecosystem's registry is queried; on success
    a *copy* of the dependency is emitted with ``version`` set and a
    ``version_source`` marker added. Failed lookups and unsupported ecosystems
    leave the dependency as it was. Input dicts are never mutated.

    Supports: npm, pypi, maven, nuget, go

    Args:
        dependencies: Parsed dependency dicts (``name``, optional ``version``,
            and for maven optionally ``groupId``/``artifactId``, for go
            optionally ``full_name``).
        ecosystem: Ecosystem identifier selecting the registry to query.

    Returns:
        A new list with one entry per input dependency, in the same order.
    """
    enriched: List[Dict[str, Any]] = []
    for dep in dependencies:
        # Skip if version already exists
        if dep.get("version"):
            enriched.append(dep)
            continue

        name = dep.get("name")
        latest_version = None
        version_source = None

        if ecosystem == "npm":
            if name:
                latest_version = get_latest_npm_version(name)
                version_source = "latest_from_npm"
        elif ecosystem == "pypi":
            if name:
                latest_version = get_latest_pypi_version(name)
                version_source = "latest_from_pypi"
        elif ecosystem == "maven":
            # Prefer the "groupId:artifactId" name; otherwise fall back to the
            # separate coordinate fields, even when no name is present at all.
            if name and ":" in name:
                group_id, artifact_id = name.split(":", 1)
            else:
                group_id, artifact_id = dep.get("groupId"), dep.get("artifactId")
            if group_id and artifact_id:
                latest_version = get_latest_maven_version(group_id, artifact_id)
                version_source = "latest_from_maven"
        elif ecosystem == "nuget":
            if name:
                latest_version = get_latest_nuget_version(name)
                version_source = "latest_from_nuget"
        elif ecosystem == "go":
            # Use full_name if available (includes module path), otherwise use name
            module_path = dep.get("full_name") or name
            if module_path:
                latest_version = get_latest_go_version(module_path)
                version_source = "latest_from_goproxy"

        if latest_version:
            # Copy before annotating so the caller's dict stays unchanged.
            dep = dep.copy()
            dep["version"] = latest_version
            dep["version_source"] = version_source

        enriched.append(dep)

    return enriched


def analyze_file(filename: str, content: str) -> Dict[str, Any]:
manager = detect_package_manager(filename, content)
result = {"packageManager": manager, "dependencies": []}

if manager == "npm":
result["dependencies"] = parse_package_json(content)
deps = parse_package_json(content)
# Enrich with latest versions for packages without version
result["dependencies"] = enrich_dependencies_with_latest_versions(deps, "npm")
result["ecosystem"] = "npm"
elif manager == "pypi":
result["dependencies"] = parse_requirements(content)
deps = parse_requirements(content)
# Enrich with latest versions for packages without version
result["dependencies"] = enrich_dependencies_with_latest_versions(deps, "pypi")
result["ecosystem"] = "pypi"
elif manager == "maven":
deps = parse_maven_pom(content)
# Enrich with latest versions for packages without version
result["dependencies"] = enrich_dependencies_with_latest_versions(deps, "maven")
result["ecosystem"] = "maven"
elif manager == "nuget":
result["dependencies"] = parse_requirements(content, filename=filename)
deps = parse_requirements(content, filename=filename)
# Enrich with latest versions for packages without version
result["dependencies"] = enrich_dependencies_with_latest_versions(deps, "nuget")
result["ecosystem"] = "nuget"
else:
result["ecosystem"] = "unknown"
Expand Down
79 changes: 77 additions & 2 deletions backend/app/services/repo_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,88 @@ def is_dependency_file(filename: str) -> bool:
return lower in DEP_FILES


# Directory names that are never descended into while scanning a repository
# for dependency manifests, grouped by where they typically come from.
IGNORED_DIRS = {
    # Version-control metadata
    ".git", ".svn", ".hg",
    # Installed / vendored third-party code
    "node_modules", "vendor",
    # Python caches and virtual environments
    "__pycache__", ".pytest_cache", ".mypy_cache",
    ".venv", "venv", "env", ".env",
    # Build output
    "build", "dist", "target", "bin", "obj", "out", ".next",
    # Editor / IDE state
    ".idea", ".vscode", ".vs",
    # Coverage reports
    "coverage", ".coverage", ".nyc_output",
    # Caches and scratch space
    ".cache", "tmp", "temp", ".tmp", ".temp",
}


def _should_ignore_path(path: str) -> bool:
    """Tell whether any component of ``path`` marks it as ignorable.

    Both ``/`` and ``\\`` are treated as separators, so absolute and relative
    paths from either platform style are accepted. A path is ignored when a
    component is listed in ``IGNORED_DIRS`` or is hidden (starts with a dot),
    with ``.`` / ``..`` and names ending in ``.csproj`` exempt from the
    hidden-name rule.
    """
    for segment in path.replace("\\", "/").split("/"):
        if segment in IGNORED_DIRS:
            return True
        # "." and ".." are navigation entries, not hidden names.
        is_hidden = segment.startswith(".") and segment not in (".", "..")
        # Dot-prefixed names ending in .csproj are let through.
        if is_hidden and not segment.endswith(".csproj"):
            return True
    return False


def find_dependency_files(root: str) -> List[str]:
    """Find dependency files in a repository, skipping build/ignored directories.

    Args:
        root: Directory to scan.

    Returns:
        Paths of the matching files, relative to ``root``, in the order
        ``os.walk`` yields them.
    """
    matches: List[str] = []
    for dirpath, dirnames, files in os.walk(root):
        # Prune in place: os.walk consults this same list object to decide
        # which subdirectories to descend into.
        dirnames[:] = [
            d for d in dirnames
            if d not in IGNORED_DIRS and not d.startswith(".")
        ]

        # Check if the current directory itself should be ignored.
        if _should_ignore_path(os.path.relpath(dirpath, root)):
            continue

        for name in files:
            if not is_dependency_file(name):
                continue
            rel = os.path.relpath(os.path.join(dirpath, name), root)
            # Double-check the full relative path, file name included.
            if not _should_ignore_path(rel):
                matches.append(rel)

    return matches


Expand Down
7 changes: 4 additions & 3 deletions servers/mcp-licenguard/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ logInfo('[mcp] __dirname:', __dirname);
logInfo('[mcp] __filename:', __filename);


dotenv.config({ path: path.join(__dirname, '.env') });
const envPath = path.join(__dirname, '..', '.env');
dotenv.config({ path: envPath });

logInfo('Dotenv loaded from', path.join(__dirname, '.env'));
logInfo('Dotenv loaded from', envPath);
logInfo('LLM config', getActiveLlmInfo());

const llmInfo = getActiveLlmInfo();
Expand Down Expand Up @@ -453,7 +454,7 @@ function createServer() {

const analyzeFileHandler = async ({ filename, content }) => {
if (!filename || !content) throw new Error('filename and content are required');
const report = analyzeFile({ filename, content });
const report = await analyzeFile({ filename, content });
logInfo('[mcp] analyze-file', JSON.stringify({ filename, manager: report.packageManager, deps: report.dependencies }));
return {
content: [{ type: 'text', text: JSON.stringify(report, null, 2) }],
Expand Down
Loading
Loading