Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class ProcessedProjectEntry(BaseModel):
class RepoInfo(BaseModel):
owner: str
repo: str
type: str
type: str # Repository type: github, gitlab, bitbucket, azure_devops
token: Optional[str] = None
localPath: Optional[str] = None
repoUrl: Optional[str] = None
Expand Down
5 changes: 5 additions & 0 deletions api/config/generator.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
"temperature": 1.0,
"top_p": 0.8,
"top_k": 20
},
"gemini-3.1-flash-lite-preview": {
"temperature": 1.0,
"top_p": 0.8,
"top_k": 20
}
}
},
Expand Down
74 changes: 72 additions & 2 deletions api/data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ def download_repo(repo_url: str, local_path: str, repo_type: str = None, access_
elif repo_type == "bitbucket":
# Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
clone_url = urlunparse((parsed.scheme, f"x-token-auth:{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))
elif repo_type == "azure_devops":
# Format: https://{token}@dev.azure.com/org/project/_git/repo
# Strip any existing username from netloc (ADO URLs often include user@)
hostname = parsed.hostname or parsed.netloc.split('@')[-1]
clone_url = urlunparse((parsed.scheme, f"{encoded_token}@{hostname}", parsed.path, '', '', ''))
Comment on lines +124 to +125
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The hostname extracted from the user-provided repo_url is used to construct the clone URL without validation. This can lead to SSRF and credential leakage if an attacker provides a URL pointing to a malicious server, as the encoded_token (PAT) will be included in the request sent to that server.


logger.info("Using access token for authentication")

Expand Down Expand Up @@ -684,6 +689,59 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str
raise ValueError(f"Failed to get file content: {str(e)}")


def get_azure_devops_file_content(repo_url: str, file_path: str, access_token: str = None) -> str:
    """
    Retrieves the content of a file from an Azure DevOps repository using the ADO REST API.

    Args:
        repo_url (str): The URL (e.g., "https://dev.azure.com/org/project/_git/repo")
        file_path (str): Path to file (e.g., "src/main.py")
        access_token (str, optional): Azure DevOps PAT

    Returns:
        str: The content of the file

    Raises:
        ValueError: If the URL is not a usable Azure DevOps repository URL
            (wrong scheme, no host, no "_git" segment, no repository name)
            or if the HTTP request fails.
    """
    # Local import so this function does not depend on these names being
    # imported at module level.
    from urllib.parse import quote, unquote

    api_version = "7.0"
    request_timeout = 30  # seconds; avoids hanging forever on an unresponsive host

    try:
        parsed_url = urlparse(repo_url)
        if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
            raise ValueError("Not a valid Azure DevOps repository URL")

        path_parts = parsed_url.path.strip('/').split('/')

        if '_git' not in path_parts:
            raise ValueError("Not a valid Azure DevOps repository URL - missing _git in path")

        git_index = path_parts.index('_git')
        repo_name = path_parts[git_index + 1] if git_index + 1 < len(path_parts) else ""
        # Strip only a trailing ".git"; str.replace would also mangle names
        # that merely contain ".git" somewhere in the middle.
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-len(".git")]

        if not repo_name:
            raise ValueError("Could not extract repository name from Azure DevOps URL")

        # Build API URL: https://dev.azure.com/{org}/{project}/_apis/git/repositories/{repo}/items
        # Use hostname (not netloc) so any "user@" prefix in the URL is dropped.
        hostname = parsed_url.hostname
        if not hostname:
            raise ValueError("Not a valid Azure DevOps repository URL")
        if ':' in hostname:
            # urlparse strips the brackets from IPv6 literals; restore them.
            hostname = f"[{hostname}]"
        if parsed_url.port is not None:
            # .hostname omits the port, so re-attach an explicit one.
            hostname = f"{hostname}:{parsed_url.port}"

        # SECURITY NOTE(review): the host is taken from the caller-supplied
        # repo_url and the PAT below is sent to it in the Authorization header.
        # If repo_url can come from an untrusted party, restrict it to trusted
        # Azure DevOps hosts before calling this function.
        project_path = '/'.join(path_parts[:git_index])
        api_base = f"{parsed_url.scheme}://{hostname}/{project_path}"

        # Percent-encode the dynamic parts so characters such as ?, & or #
        # cannot alter the request. The repo segment came from a URL and may
        # already be encoded, so decode first to avoid double-encoding.
        encoded_repo = quote(unquote(repo_name), safe='')
        encoded_path = quote(file_path, safe='/')
        api_url = (
            f"{api_base}/_apis/git/repositories/{encoded_repo}/items"
            f"?path={encoded_path}&api-version={api_version}"
        )

        headers = {}
        if access_token:
            # Basic credentials with an empty username and the PAT as password.
            encoded = base64.b64encode(f":{access_token}".encode()).decode()
            headers["Authorization"] = f"Basic {encoded}"

        logger.info("Fetching file content from Azure DevOps API: %s", api_url)
        try:
            response = requests.get(api_url, headers=headers, timeout=request_timeout)
            response.raise_for_status()
        except RequestException as e:
            raise ValueError(f"Error fetching file content: {e}") from e

        return response.text

    except Exception as e:
        raise ValueError(f"Failed to get file content: {str(e)}") from e


def get_file_content(repo_url: str, file_path: str, repo_type: str = None, access_token: str = None) -> str:
"""
Retrieves the content of a file from a Git repository (GitHub or GitLab).
Expand All @@ -706,8 +764,10 @@ def get_file_content(repo_url: str, file_path: str, repo_type: str = None, acces
return get_gitlab_file_content(repo_url, file_path, access_token)
elif repo_type == "bitbucket":
return get_bitbucket_file_content(repo_url, file_path, access_token)
elif repo_type == "azure_devops":
return get_azure_devops_file_content(repo_url, file_path, access_token)
else:
raise ValueError("Unsupported repository type. Only GitHub, GitLab, and Bitbucket are supported.")
raise ValueError("Unsupported repository type. Only GitHub, GitLab, Bitbucket, and Azure DevOps are supported.")

class DatabaseManager:
"""
Expand Down Expand Up @@ -763,7 +823,17 @@ def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) ->
# Extract owner and repo name to create unique identifier
url_parts = repo_url_or_path.rstrip('/').split('/')

if repo_type in ["github", "gitlab", "bitbucket"] and len(url_parts) >= 5:
if repo_type == "azure_devops":
# Azure DevOps URL: https://dev.azure.com/{org}/{project}/_git/{repo}
try:
git_index = url_parts.index('_git')
repo = url_parts[git_index + 1].replace(".git", "")
project = url_parts[git_index - 1]
repo_name = f"{project}_{repo}"
except (ValueError, IndexError):
repo_name = url_parts[-1].replace(".git", "")
return repo_name
elif repo_type in ["github", "gitlab", "bitbucket"] and len(url_parts) >= 5:
# GitHub URL format: https://github.com/owner/repo
# GitLab URL format: https://gitlab.com/owner/repo or https://gitlab.com/group/subgroup/repo
# Bitbucket URL format: https://bitbucket.org/owner/repo
Expand Down
2 changes: 1 addition & 1 deletion api/simple_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class ChatCompletionRequest(BaseModel):
messages: List[ChatMessage] = Field(..., description="List of chat messages")
filePath: Optional[str] = Field(None, description="Optional path to a file in the repository to include in the prompt")
token: Optional[str] = Field(None, description="Personal access token for private repositories")
type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket')")
type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket', 'azure_devops')")

# model parameters
provider: str = Field("google", description="Model provider (google, openai, openrouter, ollama, bedrock, azure, dashscope)")
Expand Down
2 changes: 1 addition & 1 deletion api/websocket_wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class ChatCompletionRequest(BaseModel):
messages: List[ChatMessage] = Field(..., description="List of chat messages")
filePath: Optional[str] = Field(None, description="Optional path to a file in the repository to include in the prompt")
token: Optional[str] = Field(None, description="Personal access token for private repositories")
type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket')")
type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket', 'azure_devops')")

# model parameters
provider: str = Field(
Expand Down
88 changes: 83 additions & 5 deletions src/app/[owner]/[repo]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { extractUrlDomain, extractUrlPath } from '@/utils/urlDecoder';
import Link from 'next/link';
import { useParams, useSearchParams } from 'next/navigation';
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { FaBitbucket, FaBookOpen, FaComments, FaDownload, FaExclamationTriangle, FaFileExport, FaFolder, FaGithub, FaGitlab, FaHome, FaSync, FaTimes } from 'react-icons/fa';
import { FaBitbucket, FaBookOpen, FaComments, FaDownload, FaExclamationTriangle, FaFileExport, FaFolder, FaGithub, FaGitlab, FaHome, FaMicrosoft, FaSync, FaTimes } from 'react-icons/fa';
// Define the WikiSection and WikiStructure types directly in this file
// since the imported types don't have the sections and rootSections properties
interface WikiSection {
Expand Down Expand Up @@ -173,6 +173,18 @@ const createBitbucketHeaders = (bitbucketToken: string): HeadersInit => {
return headers;
};

/**
 * Builds the request headers for Azure DevOps REST API calls.
 *
 * Always sets a JSON Content-Type. When a token is supplied, adds a Basic
 * Authorization header whose credentials are ':' + token, base64-encoded
 * (empty username, token as password).
 */
const createAzureDevOpsHeaders = (adoToken: string): HeadersInit => {
  // No token: send the request unauthenticated.
  if (!adoToken) {
    return { 'Content-Type': 'application/json' };
  }

  const basicCredentials = btoa(':' + adoToken);
  return {
    'Content-Type': 'application/json',
    'Authorization': `Basic ${basicCredentials}`,
  };
};


export default function RepoWikiPage() {
// Get route parameters and search params
Expand Down Expand Up @@ -205,9 +217,11 @@ export default function RepoWikiPage() {
? 'bitbucket'
: repoHost?.includes('gitlab')
? 'gitlab'
: repoHost?.includes('github')
? 'github'
: searchParams.get('type') || 'github';
: repoHost?.includes('dev.azure.com')
? 'azure_devops'
: repoHost?.includes('github')
? 'github'
: searchParams.get('type') || 'github';

// Import language context for translations
const { messages } = useLanguage();
Expand Down Expand Up @@ -1479,6 +1493,68 @@ IMPORTANT:
console.warn('Could not fetch Bitbucket README.md, continuing with empty README', err);
}
}
else if (effectiveRepoInfo.type === 'azure_devops') {
// Azure DevOps API approach
const adoUrl = effectiveRepoInfo.repoUrl ?? '';
const adoParsed = new URL(adoUrl);
const adoParts = adoParsed.pathname.replace(/^\/|\/$/g, '').split('/');
const gitIndex = adoParts.indexOf('_git');

if (gitIndex < 1 || gitIndex + 1 >= adoParts.length) {
throw new Error('Invalid Azure DevOps repository URL');
}

const adoRepo = adoParts[gitIndex + 1];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The repository name extracted from the Azure DevOps URL doesn't account for a potential .git suffix. If the URL is .../_git/myrepo.git, adoRepo will be myrepo.git, which will cause subsequent API calls to fail. You should remove the .git suffix, similar to how it's handled in the backend Python code and other parts of the frontend.

Suggested change
const adoRepo = adoParts[gitIndex + 1];
const adoRepo = adoParts[gitIndex + 1].replace(/\.git$/, '');

const adoBase = `${adoParsed.protocol}//${adoParsed.hostname}/${adoParts.slice(0, gitIndex).join('/')}`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The frontend constructs Azure DevOps API URLs using the user-provided repoUrl without validating the hostname. This enables client-side request forgery and credential leakage: if a user is tricked into opening a malicious repository URL, the browser's fetch call sends their PAT in the Authorization header to the attacker-controlled host.

const headers = createAzureDevOpsHeaders(currentToken);

// Get default branch
let defaultBranchLocal = 'main';
try {
const repoInfoUrl = `${adoBase}/_apis/git/repositories/${adoRepo}?api-version=7.0`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The API version 7.0 is hardcoded here and in a few other places in this function. To improve maintainability, consider defining it as a constant at the top of the file.

// At top of file
const ADO_API_VERSION = '7.0';

// In function
const repoInfoUrl = `${adoBase}/_apis/git/repositories/${adoRepo}?api-version=${ADO_API_VERSION}`;

const repoInfoRes = await fetch(repoInfoUrl, { headers });
if (repoInfoRes.ok) {
const repoData = await repoInfoRes.json();
// ADO returns defaultBranch as "refs/heads/main"
defaultBranchLocal = (repoData.defaultBranch || 'refs/heads/main').replace('refs/heads/', '');
console.log(`Found Azure DevOps default branch: ${defaultBranchLocal}`);
}
} catch (err) {
console.warn('Could not fetch ADO repo info:', err);
}
setDefaultBranch(defaultBranchLocal);

// Get file tree using Items API with recursion
const treeUrl = `${adoBase}/_apis/git/repositories/${adoRepo}/items?recursionLevel=Full&versionDescriptor.version=${defaultBranchLocal}&api-version=7.0`;
const treeRes = await fetch(treeUrl, { headers });

if (!treeRes.ok) {
const errorData = await treeRes.text();
throw new Error(`Azure DevOps API error: ${treeRes.status} - ${errorData}`);
}

const treeData = await treeRes.json();

if (!treeData.value || treeData.value.length === 0) {
throw new Error('Could not fetch repository structure from Azure DevOps.');
}

fileTreeData = treeData.value
.filter((item: { gitObjectType: string; path: string }) => item.gitObjectType === 'blob')
.map((item: { path: string }) => item.path.replace(/^\//, ''))
.join('\n');

// Fetch README
try {
const readmeUrl = `${adoBase}/_apis/git/repositories/${adoRepo}/items?path=README.md&api-version=7.0`;
const readmeRes = await fetch(readmeUrl, { headers });
if (readmeRes.ok) {
readmeContent = await readmeRes.text();
}
} catch (err) {
console.warn('Could not fetch Azure DevOps README.md:', err);
}
}

// Now determine the wiki structure
await determineWikiStructure(fileTreeData, readmeContent, owner, repo);
Expand Down Expand Up @@ -2059,6 +2135,8 @@ IMPORTANT:
<FaGithub className="mr-2" />
) : effectiveRepoInfo.type === 'gitlab' ? (
<FaGitlab className="mr-2" />
) : effectiveRepoInfo.type === 'azure_devops' ? (
<FaMicrosoft className="mr-2" />
) : (
<FaBitbucket className="mr-2" />
)}
Expand Down Expand Up @@ -2269,7 +2347,7 @@ IMPORTANT:
onApply={confirmRefresh}
showWikiType={true}
showTokenInput={effectiveRepoInfo.type !== 'local' && !currentToken} // Show token input if not local and no current token
repositoryType={effectiveRepoInfo.type as 'github' | 'gitlab' | 'bitbucket'}
repositoryType={effectiveRepoInfo.type as 'github' | 'gitlab' | 'bitbucket' | 'azure_devops'}
authRequired={authRequired}
authCode={authCode}
setAuthCode={setAuthCode}
Expand Down
18 changes: 16 additions & 2 deletions src/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ export default function Home() {
const [excludedFiles, setExcludedFiles] = useState('');
const [includedDirs, setIncludedDirs] = useState('');
const [includedFiles, setIncludedFiles] = useState('');
const [selectedPlatform, setSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket'>('github');
const [selectedPlatform, setSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket' | 'azure_devops'>('github');
const [accessToken, setAccessToken] = useState('');
const [error, setError] = useState<string | null>(null);
const [isSubmitting, setIsSubmitting] = useState(false);
Expand Down Expand Up @@ -212,13 +212,23 @@ export default function Home() {
type = 'gitlab';
} else if (domain?.includes('bitbucket.org') || domain?.includes('bitbucket.')) {
type = 'bitbucket';
} else if (domain?.includes('dev.azure.com')) {
type = 'azure_devops';
} else {
type = 'web'; // fallback for other git hosting services
}

fullPath = extractUrlPath(input)?.replace(/\.git$/, '');
const parts = fullPath?.split('/') ?? [];
if (parts.length >= 2) {

if (type === 'azure_devops') {
// ADO URL path: {org}/{project}/_git/{repo}
const gitIndex = parts.indexOf('_git');
if (gitIndex >= 1 && gitIndex + 1 < parts.length) {
owner = parts[gitIndex - 1]; // project name
repo = parts[gitIndex + 1]; // repo name
}
} else if (parts.length >= 2) {
repo = parts[parts.length - 1] || '';
owner = parts[parts.length - 2] || '';
}
Expand Down Expand Up @@ -559,6 +569,10 @@ export default function Home() {
className="bg-[var(--background)]/70 p-3 rounded border border-[var(--border-color)] font-mono overflow-x-hidden whitespace-nowrap"
>https://bitbucket.org/atlassian/atlaskit
</div>
<div
className="bg-[var(--background)]/70 p-3 rounded border border-[var(--border-color)] font-mono overflow-x-hidden whitespace-nowrap"
>https://dev.azure.com/org/project/_git/repo
</div>
</div>
</div>

Expand Down
39 changes: 33 additions & 6 deletions src/components/ConfigurationModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ interface ConfigurationModalProps {
setCustomModel: (value: string) => void;

// Platform selection
selectedPlatform: 'github' | 'gitlab' | 'bitbucket';
setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket') => void;
selectedPlatform: 'github' | 'gitlab' | 'bitbucket' | 'azure_devops';
setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket' | 'azure_devops') => void;

// Access token
accessToken: string;
Expand Down Expand Up @@ -98,8 +98,8 @@ export default function ConfigurationModal({
}: ConfigurationModalProps) {
const { messages: t } = useLanguage();

// Show token section state
const [showTokenSection, setShowTokenSection] = useState(false);
// Show token section state - auto-expand for Azure DevOps since PAT is required
const [showTokenSection, setShowTokenSection] = useState(selectedPlatform === 'azure_devops');

if (!isOpen) return null;

Expand Down Expand Up @@ -231,15 +231,42 @@ export default function ConfigurationModal({
/>
</div>

{/* Access token section using TokenInput component */}
{/* Platform selection - always visible */}
<div className="mb-4">
<label className="block text-sm font-medium text-[var(--foreground)] mb-2">
{t.form?.selectPlatform || 'Select Platform'}
</label>
<div className="flex gap-2">
{(['github', 'gitlab', 'bitbucket', 'azure_devops'] as const).map((platform) => (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The list of supported platforms ['github', 'gitlab', 'bitbucket', 'azure_devops'] is hardcoded. This list is also implicitly used in other components. To improve maintainability and avoid inconsistencies, consider defining this array as a constant in a shared utility file and importing it where needed.

For example:

// in a shared constants file, e.g., src/lib/constants.ts
export const SUPPORTED_PLATFORMS = ['github', 'gitlab', 'bitbucket', 'azure_devops'] as const;

// in ConfigurationModal.tsx
import { SUPPORTED_PLATFORMS } from '@/lib/constants';
// ...
{SUPPORTED_PLATFORMS.map((platform) => (
// ...

<button
key={platform}
type="button"
onClick={() => {
setSelectedPlatform(platform);
if (platform === 'azure_devops') setShowTokenSection(true);
}}
className={`flex-1 flex items-center justify-center gap-2 px-3 py-2 rounded-md border transition-all ${selectedPlatform === platform
? 'bg-[var(--accent-primary)]/10 border-[var(--accent-primary)] text-[var(--accent-primary)] shadow-sm'
: 'border-[var(--border-color)] text-[var(--foreground)] hover:bg-[var(--background)]'
}`}
>
<span className="text-sm">
{platform === 'azure_devops' ? 'Azure DevOps' : platform.charAt(0).toUpperCase() + platform.slice(1)}
</span>
</button>
))}
</div>
</div>

{/* Access token input */}
<TokenInput
selectedPlatform={selectedPlatform}
setSelectedPlatform={setSelectedPlatform}
accessToken={accessToken}
setAccessToken={setAccessToken}
showTokenSection={showTokenSection}
onToggleTokenSection={() => setShowTokenSection(!showTokenSection)}
allowPlatformChange={true}
allowPlatformChange={false}
/>

{/* Authorization Code Input */}
Expand Down
Loading