Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 78 additions & 62 deletions src/agents/repository_analysis_agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,49 +99,92 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non
)


def _get_language_specific_patterns(language: str | None) -> tuple[list[str], list[str]]:
"""
Get source and test patterns based on repository language.

Returns:
Tuple of (source_patterns, test_patterns) lists
"""
# Language-specific patterns
patterns_map: dict[str, tuple[list[str], list[str]]] = {
"Python": (
["**/*.py"],
["**/tests/**", "**/*_test.py", "**/test_*.py", "**/*.test.py"],
),
"TypeScript": (
["**/*.ts", "**/*.tsx"],
["**/*.spec.ts", "**/*.test.ts", "**/tests/**"],
),
"JavaScript": (
["**/*.js", "**/*.jsx"],
["**/*.test.js", "**/*.spec.js", "**/tests/**"],
),
"Go": (
["**/*.go"],
["**/*_test.go", "**/*.test.go"],
),
"Java": (
["**/*.java"],
["**/*Test.java", "**/*Tests.java", "**/test/**"],
),
"Rust": (
["**/*.rs"],
["**/*.rs"], # Rust tests are in same file
),
}

if language and language in patterns_map:
return patterns_map[language]

# Default fallback patterns for unknown languages
return (
["**/*.py", "**/*.ts", "**/*.tsx", "**/*.js", "**/*.go"],
["**/tests/**", "**/*_test.py", "**/*.spec.ts", "**/*.test.js", "**/*.test.ts", "**/*.test.jsx"],
)


def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]:
"""Return a minimal, deterministic set of diff-aware rules."""
"""
Return a minimal, deterministic set of diff-aware rules.

Note: These recommendations use repository-specific patterns when available.
For more advanced use cases like restricting specific authors from specific paths
(e.g., preventing a member from modifying /auth), the rule engine would need:
1. A combined validator that checks both author AND file patterns, OR
2. Support for combining multiple validators with AND/OR logic in a single rule.

Currently, validators like `author_team_is` and `file_patterns` operate independently.
"""
recommendations: list[RuleRecommendation] = []

# Get language-specific patterns based on repository analysis
source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language)

# Require tests when source code changes.
recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
"""
f"""
description: "Require tests when code changes"
enabled: true
severity: medium
event_types:
- pull_request
validators:
- type: diff_pattern
parameters:
file_patterns:
- "**/*.py"
- "**/*.ts"
- "**/*.tsx"
- "**/*.js"
- "**/*.go"
- type: related_tests
parameters:
search_paths:
- "**/tests/**"
- "**/*_test.py"
- "**/*.spec.ts"
- "**/*.test.js"
actions:
- type: warn
parameters:
message: "Please include or update tests for code changes."
parameters:
Copy link
Member

@dkargatzis dkargatzis Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these hardcoded parameters limit the range of cases the agent can support? For example, can the agent enforce a rule that prevents a specific member from making changes to a specific path or module (e.g. /auth)? @naaa760

source_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in source_patterns)}
test_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in test_patterns)}
"""
).strip(),
confidence=0.74,
reasoning="Default guardrail for code changes without tests.",
strategy_used="static",
reasoning=f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository.",
strategy_used="hybrid",
)
)

# Require description and linked issue in PR body.
# Require description in PR body.
recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
Expand All @@ -151,15 +194,8 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme
severity: low
event_types:
- pull_request
validators:
- type: required_field_in_diff
parameters:
field: "body"
pattern: "(?i)(summary|context|issue)"
actions:
- type: warn
parameters:
message: "Add a short summary and linked issue in the PR body."
parameters:
min_description_length: 50
"""
).strip(),
confidence=0.68,
Expand All @@ -169,40 +205,20 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme
)

# If no CODEOWNERS, suggest one for shared ownership signals.
if not state.repository_features.has_codeowners:
recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
"""
description: "Flag missing CODEOWNERS entries"
enabled: true
severity: low
event_types:
- pull_request
validators:
- type: diff_pattern
parameters:
file_patterns:
- "**/*"
actions:
- type: warn
parameters:
message: "Consider adding CODEOWNERS to clarify ownership."
"""
).strip(),
confidence=0.6,
reasoning="Repository lacks CODEOWNERS; gentle nudge to add.",
strategy_used="static",
)
)
# Note: This is informational only - we can't enforce CODEOWNERS creation via validators
# but we can encourage it through the recommendation reasoning.

return recommendations


def _render_rules_yaml(recommendations: list[RuleRecommendation]) -> str:
    """
    Combine per-recommendation YAML snippets into a single YAML document.

    Each recommendation carries a standalone YAML rule snippet in its
    ``yaml_rule`` field; parse each snippet and aggregate them under a
    single top-level ``rules:`` key so the output is one well-formed
    document rather than a sequence of '---'-separated blocks.

    Note: the pre-merge '---'-joined implementation was left as dead code
    after the first ``return``; this keeps only the reachable, intended
    behavior.

    Args:
        recommendations: Rule recommendations whose ``yaml_rule`` fields
            hold individual rule definitions as YAML text.

    Returns:
        A YAML document string of the form ``rules: [...]``.
    """
    rules_list = []
    for rec in recommendations:
        # safe_load returns None for empty/comment-only snippets; skip those
        # so they don't appear as null entries in the rules list.
        rule_dict = yaml.safe_load(rec.yaml_rule)
        if rule_dict:
            rules_list.append(rule_dict)
    # sort_keys=False preserves the authoring order of keys so the generated
    # rules file stays readable (description first, etc.).
    return yaml.dump({"rules": rules_list}, default_flow_style=False, sort_keys=False)


def _default_pr_plan(state: RepositoryAnalysisState) -> PullRequestPlan:
Expand Down
63 changes: 60 additions & 3 deletions src/api/recommendations.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,44 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith

base_sha = await github_client.get_git_ref_sha(repo, base_branch, **auth_ctx)
if not base_sha:
log_structured(
logger,
"base_branch_resolution_failed",
operation="proceed_with_pr",
subject_ids=[repo],
base_branch=base_branch,
error="Unable to resolve base branch SHA",
)
raise HTTPException(status_code=400, detail=f"Unable to resolve base branch '{base_branch}'")

created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx)
if not created_ref:
# Check if branch already exists
existing_branch_sha = await github_client.get_git_ref_sha(repo, request.branch_name, **auth_ctx)
if existing_branch_sha:
# Branch exists - use it
log_structured(
logger,
"branch_exists_or_create_failed",
"branch_already_exists",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
existing_sha=existing_branch_sha,
)
else:
# Create new branch
created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx)
if not created_ref:
log_structured(
logger,
"branch_creation_failed",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
base_sha=base_sha,
error="Failed to create branch",
)
raise HTTPException(
status_code=400, detail=f"Failed to create branch '{request.branch_name}'. It may already exist."
)

file_result = await github_client.create_or_update_file(
repo_full_name=repo,
Expand All @@ -160,6 +187,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
**auth_ctx,
)
if not file_result:
log_structured(
logger,
"file_creation_failed",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
file_path=request.file_path,
error="Failed to create or update file",
)
raise HTTPException(status_code=400, detail="Failed to create or update rules file")

pr = await github_client.create_pull_request(
Expand All @@ -171,8 +207,29 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
**auth_ctx,
)
if not pr:
log_structured(
logger,
"pr_creation_failed",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
base_branch=base_branch,
error="Failed to create pull request",
)
raise HTTPException(status_code=400, detail="Failed to create pull request")

pr_url = pr.get("html_url", "")
if not pr_url:
log_structured(
logger,
"pr_url_missing",
operation="proceed_with_pr",
subject_ids=[repo],
pr_data=pr,
error="PR created but html_url is missing",
)
raise HTTPException(status_code=500, detail="PR was created but URL is missing")

log_structured(
logger,
"proceed_with_pr_completed",
Expand Down
27 changes: 23 additions & 4 deletions src/integrations/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,17 +768,36 @@ async def create_git_ref(
sha: str,
installation_id: int | None = None,
user_token: str | None = None,
) -> bool:
"""Create a new git ref/branch."""
) -> dict[str, Any] | None:
"""Create a new git ref/branch. Returns ref data if successful, None if failed."""
headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token)
if not headers:
return False
return None
url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/refs"
ref_clean = ref.removeprefix("refs/heads/") if ref.startswith("refs/heads/") else ref
payload = {"ref": f"refs/heads/{ref_clean}", "sha": sha}
session = await self._get_session()
async with session.post(url, headers=headers, json=payload) as response:
return response.status in (200, 201)
if response.status in (200, 201):
return await response.json()
# Branch might already exist - check if it exists and points to the same SHA
if response.status == 422:
error_data = await response.json()
if "already exists" in error_data.get("message", "").lower():
# Branch exists - verify it's the same SHA
existing_ref = await self.get_git_ref_sha(repo_full_name, ref_clean, installation_id, user_token)
if existing_ref == sha:
logger.info(f"Branch {ref_clean} already exists with same SHA, continuing")
return {"ref": f"refs/heads/{ref_clean}", "object": {"sha": sha}}
Comment on lines +784 to +791

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There is a bug in the error handling logic. If the response status is 422, await response.json() is called. However, if the idempotency check fails (i.e., the branch exists with a different SHA), the code falls through to line 792 and attempts to read the response body again with await response.text(), which will fail because the body has already been consumed. This leads to incomplete error logs.

To fix this, you should handle the non-idempotent 422 error case completely within its block by logging the error_data and returning None, preventing the fall-through.

Suggested change
if response.status == 422:
error_data = await response.json()
if "already exists" in error_data.get("message", "").lower():
# Branch exists - verify it's the same SHA
existing_ref = await self.get_git_ref_sha(repo_full_name, ref_clean, installation_id, user_token)
if existing_ref == sha:
logger.info(f"Branch {ref_clean} already exists with same SHA, continuing")
return {"ref": f"refs/heads/{ref_clean}", "object": {"sha": sha}}
if response.status == 422:
error_data = await response.json()
if "already exists" in error_data.get("message", "").lower():
# Branch exists - verify it's the same SHA
existing_ref = await self.get_git_ref_sha(repo_full_name, ref_clean, installation_id, user_token)
if existing_ref == sha:
logger.info(f"Branch {ref_clean} already exists with same SHA, continuing")
return {"ref": f"refs/heads/{ref_clean}", "object": {"sha": sha}}
logger.error(f"Failed to create branch {ref_clean}: {response.status} - {error_data}")
return None

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still the case? Should we push a follow up commit to resolve this? @naaa760

# Branch exists but with different SHA - log and return None
logger.error(f"Failed to create branch {ref_clean}: branch exists with different SHA. {error_data}")
return None
# 422 error but not "already exists" - log and return None
logger.error(f"Failed to create branch {ref_clean}: {error_data}")
return None
error_text = await response.text()
logger.error(f"Failed to create branch {ref_clean}: {response.status} - {error_text}")
return None

async def create_or_update_file(
self,
Expand Down