Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 78 additions & 62 deletions src/agents/repository_analysis_agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,49 +99,92 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non
)


def _get_language_specific_patterns(language: str | None) -> tuple[list[str], list[str]]:
"""
Get source and test patterns based on repository language.

Returns:
Tuple of (source_patterns, test_patterns) lists
"""
# Language-specific patterns
patterns_map: dict[str, tuple[list[str], list[str]]] = {
"Python": (
["**/*.py"],
["**/tests/**", "**/*_test.py", "**/test_*.py", "**/*.test.py"],
),
"TypeScript": (
["**/*.ts", "**/*.tsx"],
["**/*.spec.ts", "**/*.test.ts", "**/tests/**"],
),
"JavaScript": (
["**/*.js", "**/*.jsx"],
["**/*.test.js", "**/*.spec.js", "**/tests/**"],
),
"Go": (
["**/*.go"],
["**/*_test.go", "**/*.test.go"],
),
"Java": (
["**/*.java"],
["**/*Test.java", "**/*Tests.java", "**/test/**"],
),
"Rust": (
["**/*.rs"],
["**/*.rs"], # Rust tests are in same file
),
}

if language and language in patterns_map:
return patterns_map[language]

# Default fallback patterns for unknown languages
return (
["**/*.py", "**/*.ts", "**/*.tsx", "**/*.js", "**/*.go"],
["**/tests/**", "**/*_test.py", "**/*.spec.ts", "**/*.test.js", "**/*.test.ts", "**/*.test.jsx"],
)


def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]:
"""Return a minimal, deterministic set of diff-aware rules."""
"""
Return a minimal, deterministic set of diff-aware rules.

Note: These recommendations use repository-specific patterns when available.
For more advanced use cases like restricting specific authors from specific paths
(e.g., preventing a member from modifying /auth), the rule engine would need:
1. A combined validator that checks both author AND file patterns, OR
2. Support for combining multiple validators with AND/OR logic in a single rule.

Currently, validators like `author_team_is` and `file_patterns` operate independently.
"""
recommendations: list[RuleRecommendation] = []

# Get language-specific patterns based on repository analysis
source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language)

# Require tests when source code changes.
recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
"""
f"""
description: "Require tests when code changes"
enabled: true
severity: medium
event_types:
- pull_request
validators:
- type: diff_pattern
parameters:
file_patterns:
- "**/*.py"
- "**/*.ts"
- "**/*.tsx"
- "**/*.js"
- "**/*.go"
- type: related_tests
parameters:
search_paths:
- "**/tests/**"
- "**/*_test.py"
- "**/*.spec.ts"
- "**/*.test.js"
actions:
- type: warn
parameters:
message: "Please include or update tests for code changes."
parameters:
Copy link
Member

@dkargatzis dkargatzis Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these hardcoded parameters limit the range of cases the agent can support? For example, can the agent enforce a rule that prevents a specific member from making changes to a specific path or module (e.g. /auth)? @naaa760

source_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in source_patterns)}
test_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in test_patterns)}
"""
).strip(),
confidence=0.74,
reasoning="Default guardrail for code changes without tests.",
strategy_used="static",
reasoning=f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository.",
strategy_used="hybrid",
)
)

# Require description and linked issue in PR body.
# Require description in PR body.
recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
Expand All @@ -151,15 +194,8 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme
severity: low
event_types:
- pull_request
validators:
- type: required_field_in_diff
parameters:
field: "body"
pattern: "(?i)(summary|context|issue)"
actions:
- type: warn
parameters:
message: "Add a short summary and linked issue in the PR body."
parameters:
min_description_length: 50
"""
).strip(),
confidence=0.68,
Expand All @@ -169,40 +205,20 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme
)

# If no CODEOWNERS, suggest one for shared ownership signals.
if not state.repository_features.has_codeowners:
recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
"""
description: "Flag missing CODEOWNERS entries"
enabled: true
severity: low
event_types:
- pull_request
validators:
- type: diff_pattern
parameters:
file_patterns:
- "**/*"
actions:
- type: warn
parameters:
message: "Consider adding CODEOWNERS to clarify ownership."
"""
).strip(),
confidence=0.6,
reasoning="Repository lacks CODEOWNERS; gentle nudge to add.",
strategy_used="static",
)
)
# Note: This is informational only - we can't enforce CODEOWNERS creation via validators
# but we can encourage it through the recommendation reasoning.

return recommendations


def _render_rules_yaml(recommendations: list[RuleRecommendation]) -> str:
    """
    Combine per-recommendation YAML snippets into a single YAML document.

    Each recommendation carries a standalone YAML rule snippet in its
    ``yaml_rule`` field; parse each snippet and aggregate them under a
    single top-level ``rules:`` key so the output is one well-formed
    document rather than a sequence of '---'-separated blocks.

    Note: the pre-merge '---'-joined implementation was left as dead code
    after the first ``return``; this keeps only the reachable, intended
    behavior.

    Args:
        recommendations: Rule recommendations whose ``yaml_rule`` fields
            hold individual rule definitions as YAML text.

    Returns:
        A YAML document string of the form ``rules: [...]``.
    """
    rules_list = []
    for rec in recommendations:
        # safe_load returns None for empty/comment-only snippets; skip those
        # so they don't appear as null entries in the rules list.
        rule_dict = yaml.safe_load(rec.yaml_rule)
        if rule_dict:
            rules_list.append(rule_dict)
    # sort_keys=False preserves the authoring order of keys so the generated
    # rules file stays readable (description first, etc.).
    return yaml.dump({"rules": rules_list}, default_flow_style=False, sort_keys=False)


def _default_pr_plan(state: RepositoryAnalysisState) -> PullRequestPlan:
Expand Down
63 changes: 60 additions & 3 deletions src/api/recommendations.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,44 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith

base_sha = await github_client.get_git_ref_sha(repo, base_branch, **auth_ctx)
if not base_sha:
log_structured(
logger,
"base_branch_resolution_failed",
operation="proceed_with_pr",
subject_ids=[repo],
base_branch=base_branch,
error="Unable to resolve base branch SHA",
)
raise HTTPException(status_code=400, detail=f"Unable to resolve base branch '{base_branch}'")

created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx)
if not created_ref:
# Check if branch already exists
existing_branch_sha = await github_client.get_git_ref_sha(repo, request.branch_name, **auth_ctx)
if existing_branch_sha:
# Branch exists - use it
log_structured(
logger,
"branch_exists_or_create_failed",
"branch_already_exists",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
existing_sha=existing_branch_sha,
)
else:
# Create new branch
created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx)
if not created_ref:
log_structured(
logger,
"branch_creation_failed",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
base_sha=base_sha,
error="Failed to create branch",
)
raise HTTPException(
status_code=400, detail=f"Failed to create branch '{request.branch_name}'. It may already exist."
)

file_result = await github_client.create_or_update_file(
repo_full_name=repo,
Expand All @@ -160,6 +187,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
**auth_ctx,
)
if not file_result:
log_structured(
logger,
"file_creation_failed",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
file_path=request.file_path,
error="Failed to create or update file",
)
raise HTTPException(status_code=400, detail="Failed to create or update rules file")

pr = await github_client.create_pull_request(
Expand All @@ -171,8 +207,29 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
**auth_ctx,
)
if not pr:
log_structured(
logger,
"pr_creation_failed",
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
base_branch=base_branch,
error="Failed to create pull request",
)
raise HTTPException(status_code=400, detail="Failed to create pull request")

pr_url = pr.get("html_url", "")
if not pr_url:
log_structured(
logger,
"pr_url_missing",
operation="proceed_with_pr",
subject_ids=[repo],
pr_data=pr,
error="PR created but html_url is missing",
)
raise HTTPException(status_code=500, detail="PR was created but URL is missing")

log_structured(
logger,
"proceed_with_pr_completed",
Expand Down
27 changes: 23 additions & 4 deletions src/integrations/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,17 +768,36 @@ async def create_git_ref(
sha: str,
installation_id: int | None = None,
user_token: str | None = None,
) -> bool:
"""Create a new git ref/branch."""
) -> dict[str, Any] | None:
"""Create a new git ref/branch. Returns ref data if successful, None if failed."""
headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token)
if not headers:
return False
return None
url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/refs"
ref_clean = ref.removeprefix("refs/heads/") if ref.startswith("refs/heads/") else ref
payload = {"ref": f"refs/heads/{ref_clean}", "sha": sha}
session = await self._get_session()
async with session.post(url, headers=headers, json=payload) as response:
return response.status in (200, 201)
if response.status in (200, 201):
return await response.json()
# Branch might already exist - check if it exists and points to the same SHA
if response.status == 422:
error_data = await response.json()
if "already exists" in error_data.get("message", "").lower():
# Branch exists - verify it's the same SHA
existing_ref = await self.get_git_ref_sha(repo_full_name, ref_clean, installation_id, user_token)
if existing_ref == sha:
logger.info(f"Branch {ref_clean} already exists with same SHA, continuing")
return {"ref": f"refs/heads/{ref_clean}", "object": {"sha": sha}}
Comment on lines +784 to +791

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There is a bug in the error handling logic. If the response status is 422, await response.json() is called. However, if the idempotency check fails (i.e., the branch exists with a different SHA), the code falls through to line 792 and attempts to read the response body again with await response.text(), which will fail because the body has already been consumed. This leads to incomplete error logs.

To fix this, you should handle the non-idempotent 422 error case completely within its block by logging the error_data and returning None, preventing the fall-through.

Suggested change
if response.status == 422:
error_data = await response.json()
if "already exists" in error_data.get("message", "").lower():
# Branch exists - verify it's the same SHA
existing_ref = await self.get_git_ref_sha(repo_full_name, ref_clean, installation_id, user_token)
if existing_ref == sha:
logger.info(f"Branch {ref_clean} already exists with same SHA, continuing")
return {"ref": f"refs/heads/{ref_clean}", "object": {"sha": sha}}
if response.status == 422:
error_data = await response.json()
if "already exists" in error_data.get("message", "").lower():
# Branch exists - verify it's the same SHA
existing_ref = await self.get_git_ref_sha(repo_full_name, ref_clean, installation_id, user_token)
if existing_ref == sha:
logger.info(f"Branch {ref_clean} already exists with same SHA, continuing")
return {"ref": f"refs/heads/{ref_clean}", "object": {"sha": sha}}
logger.error(f"Failed to create branch {ref_clean}: {response.status} - {error_data}")
return None

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still the case? Should we push a follow up commit to resolve this? @naaa760

# Branch exists but with different SHA - log and return None
logger.error(f"Failed to create branch {ref_clean}: branch exists with different SHA. {error_data}")
return None
# 422 error but not "already exists" - log and return None
logger.error(f"Failed to create branch {ref_clean}: {error_data}")
return None
error_text = await response.text()
logger.error(f"Failed to create branch {ref_clean}: {response.status} - {error_text}")
return None

async def create_or_update_file(
self,
Expand Down