anchapin · anchapin · May 31, 2026 · sourcery-ai · May 31, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -13,3 +13,6 @@
 ## 2025-02-18 - Regex Pre-compilation in Hot Paths
 **Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup.
 **Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants.
+## 2025-02-18 - Regex Pre-compilation and Hoisting in ATS Generator
+**Learning:** Re-compiling regexes (like `r"\d+%|\$\d+|\d+\s*(users|customers|projects)"`) and rebuilding static collections (like the action-verb list, now `_ACTION_VERBS`) inside frequently called functions causes unnecessary compilation and object-creation overhead. Furthermore, eagerly lowercasing the whole document text just to get case-insensitive checks is inefficient and causes matching bugs: a case-sensitive pattern such as the acronym regex `\b[A-Z]{2,4}\b` can never match text that has already been lowercased.
+**Action:** Always pre-compile regexes and hoist static collections to module-level constants. When other patterns still need the original casing, use `re.IGNORECASE` for case-insensitive matching instead of eagerly lowercasing the entire input string.
diff --git a/cli/generators/ats_generator.py b/cli/generators/ats_generator.py
@@ -37,6 +37,29 @@
 
 console = Console()
 
+# Pre-compiled regex patterns for performance and bug fixes
+_TABLE_PATTERN = re.compile(r"\|[^\n]+\|")
+_SPECIAL_CHARS_PATTERN = re.compile(r"[^a-zA-Z0-9\s\-\.\,\@\(\)\#\/]")
+_EMAIL_PATTERN = re.compile(r"^[^@]+@[^@]+\.[^@]+$")
+_PHONE_PATTERN = re.compile(r"\d")
+_QUANTIFIABLE_PATTERN = re.compile(r"\d+%|\$\d+|\d+\s*(users|customers|projects)", re.IGNORECASE)
+_ACRONYM_PATTERN = re.compile(r"\b[A-Z]{2,4}\b")
+
+# Action verbs as a module-level tuple (built once instead of on every call)
+_ACTION_VERBS = (
+    "developed",
+    "implemented",
+    "built",
+    "created",
+    "designed",
+    "managed",
+    "led",
+    "increased",
+    "decreased",
+    "improved",
+    "achieved",
+)
+
 
 @dataclass
 class ATSCategoryScore:
@@ -214,8 +237,8 @@ def _check_format_parsing(self, resume_data: Dict[str, Any]) -> ATSCategoryScore
 
         # Check for complex formatting indicators
         all_text = self._get_all_text(resume_data)
-        has_tables = bool(re.search(r"\|[^\n]+\|", all_text))
-        has_special_chars = len(re.findall(r"[^a-zA-Z0-9\s\-\.\,\@\(\)\#\/]", all_text))
+        has_tables = bool(_TABLE_PATTERN.search(all_text))
+        has_special_chars = len(_SPECIAL_CHARS_PATTERN.findall(all_text))
 
         if not has_tables:
             details.append("No tables detected (ATS-friendly)")
@@ -349,15 +372,15 @@ def _check_contact_info(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:
 
         # Check required contact fields
         contact_fields = {
-            "email": (contact.get("email"), 5, r"^[^@]+@[^@]+\.[^@]+$"),
-            "phone": (contact.get("phone"), 5, r"\d"),
+            "email": (contact.get("email"), 5, _EMAIL_PATTERN),
+            "phone": (contact.get("phone"), 5, _PHONE_PATTERN),
             "location": (contact.get("location"), 5, None),  # Just presence check
         }
 
         for field_name, (field_value, field_points, pattern) in contact_fields.items():
             if field_value:
                 if pattern:
-                    if re.search(pattern, field_value):
+                    if pattern.search(field_value):
                         points += field_points
                         details.append(f"✓ {field_name.capitalize()} present and valid")
                     else:
@@ -392,22 +415,10 @@ def _check_readability(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:
         suggestions = []
 
         all_text = self._get_all_text(resume_data)
+        all_text_lower = all_text.lower()
 
         # Check for action verbs in experience bullets
-        action_verbs = [
-            "developed",
-            "implemented",
-            "built",
-            "created",
-            "designed",
-            "managed",
-            "led",
-            "increased",
-            "decreased",
-            "improved",
-            "achieved",
-        ]
-        action_verb_count = sum(1 for verb in action_verbs if verb in all_text.lower())
+        action_verb_count = sum(1 for verb in _ACTION_VERBS if verb in all_text_lower)
 
         if action_verb_count >= 3:
             details.append(f"✓ Uses action verbs ({action_verb_count} found)")
@@ -416,7 +427,7 @@ def _check_readability(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:
             suggestions.append("Use more action verbs (e.g., developed, implemented)")
 
         # Check for quantifiable achievements
-        has_numbers = bool(re.search(r"\d+%|\$\d+|\d+\s*(users|customers|projects)", all_text))
+        has_numbers = bool(_QUANTIFIABLE_PATTERN.search(all_text))
         if has_numbers:
             details.append("✓ Includes quantifiable achievements")
         else:
@@ -425,8 +436,7 @@ def _check_readability(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:
 
         # Check for acronyms (should be minimal or defined)
         # This is a simple heuristic
-        acronym_pattern = r"\b[A-Z]{2,4}\b"
-        acronyms = re.findall(acronym_pattern, all_text)
+        acronyms = _ACRONYM_PATTERN.findall(all_text)
         if len(acronyms) < 10:
             details.append(f"✓ Minimal acronyms ({len(acronyms)} found)")
         else:
@@ -466,7 +476,7 @@ def extract_value(value):
                     extract_value(v)
 
         extract_value(resume_data)
-        return " ".join(text_parts).lower()
+        return " ".join(text_parts)
 
     def _extract_job_keywords(self, job_description: str) -> List[str]:
         """

diff --git a/tests/test_ats_generator.py b/tests/test_ats_generator.py
@@ -356,8 +356,8 @@ def test_get_all_text_from_nested_dict(self, ats_generator):
 
         text = ats_generator._get_all_text(resume_data)
 
-        # Text is lowercased
-        assert "john" in text
-        assert "tech corp" in text
-        assert "built apis" in text
-        assert "python" in text
+        # Text should match original casing
+        assert "John" in text
+        assert "Tech Corp" in text
+        assert "Built APIs" in text
+        assert "Python" in text