Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
## 2025-02-18 - Regex Pre-compilation in Hot Paths
**Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup.
**Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants.
## 2025-02-18 - Regex Pre-compilation and Hoisting in ATS Generator
**Learning:** Compiling regexes (e.g. `r"\d+%|\$\d+|\d+\s*(users|customers|projects)"`) and rebuilding static collections (e.g. the action-verb list, now `_ACTION_VERBS`) inside frequently called functions or loops causes unnecessary compilation and object-creation overhead. Furthermore, calling `.lower()` on the entire document text just to support case-insensitive checks is inefficient and causes matching bugs: a case-sensitive pattern such as the acronym regex `\b[A-Z]{2,4}\b` can never match text that has already been lowercased.
**Action:** Always pre-compile regexes and hoist static lists to module-level constants. When a pattern needs case-insensitive matching, pass `re.IGNORECASE` rather than lowercasing the whole input up front, so that the original casing remains available to patterns that depend on it.
56 changes: 33 additions & 23 deletions cli/generators/ats_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,29 @@

console = Console()

# Regex patterns compiled once at import time so the scoring checks reuse the
# same pattern objects instead of passing pattern strings to `re` on each call.

# A pipe, one or more non-newline characters, then another pipe on the same
# line (the shape of a markdown-style table row).
_TABLE_PATTERN = re.compile(r"\|[^\n]+\|")
# Any single character that is not an ASCII letter, digit, whitespace, or one
# of - . , @ ( ) # /
_SPECIAL_CHARS_PATTERN = re.compile(r"[^a-zA-Z0-9\s\-\.\,\@\(\)\#\/]")
# Anchored shape check: text, one "@", text, a ".", text -- with no further
# "@" anywhere in the value.
_EMAIL_PATTERN = re.compile(r"^[^@]+@[^@]+\.[^@]+$")
# Matches a single digit, so any value containing at least one digit is found
# by a search with this pattern.
_PHONE_PATTERN = re.compile(r"\d")
# A percentage ("40%"), a dollar amount ("$5"), or a number followed by
# users/customers/projects; the keywords match in any letter case.
_QUANTIFIABLE_PATTERN = re.compile(r"\d+%|\$\d+|\d+\s*(users|customers|projects)", re.IGNORECASE)
# A standalone word of 2-4 uppercase ASCII letters. Case-sensitive, so it only
# finds anything in text that still has its original casing.
_ACRONYM_PATTERN = re.compile(r"\b[A-Z]{2,4}\b")

# Action verbs, all lower-case, kept in an immutable module-level tuple so the
# collection is built once rather than on every call.
_ACTION_VERBS = (
"developed",
"implemented",
"built",
"created",
"designed",
"managed",
"led",
"increased",
"decreased",
"improved",
"achieved",
)


@dataclass
class ATSCategoryScore:
Expand Down Expand Up @@ -214,8 +237,8 @@ def _check_format_parsing(self, resume_data: Dict[str, Any]) -> ATSCategoryScore

# Check for complex formatting indicators
all_text = self._get_all_text(resume_data)
has_tables = bool(re.search(r"\|[^\n]+\|", all_text))
has_special_chars = len(re.findall(r"[^a-zA-Z0-9\s\-\.\,\@\(\)\#\/]", all_text))
has_tables = bool(_TABLE_PATTERN.search(all_text))
has_special_chars = len(_SPECIAL_CHARS_PATTERN.findall(all_text))

if not has_tables:
details.append("No tables detected (ATS-friendly)")
Expand Down Expand Up @@ -349,15 +372,15 @@ def _check_contact_info(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:

# Check required contact fields
contact_fields = {
"email": (contact.get("email"), 5, r"^[^@]+@[^@]+\.[^@]+$"),
"phone": (contact.get("phone"), 5, r"\d"),
"email": (contact.get("email"), 5, _EMAIL_PATTERN),
"phone": (contact.get("phone"), 5, _PHONE_PATTERN),
"location": (contact.get("location"), 5, None), # Just presence check
}

for field_name, (field_value, field_points, pattern) in contact_fields.items():
if field_value:
if pattern:
if re.search(pattern, field_value):
if pattern.search(field_value):
points += field_points
details.append(f"βœ“ {field_name.capitalize()} present and valid")
else:
Expand Down Expand Up @@ -392,22 +415,10 @@ def _check_readability(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:
suggestions = []

all_text = self._get_all_text(resume_data)
all_text_lower = all_text.lower()

# Check for action verbs in experience bullets
action_verbs = [
"developed",
"implemented",
"built",
"created",
"designed",
"managed",
"led",
"increased",
"decreased",
"improved",
"achieved",
]
action_verb_count = sum(1 for verb in action_verbs if verb in all_text.lower())
action_verb_count = sum(1 for verb in _ACTION_VERBS if verb in all_text_lower)

if action_verb_count >= 3:
details.append(f"βœ“ Uses action verbs ({action_verb_count} found)")
Expand All @@ -416,7 +427,7 @@ def _check_readability(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:
suggestions.append("Use more action verbs (e.g., developed, implemented)")

# Check for quantifiable achievements
has_numbers = bool(re.search(r"\d+%|\$\d+|\d+\s*(users|customers|projects)", all_text))
has_numbers = bool(_QUANTIFIABLE_PATTERN.search(all_text))
if has_numbers:
details.append("βœ“ Includes quantifiable achievements")
else:
Expand All @@ -425,8 +436,7 @@ def _check_readability(self, resume_data: Dict[str, Any]) -> ATSCategoryScore:

# Check for acronyms (should be minimal or defined)
# This is a simple heuristic
acronym_pattern = r"\b[A-Z]{2,4}\b"
acronyms = re.findall(acronym_pattern, all_text)
acronyms = _ACRONYM_PATTERN.findall(all_text)
if len(acronyms) < 10:
details.append(f"βœ“ Minimal acronyms ({len(acronyms)} found)")
else:
Expand Down Expand Up @@ -466,7 +476,7 @@ def extract_value(value):
extract_value(v)

extract_value(resume_data)
return " ".join(text_parts).lower()
return " ".join(text_parts)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): Changing _get_all_text to return non-lowercased text may impact other callers that relied on lowercase

`_get_all_text` now returns the text with its original casing instead of lowercased. `_check_readability` was updated to lowercase locally, but other callers may still rely on a lowercased result (e.g., for case-insensitive matching). Please review the remaining call sites and add an explicit `.lower()` where needed to avoid subtle behavior changes.


def _extract_job_keywords(self, job_description: str) -> List[str]:
"""
Expand Down
10 changes: 5 additions & 5 deletions tests/test_ats_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,8 @@ def test_get_all_text_from_nested_dict(self, ats_generator):

text = ats_generator._get_all_text(resume_data)

# Text is lowercased
assert "john" in text
assert "tech corp" in text
assert "built apis" in text
assert "python" in text
# Text should match original casing
assert "John" in text
assert "Tech Corp" in text
assert "Built APIs" in text
assert "Python" in text
Loading