@@ -434,29 +434,47 @@ def english_to_lambda(text: str) -> str:
434434 # Build comprehensive reverse lookup (English -> Lambda)
435435 rev = {}
436436
437- # Add all categories
438- for cat in ["entities" , "verbs" , "modifiers" , "time" , "quantifiers" ]:
439- for k , v in ATOMS .get (cat , {}).items ():
440- for word in v ["en" ].lower ().replace ("/" , " " ).split ():
441- word = word .strip ("()" )
442- if word and word not in rev :
443- rev [word ] = k
444-
445- # Add extended vocabulary
446- for k , v in ATOMS .get ("extended" , {}).items ():
447- for word in v ["en" ].lower ().replace ("/" , " " ).split ():
448- word = word .strip ("()" )
449- if word and word not in rev :
450- rev [word ] = k
451-
452- # Add domain atoms with prefixes
437+ def _iter_words (en_value : str ):
438+ """Yield normalized tokens from an English description."""
439+ normalized = en_value .lower ().replace ("/" , " " ).replace ("-" , " " )
440+ for part in normalized .split ():
441+ word = re .sub (r"[^a-z0-9']" , "" , part .strip ("()" )).strip ("'" )
442+ if word :
443+ yield word
444+
445+ def _add_mapping (word : str , token : str ):
446+ if word and word not in rev :
447+ rev [word ] = token
448+
449+ # Iterate all categories defined in ATOMS (except metadata and domains)
450+ for category , entries in ATOMS .items ():
451+ if category in {"version" , "changelog" , "domains" }:
452+ continue
453+ if not isinstance (entries , dict ):
454+ continue
455+ for atom , data in entries .items ():
456+ if atom == "_meta" or not isinstance (data , dict ):
457+ continue
458+ en_value = data .get ("en" )
459+ if not isinstance (en_value , str ):
460+ continue
461+ for word in _iter_words (en_value ):
462+ _add_mapping (word , atom )
463+
464+ # Include domain-specific atoms with domain prefixes
453465 for domain_code , domain_data in ATOMS .get ("domains" , {}).items ():
454- domain_prefix = {"cd" : "c" , "vb" : "v" , "sc" : "s" , "emo" : "e" , "soc" : "o" }.get (domain_code , domain_code )
455- for atom , atom_data in domain_data .get ("atoms" , {}).items ():
456- for word in atom_data ["en" ].lower ().replace ("/" , " " ).split ():
457- word = word .strip ("()" )
458- if word and word not in rev :
459- rev [word ] = f"{ domain_prefix } :{ atom } "
466+ atoms = domain_data .get ("atoms" , {})
467+ if not isinstance (atoms , dict ):
468+ continue
469+ for atom , atom_data in atoms .items ():
470+ if atom == "_meta" or not isinstance (atom_data , dict ):
471+ continue
472+ en_value = atom_data .get ("en" )
473+ if not isinstance (en_value , str ):
474+ continue
475+ token = f"{ domain_code } :{ atom } "
476+ for word in _iter_words (en_value ):
477+ _add_mapping (word , token )
460478
461479 # Add common word mappings (these override domain atoms when more specific)
462480 rev .update ({
0 commit comments