OpenSyndrome · anapaulagomes · May 20, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 18, 2026
diff --git a/README.md b/README.md
@@ -94,6 +94,31 @@ opensyndrome convert --language "Português do Brasil" --edit
 opensyndrome convert --validate
 ```
 
+### Enrich ontology IDs on a JSON definition
+
+The `enrich` command fills in missing `ontology_id` fields on criteria nodes and sets `@context` to the OpenSyndrome JSON-LD context URL. By default it looks terms up in [EBI OLS4](https://www.ebi.ac.uk/ols4); [text2term](https://ccb-hms.github.io/ontology-mapper/) is available as an alternative mapper.
+
+```bash
+# enrich an existing JSON definition (uses OLS4 by default)
+opensyndrome enrich definition.json
+
+# use text2term instead (requires: pip install "opensyndrome[text2term]")
+opensyndrome enrich definition.json --mapper text2term
+
+# review and adjust the result in an editor before printing
+opensyndrome enrich definition.json --edit
+
+# enrich and validate in one step
+opensyndrome enrich definition.json --validate
+```
+
+You can also enrich directly after conversion:
+
+```bash
+opensyndrome convert -hr "Any person with fever and rash" --enrich-ontology
+opensyndrome convert -hr "Any person with fever and rash" --enrich-ontology --mapper text2term
+```
+
 ### Convert a machine-readable JSON syndrome definition to a human-readable format
 
 ```bash
@@ -118,6 +143,12 @@ To get started with development, you need to have [uv](https://docs.astral.sh/uv
 uv sync
 ```
 
+To also install the optional `text2term` mapper (and its `bioregistry` dependency), so that the full test suite runs without skips:
+
+```bash
+uv sync --all-extras
+```
+
 ### Generate Ollama-compatible JSON
 
 > You only need to do this if you are a maintainer adding a new OSI schema or updating an existing one.

diff --git a/opensyndrome/cli.py b/opensyndrome/cli.py
@@ -11,6 +11,7 @@
     generate_machine_readable_format,
     generate_human_readable_format,
 )
+from opensyndrome.ontology import enrich_definition, MAPPERS
 from opensyndrome.artifacts import get_schema_filepath, get_definition_dir
 from opensyndrome.validators import validate_machine_readable_format
 from opensyndrome.providers import (
@@ -106,6 +107,18 @@ def wrapper(*args, **kwargs):
     is_flag=True,
     help="Open editor after generation.",
 )
+@click.option(
+    "--enrich-ontology / --no-enrich-ontology",
+    default=False,
+    help="Post-process output to populate ontology IDs.",
+)
+@click.option(
+    "--mapper",
+    type=click.Choice(MAPPERS),
+    default="ols",
+    show_default=True,
+    help="Ontology mapper to use with --enrich-ontology.",
+)
 @click.option(
     "-hr",
     "--human-readable-definition",
@@ -131,6 +144,8 @@ def convert_to_json(
     model,
     language,
     edit,
+    enrich_ontology,
+    mapper,
     human_readable_definition,
     human_readable_definition_file,
     provider,
@@ -156,6 +171,21 @@ def convert_to_json(
         _show_llm_error(exception, provider, model)
         return
 
+    if enrich_ontology:
+        click.echo(click.style("Enriching ontology IDs...", fg="cyan"), err=True)
+
+        def _progress(name, curie):
+            click.echo(
+                click.style(f"  {name} → {curie}"),
+                err=True,
+            )
+
+        machine_readable_definition = enrich_definition(
+            machine_readable_definition,
+            mapper=mapper,
+            verbose_callback=_progress,
+        )
+
     if edit:
         machine_readable_definition_edited = click.edit(
             text=json.dumps(machine_readable_definition, indent=4), extension=".json"
@@ -169,6 +199,42 @@ def convert_to_json(
         validate_machine_readable_format_with_style(machine_readable_definition)
 
 
+@cli.command("enrich")
+@click.argument("json_file", type=click.Path(exists=True))
+@click.option("--edit", is_flag=True, help="Open editor after enrichment.")
+@click.option(
+    "--validate", is_flag=True, help="Validate the JSON file against the schema."
+)
+@click.option(
+    "--mapper",
+    type=click.Choice(MAPPERS),
+    default="ols",
+    show_default=True,
+    help="Ontology mapper to use.",
+)
+def enrich_json(json_file, edit, validate, mapper):
+    """Populate ontology IDs on an existing JSON definition."""
+    definition = json.loads(Path(json_file).read_text())
+    click.echo(click.style("Enriching ontology IDs...", fg="cyan"), err=True)
+
+    def _progress(name, curie):
+        click.echo(click.style(f"  {name} → {curie}"), err=True)
+
+    definition = enrich_definition(
+        definition, mapper=mapper, verbose_callback=_progress
+    )
+
+    if edit:
+        edited = click.edit(text=json.dumps(definition, indent=4), extension=".json")
+        if edited:
+            definition = json.loads(edited)
+
+    click.echo(color_json(definition))
+
+    if validate:
+        validate_machine_readable_format_with_style(definition)
+
+
 @cli.command("humanize")
 @click.argument("json_file", type=click.Path(exists=True))
 @click.option(

diff --git a/opensyndrome/ontology.py b/opensyndrome/ontology.py
@@ -0,0 +1,196 @@
+import logging
+from typing import Literal, get_args
+
+import requests
+from ols_client import EBIClient
+
+logger = logging.getLogger(__name__)
+
+OPENSYNDROME_CONTEXT_URL = "https://opensyndrome.org/schema/v1/context.jsonld"
+SKIP_TYPES = {"criterion", "demographic_criteria"}
+TEXT2TERM_MIN_SCORE = 0.5
+
+ols = EBIClient()
+
+CRITERION_TYPE_ONTOLOGIES = {
+    "symptom": ["mondo", "hp"],
+    "diagnosis": ["mondo", "efo"],
+    "syndrome": ["mondo", "efo"],
+    "diagnostic_test": ["loinc", "obi"],
+    "epidemiological_history": ["hp", "efo"],
+    "professional_judgment": ["hp", "efo"],
+}
+
+type Mapper = Literal["ols", "text2term"]
+MAPPERS: tuple[Mapper, ...] = get_args(Mapper.__value__)
+
+
+def _pick_best(docs: list[dict], name: str) -> str | None:
+    for doc in docs:
+        if doc.get("label", "").lower() == name.lower():
+            return doc["obo_id"]
+    return None
+
+
+def _search_ols(queries: dict[str, list[str]], timeout: int = 10) -> dict[str, str]:
+    """Map names to CURIEs via OLS4 (up to two HTTP calls per name; per-name failures are isolated)."""
+    result: dict[str, str] = {}
+    for name, ontologies in queries.items():
+        base_params = {
+            "q": name,
+            "ontology": ",".join(ontologies),
+            "rows": 5,
+            "type": "class",
+        }
+        try:
+            payload = ols.get_json("/search", params=base_params, timeout=timeout)
+            docs = payload.get("response", {}).get("docs", [])
+            curie = _pick_best(docs, name)
+            if not curie:
+                payload = ols.get_json(
+                    "/search",
+                    params={**base_params, "queryFields": "label,synonym"},
+                    timeout=timeout,
+                )
+                docs = payload.get("response", {}).get("docs", [])
+                curie = _pick_best(docs, name)
+            if curie:
+                result[name] = curie
+        except requests.RequestException as exc:
+            logger.warning("OLS4 search failed for %r: %s", name, exc)
+    return result
+
+
+def _iri_to_curie(iri: str) -> str | None:
+    """Convert an ontology IRI to a CURIE. e.g. http://purl.obolibrary.org/obo/HP_0001945 → HP:0001945"""
+    import bioregistry
+
+    try:
+        prefix, identifier = bioregistry.parse_iri(iri, use_preferred=True)
+    except TypeError:
+        # bioregistry v0.11.35 raises on unparseable IRIs instead of returning (None, None).
+        return None
+    if not prefix:
+        return None
+    # bioregistry uses "obo" as a generic fallback for unrecognized OBO PURLs
+    # e.g. LOINC_2345-7, UNKNOWN_42
+    # reject to avoid producing OBO:* CURIEs
+    if prefix.lower() == "obo" and "_" in identifier:
+        return None
+    return f"{prefix.upper()}:{identifier}"
+
+
+def _search_text2term(
+    queries: dict[str, list[str]],
+    min_score: float = TEXT2TERM_MIN_SCORE,
+) -> dict[str, str]:
+    """Batch-map names to CURIEs via text2term, loading each ontology at most once."""
+    try:
+        import text2term
+    except ImportError:
+        raise ImportError(
+            "text2term is not installed. Run: pip install text2term"
+        ) from None
+
+    names_per_ontology: dict[str, set[str]] = {}
+    for name, ontologies in queries.items():
+        for ontology in ontologies:
+            names_per_ontology.setdefault(ontology.upper(), set()).add(name)
+
+    matches_per_ontology: dict[str, dict[str, str]] = {}
+    for ontology, names in names_per_ontology.items():
+        try:
+            use_cache = text2term.cache_exists(ontology)
+            df = text2term.map_terms(
+                source_terms=sorted(names),
+                target_ontology=ontology,
+                max_mappings=1,
+                min_score=min_score,
+                use_cache=use_cache,
+                excl_deprecated=True,
+            )
+            ontology_matches: dict[str, str] = {}
+            for _, row in df.iterrows():
+                curie = _iri_to_curie(row["Mapped Term IRI"])
+                if curie:
+                    ontology_matches[row["Source Term"]] = curie
+            matches_per_ontology[ontology] = ontology_matches
+        except (requests.RequestException, OSError, RuntimeError, ValueError) as exc:
+            logger.warning(
+                "text2term batch failed for %s (%d terms): %s",
+                ontology,
+                len(names),
+                exc,
+            )
+
+    result: dict[str, str] = {}
+    for name, ontologies in queries.items():
+        for ontology in ontologies:
+            curie = matches_per_ontology.get(ontology.upper(), {}).get(name)
+            if curie:
+                result[name] = curie
+                break
+    return result
+
+
+def _collect_enrichable(criteria: list[dict]) -> list[dict]:
+    result = []
+    for criterion in criteria:
+        result.extend(_collect_enrichable(criterion.get("values") or []))
+        type_ = criterion.get("type")
+        if not type_ or type_ in SKIP_TYPES:
+            continue
+        if criterion.get("ontology_id"):
+            continue
+        if not (criterion.get("name") or "").strip():
+            continue
+        result.append(criterion)
+    return result
+
+
+def _apply_mapping(
+    criteria: list[dict],
+    mapping: dict[str, str],
+    verbose_callback=None,
+) -> None:
+    for criterion in criteria:
+        _apply_mapping(criterion.get("values", []), mapping, verbose_callback)
+        if criterion.get("type") in SKIP_TYPES:
+            continue
+        name = criterion.get("name", "")
+        if name in mapping:
+            criterion["ontology_id"] = mapping[name]
+            if verbose_callback:
+                verbose_callback(name, mapping[name])
+
+
+def enrich_definition(
+    definition: dict,
+    *,
+    mapper: Mapper = "ols",
+    verbose_callback=None,
+) -> dict:
+    if mapper not in MAPPERS:
+        raise ValueError(
+            f"Unknown mapper {mapper!r}. Choose from: {', '.join(MAPPERS)}"
+        )
+
+    search_fn = _search_ols if mapper == "ols" else _search_text2term
+
+    inclusion_criteria = definition.get("inclusion_criteria") or []
+    exclusion_criteria = definition.get("exclusion_criteria") or []
+    definition["@context"] = OPENSYNDROME_CONTEXT_URL
+    enrichable = _collect_enrichable(inclusion_criteria + exclusion_criteria)
+    if not enrichable:
+        return definition
+
+    queries: dict[str, list[str]] = {
+        c["name"]: CRITERION_TYPE_ONTOLOGIES.get(c["type"], ["hp", "efo", "mondo"])
+        for c in enrichable
+    }
+    mapping = search_fn(queries)
+
+    for criteria_list in [inclusion_criteria, exclusion_criteria]:
+        _apply_mapping(criteria_list, mapping, verbose_callback)
+
+    return definition
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,19 +11,22 @@ dependencies = [
     "pygments>=2.19.1,<3",
     "python-dotenv>=1.0.1,<2",
     "pydantic>=2.10.6,<3",
-    "numpy>=2.3.1,<3",
     "pyarrow>=20.0.0,<21",
     "ollama>=0.5.1,<0.6",
     "requests>=2.32.5",
     "polars>=1.38.1",
     "datamodel-code-generator>=0.55.0",
     "litellm>=1.82.2",
     "instructor>=1.14.5",
+    "ols-client>=0.2.1",
 ]
 
 [project.scripts]
 opensyndrome = "opensyndrome.cli:main"
 
+[project.optional-dependencies]
+text2term = ["text2term>=4.5.0", "bioregistry>=0.11.35"]
+
 [dependency-groups]
 dev = [
     "pre-commit>=4.1.0,<5",