sunlabuiuc · tbirch5 · Apr 20, 2026 · May 4, 2026
diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
@@ -246,3 +246,4 @@ Available Datasets
     datasets/pyhealth.datasets.TCGAPRADDataset
     datasets/pyhealth.datasets.splitter
     datasets/pyhealth.datasets.utils
+    datasets/pyhealth.datasets.medlingo
diff --git a/docs/api/datasets/pyhealth.datasets.medlingo.rst b/docs/api/datasets/pyhealth.datasets.medlingo.rst
@@ -0,0 +1,22 @@
+pyhealth.datasets.medlingo
+==========================
+
+Overview
+--------
+
+MedLingo-style dataset for clinical abbreviation expansion.
+
+API Reference
+-------------
+
+.. autoclass:: pyhealth.datasets.MedLingoDataset
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+References
+----------
+
+| Diagnosing Our Datasets: How Does My Language Model Learn Clinical Information?
+| https://arxiv.org/abs/2505.15024
diff --git a/docs/api/models/pyhealth.models.abbreviation_lookup.rst b/docs/api/models/pyhealth.models.abbreviation_lookup.rst
@@ -0,0 +1,7 @@
+pyhealth.models.abbreviation_lookup
+===================================
+
+.. automodule:: pyhealth.models.abbreviation_lookup
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst
@@ -230,3 +230,5 @@ Available Tasks
     Mutation Pathogenicity (COSMIC) <tasks/pyhealth.tasks.MutationPathogenicityPrediction>
     Cancer Survival Prediction (TCGA) <tasks/pyhealth.tasks.CancerSurvivalPrediction>
     Cancer Mutation Burden (TCGA) <tasks/pyhealth.tasks.CancerMutationBurden>
+    Clinical Abbreviation Expansion <tasks/pyhealth.tasks.clinical_abbreviation>
+    MedLingo Task <tasks/pyhealth.tasks.medlingo_task>
diff --git a/docs/api/tasks/pyhealth.tasks.clinical_abbreviation.rst b/docs/api/tasks/pyhealth.tasks.clinical_abbreviation.rst
@@ -0,0 +1,7 @@
+pyhealth.tasks.clinical_abbreviation
+====================================
+
+.. automodule:: pyhealth.tasks.clinical_abbreviation
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/tasks/pyhealth.tasks.medlingo_task.rst b/docs/api/tasks/pyhealth.tasks.medlingo_task.rst
@@ -0,0 +1,15 @@
+pyhealth.tasks.medlingo_task
+============================
+
+Overview
+--------
+Task wrapper for MedLingo dataset that converts structured records
+into model-ready input/target pairs.
+
+API Reference
+-------------
+
+.. autoclass:: pyhealth.tasks.medlingo_task.MedLingoTask
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/examples/medlingo_clinical_abbreviation_abbreviation_lookup.py b/examples/medlingo_clinical_abbreviation_abbreviation_lookup.py
@@ -0,0 +1,97 @@
+"""
+This example demonstrates an evaluation of the Clinical Abbreviation Task using the MedLingo dataset. 
+
+All samples used in this example are synthetic and defined inline to ensure reproducibility and to avoid
+reliance on external or real clinical datasets.
+
+We perform ablation studies to understand the impact of different input modifications on model performance.
+
+The script prints one baseline run followed by three ablations:
+- Base: abbreviation-only input.
+- Ablation 1: lowercase abbreviation.
+- Ablation 2: short clinical context.
+- Ablation 3: noisy formatting (punctuation appended to the abbreviation).
+
+Paper:
+    Diagnosing Our Datasets: How Does My Language Model Learn Clinical Information?
+    https://arxiv.org/abs/2505.15024
+
+"""
+
+from pyhealth.datasets.medlingo import MedLingoDataset
+from pyhealth.tasks.clinical_abbreviation import ClinicalAbbreviationTask
+
+SYNTHETIC_MEDLINGO_SAMPLES = [  # synthetic records defined inline, one per abbreviation
+    {
+        "abbr": "SOB",
+        "context": "Patient presents with SOB.",
+        "label": "shortness of breath",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "BP",
+        "context": "BP remained stable overnight.",
+        "label": "blood pressure",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "HTN",
+        "context": "History of HTN.",
+        "label": "hypertension",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "CHF",
+        "context": "Known CHF with fluid overload.",
+        "label": "congestive heart failure",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "DM",
+        "context": "History of DM with elevated glucose.",
+        "label": "diabetes mellitus",
+        "source": "synthetic_demo",
+    },
+]
+
+def main() -> None:
+    """Print task outputs for a baseline run and three input ablations.
+
+    Every sample found under a record's "medlingo" key is passed to a
+    ClinicalAbbreviationTask instance and the returned value is printed.
+    """
+    source = MedLingoDataset(samples=SYNTHETIC_MEDLINGO_SAMPLES)
+    records = source.process()
+    # Flatten the per-record "medlingo" entries into a single list.
+    samples = [entry for record in records for entry in record["medlingo"]]
+
+    # Baseline: unmodified samples, task built with use_context=False.
+    print("=== Base Results: Abbreviation-Only ===")
+    abbr_only_task = ClinicalAbbreviationTask(use_context=False)
+    for entry in samples:
+        print(abbr_only_task(entry))
+
+    # Same task, but the abbreviation is lowercased first.
+    print("\n=== Ablation 1: Lowercase Input ===")
+    for entry in samples:
+        lowered = {**entry, "abbr": entry["abbr"].lower()}
+        print(abbr_only_task(lowered))
+
+    # Unmodified samples, task built with use_context=True.
+    print("\n=== Ablation 2: Short Clinical Context ===")
+    with_context_task = ClinicalAbbreviationTask(use_context=True)
+    for entry in samples:
+        print(with_context_task(entry))
+
+    # Baseline task again, with punctuation appended to the abbreviation.
+    print("\n=== Ablation 3: Noisy Formatting ===")
+    suffixes = ("!!!", "???", "...")
+    for entry in samples:
+        # Each sample is run once per suffix, in the order listed above.
+        for suffix in suffixes:
+            noisy = {**entry, "abbr": entry["abbr"] + suffix}
+            print(abbr_only_task(noisy))
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script
+    main()
diff --git a/examples/medlingo_demo.py b/examples/medlingo_demo.py
@@ -0,0 +1,16 @@
+from pyhealth.datasets.medlingo import MedLingoDataset
+
+samples = [  # a single synthetic record defined inline
+    {
+        "abbr": "SOB",
+        "context": "Patient presents with SOB.",
+        "label": "shortness of breath",
+        "source": "synthetic_demo",
+    }
+]
+
+dataset = MedLingoDataset(samples=samples)  # built from the in-memory list above
+records = dataset.process()
+
+print("Dataset loaded")
+print(records)  # prints whatever process() returned, unformatted
diff --git a/examples/medlingo_full_pipeline.py b/examples/medlingo_full_pipeline.py
@@ -0,0 +1,92 @@
+"""
+This script demonstrates a replication of the MedLingo clinical abbreviation expansion task.
+
+It uses synthetic MedLingo-style samples defined inline, processes them into task-ready format, 
+and evaluates a simple rule-based abbreviation lookup model.
+
+Contributors:
+    Tedra Birch (tbirch2@illinois.edu)
+
+Paper:
+    Diagnosing Our Datasets: How Does My Language Model Learn Clinical Information?
+    https://arxiv.org/abs/2505.15024
+
+"""
+
+from pyhealth.datasets.medlingo import MedLingoDataset
+from pyhealth.tasks.medlingo_task import MedLingoTask
+from pyhealth.models.abbreviation_lookup import AbbreviationLookupModel
+
+
+
+SYNTHETIC_MEDLINGO_SAMPLES = [  # synthetic records defined inline, one per abbreviation
+    {
+        "abbr": "SOB",
+        "context": "Patient presents with SOB.",
+        "label": "shortness of breath",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "BP",
+        "context": "BP remained stable overnight.",
+        "label": "blood pressure",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "HTN",
+        "context": "History of HTN.",
+        "label": "hypertension",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "CHF",
+        "context": "Known CHF with fluid overload.",
+        "label": "congestive heart failure",
+        "source": "synthetic_demo",
+    },
+    {
+        "abbr": "DM",
+        "context": "History of DM with elevated glucose.",
+        "label": "diabetes mellitus",
+        "source": "synthetic_demo",
+    },
+]
+
+
+def main() -> None:
+    """Run the synthetic MedLingo pipeline and print lookup accuracy.
+
+    The lookup model is fit and scored on the same samples, so the reported
+    accuracy is a pipeline sanity check, not a held-out estimate.
+    """
+    dataset = MedLingoDataset(samples=SYNTHETIC_MEDLINGO_SAMPLES)
+    records = dataset.process()
+
+    task = MedLingoTask()
+    processed = task.process(records)
+
+    model = AbbreviationLookupModel(normalize=True)
+    model.fit(
+        [{"abbr": item["input"], "label": item["target"]} for item in processed]
+    )
+
+    total = len(processed)
+    correct = sum(
+        model.predict(item["input"]) == item["target"] for item in processed
+    )
+    accuracy = correct / total if total > 0 else 0.0
+
+    print("=== MedLingo Replication Pipeline ===")
+    print(f"Loaded {len(records)} records")
+    print(f"Processed {len(processed)} task samples")
+    print(f"Accuracy: {accuracy:.3f}")
+    if total == 0:
+        return  # no sample to show; processed[0] would raise IndexError
+    print("Example sample:")
+    print(processed[0])
+    print("Example prediction:")
+    print(model.predict(processed[0]["input"]))
+
+
+if __name__ == "__main__":  # run the pipeline only when executed as a script
+    main()