Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 2 additions & 77 deletions notebooks/atai_2026/tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -217,82 +217,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Literal\n",
"from mellea.core import ModelOutputThunk\n",
"from mellea.stdlib.components.docs.richdocument import Table\n",
"from mellea.stdlib.requirements import req, simple_validate\n",
"import pydantic\n",
"\n",
"\n",
"class BOMEntry(pydantic.BaseModel):\n",
" item: str\n",
" quantity: int | str\n",
" notes: str\n",
" category: Literal[\"lumber\", \"windows\", \"doors\", \"other\"]\n",
"\n",
"class BOM(pydantic.BaseModel):\n",
" items: list[BOMEntry]\n",
"\n",
"def _bom_entry_is_well_formed(entry: BOMEntry) -> bool:\n",
" \"\"\"Checks that the BOMEntry quantity is either an integer or 'allowance'.\"\"\"\n",
" try:\n",
" int(entry.quantity)\n",
" return True\n",
" except ValueError as e:\n",
" if entry.quantity.lower() == \"allowance\":\n",
" return True\n",
" return False\n",
"\n",
"def _bom_entries_are_well_formed(s: str) -> bool:\n",
" try:\n",
" bom = BOM.model_validate_json(s)\n",
" return all([_bom_entry_is_well_formed(entry) for entry in bom.items])\n",
" except pydantic.ValidationError as e:\n",
" print(f\"Failed on table: {s}\")\n",
" return False\n",
"\n",
"# Filter out tables that are not lists of construction items.\n",
"@mellea.generative\n",
"def is_material_list(table_markdown: str) -> Literal[\"yes\", \"no\"]:\n",
" \"\"\"Determines if the table contains a list of construction items.\"\"\"\n",
"\n",
"async def extract_bom(doc: RichDocument):\n",
" bom_routines = list()\n",
" # Fire off async requests for each table.\n",
" for table in doc.get_tables():\n",
" if is_material_list(m, table_markdown=table.to_markdown()) == \"yes\":\n",
" next_sub_bom = m.ainstruct(\n",
" \"Reformat this table to have four columns: item, quantity, type, and notes (optional).\",\n",
" grounding_context={'table': table.to_markdown()},\n",
" requirements=[\n",
" req(\n",
" \"Quantity row should only contain an integer or Allowance\",\n",
" validation_fn=simple_validate(_bom_entries_are_well_formed)\n",
" ),\n",
" req(\n",
" \"type should be one of: lumber, windows, doors, other\",\n",
" validation_fn=simple_validate(lambda x: True)\n",
" ), # note: this is enforced by the Literal type so no check is required.\n",
" ],\n",
" format=BOM\n",
" )\n",
" bom_routines.append(next_sub_bom)\n",
" \n",
" # wait for all of the async work to finish, then concatenate the results.\n",
" bom_thunks: list[ModelOutputThunk] = [await bom_routine for bom_routine in bom_routines]\n",
" boms = [BOM.model_validate_json(await bom_thunk.avalue()) for bom_thunk in bom_thunks]\n",
" \n",
" # Concatente all of the indiviual BOMs into one large list.\n",
" all_items = []\n",
" for bom in boms:\n",
" all_items.extend(bom.items)\n",
" full_bom = BOM(items=all_items)\n",
" return full_bom\n",
"\n",
"bom = None\n",
"bom = await extract_bom(doc=construction_plans)"
]
"source": "import asyncio\nfrom typing import Literal\nfrom mellea.core import ModelOutputThunk\nfrom mellea.stdlib.components.docs.richdocument import Table\nfrom mellea.stdlib.requirements import req, simple_validate\nimport pydantic\n\n\nclass BOMEntry(pydantic.BaseModel):\n item: str\n quantity: int | str\n notes: str\n category: Literal[\"lumber\", \"windows\", \"doors\", \"other\"]\n\nclass BOM(pydantic.BaseModel):\n items: list[BOMEntry]\n\ndef _bom_entry_is_well_formed(entry: BOMEntry) -> bool:\n \"\"\"Checks that the BOMEntry quantity is either an integer or 'allowance'.\"\"\"\n try:\n int(entry.quantity)\n return True\n except ValueError as e:\n if entry.quantity.lower() == \"allowance\":\n return True\n return False\n\ndef _bom_entries_are_well_formed(s: str) -> bool:\n try:\n bom = BOM.model_validate_json(s)\n return all([_bom_entry_is_well_formed(entry) for entry in bom.items])\n except pydantic.ValidationError as e:\n print(f\"Failed on table: {s}\")\n return False\n\n# Filter out tables that are not lists of construction items.\n@mellea.generative\ndef is_material_list(table_markdown: str) -> Literal[\"yes\", \"no\"]:\n \"\"\"Determines if the table contains a list of construction items.\"\"\"\n\nasync def extract_bom(doc: RichDocument):\n bom_routines = list()\n # Fire off async requests for each table.\n for table in doc.get_tables():\n if is_material_list(m, table_markdown=table.to_markdown()) == \"yes\":\n next_sub_bom = m.ainstruct(\n \"Reformat this table to have four columns: item, quantity, type, and notes (optional).\",\n grounding_context={'table': table.to_markdown()},\n requirements=[\n req(\n \"Quantity row should only contain an integer or Allowance\",\n validation_fn=simple_validate(_bom_entries_are_well_formed)\n ),\n req(\n \"type should be one of: lumber, windows, doors, other\",\n validation_fn=simple_validate(lambda x: True)\n ), # note: this is enforced by the Literal type so no check is required.\n ],\n format=BOM\n )\n 
bom_routines.append(next_sub_bom)\n \n # wait for all of the async work to finish in parallel, then concatenate the results.\n bom_thunks: list[ModelOutputThunk] = await asyncio.gather(*bom_routines)\n boms = [BOM.model_validate_json(await bom_thunk.avalue()) for bom_thunk in bom_thunks]\n \n # Concatenate all of the individual BOMs into one large list.\n all_items = []\n for bom in boms:\n all_items.extend(bom.items)\n full_bom = BOM(items=all_items)\n return full_bom\n\nbom = None\nbom = await extract_bom(doc=construction_plans)"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -666,4 +591,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}