Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/examples/mify/mify.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mellea.stdlib.components.docs import TableQuery
from mellea.stdlib.components.docs.richdocument import TableQuery
from mellea.stdlib.components.mify import MifiedProtocol, mify
from mellea.stdlib.session import start_session

Expand Down
2 changes: 1 addition & 1 deletion docs/examples/mify/rich_document_advanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

# 4. `Mellea` also provides a basic wrapper around this functionality to make
# basic processing of documents easier.
from mellea.stdlib.components.docs import RichDocument
from mellea.stdlib.components.docs.richdocument import RichDocument

# This creates a new `Mellea` RichDocument component that encapsulates all
# the logic above along with some convenient helpers.
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/mify/rich_table_execute_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from mellea.backends import model_ids
from mellea.backends import ModelOption
from mellea.core import FancyLogger
from mellea.stdlib.components.docs import RichDocument, Table
from mellea.stdlib.components.docs.richdocument import RichDocument, Table

FancyLogger.get_logger().setLevel("ERROR")

Expand Down
15 changes: 3 additions & 12 deletions docs/examples/notebooks/document_mobject.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,7 @@
"id": "3j-Se7PpfMqV"
},
"outputs": [],
"source": [
"from mellea.stdlib.components.docs import RichDocument\n",
"\n",
"rd = RichDocument.from_document_file(\"https://arxiv.org/pdf/1906.04043\")"
]
"source": "from mellea.stdlib.components.docs.richdocument import RichDocument\n\nrd = RichDocument.from_document_file(\"https://arxiv.org/pdf/1906.04043\")"
},
{
"cell_type": "markdown",
Expand All @@ -101,12 +97,7 @@
"id": "kcBb3g_BfMqV"
},
"outputs": [],
"source": [
"from mellea.stdlib.components.docs import Table\n",
"\n",
"table1: Table = rd.get_tables()[0]\n",
"print(table1.to_markdown())"
]
"source": "from mellea.stdlib.components.docs.richdocument import Table\n\ntable1: Table = rd.get_tables()[0]\nprint(table1.to_markdown())"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -177,4 +168,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
2 changes: 1 addition & 1 deletion docs/examples/rag/mellea_pdf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import mellea
from mellea.stdlib.components.docs import RichDocument
from mellea.stdlib.components.docs.richdocument import RichDocument

m = mellea.start_session()

Expand Down
4 changes: 2 additions & 2 deletions docs/examples/tutorial/document_mobject.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from mellea.backends import model_ids
from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
from mellea.stdlib.components.docs import RichDocument
from mellea.stdlib.components.docs.richdocument import RichDocument

rd = RichDocument.from_document_file("https://arxiv.org/pdf/1906.04043")

from mellea.stdlib.components.docs import Table # noqa: E402
from mellea.stdlib.components.docs.richdocument import Table # noqa: E402

table1: Table = rd.get_tables()[0]
print(table1.to_markdown())
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ Let's create a RichDocument from an arxiv paper:

```python
# file: https://github.com/generative-computing/mellea/blob/main/docs/examples/tutorial/document_mobject.py#L1-L3
from mellea.stdlib.components.docs import RichDocument
from mellea.stdlib.components.docs.richdocument import RichDocument
rd = RichDocument.from_document_file("https://arxiv.org/pdf/1906.04043")
```
This loads the PDF file and parses it using the Docling parser into an
Expand All @@ -627,7 +627,7 @@ From the rich document we can extract some document content, e.g. the
first table:
```python
# file: https://github.com/generative-computing/mellea/blob/main/docs/examples/tutorial/document_mobject.py#L5-L8
from mellea.stdlib.components.docs import Table
from mellea.stdlib.components.docs.richdocument import Table
table1: Table = rd.get_tables()[0]
print(table1.to_markdown())
```
Expand Down
8 changes: 6 additions & 2 deletions mellea/stdlib/components/docs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
"""Classes and functions for working with document-like objects."""

from .richdocument import RichDocument, Table, TableQuery, TableTransform
from .document import Document

__all__ = ["RichDocument", "Table", "TableQuery", "TableTransform"]
# Note: RichDocument, Table, TableQuery, TableTransform are not imported here
# by default to avoid heavy docling/torch/transformers imports at module load time.
# Import them explicitly from mellea.stdlib.components.docs.richdocument when needed.

__all__ = ["Document"]
21 changes: 20 additions & 1 deletion mellea/stdlib/requirements/md.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,32 @@
"""This file contains various requirements for Markdown-formatted files."""

import mistletoe
from __future__ import annotations

from typing import TYPE_CHECKING

from ...core import Context, Requirement

if TYPE_CHECKING:
import mistletoe

_mistletoe = None


def _get_mistletoe():
    """Return the ``mistletoe`` module, importing it on first use.

    The imported module is stored in the module-level ``_mistletoe`` global,
    so subsequent calls hand back the cached object without running the
    import statement again.
    """
    global _mistletoe
    if _mistletoe is not None:
        # Already loaded by an earlier call.
        return _mistletoe

    # The import runs here, at call time, rather than when this module loads.
    import mistletoe as loaded_module

    _mistletoe = loaded_module
    return loaded_module


# region lists


def as_markdown_list(ctx: Context) -> list[str] | None:
"""Attempts to format the last_output of the given context as a markdown list."""
mistletoe = _get_mistletoe()
xs = list()
raw_output = ctx.last_output()
assert raw_output is not None
Expand Down Expand Up @@ -44,6 +62,7 @@ def _md_list(ctx: Context):


def _md_table(ctx: Context):
mistletoe = _get_mistletoe()
raw_output = ctx.last_output()
assert raw_output is not None
try:
Expand Down
2 changes: 1 addition & 1 deletion test/backends/test_tool_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from mellea.core import ModelOutputThunk
from mellea.stdlib.context import ChatContext

from mellea.stdlib.components.docs import Table
from mellea.stdlib.components.docs.richdocument import Table
from mellea.stdlib.session import MelleaSession


Expand Down
2 changes: 1 addition & 1 deletion test/stdlib/components/docs/test_richdocument.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from mellea.core import TemplateRepresentation
from mellea.stdlib.components.docs import RichDocument, Table
from mellea.stdlib.components.docs.richdocument import RichDocument, Table
import mellea
from docling_core.types.doc.document import DoclingDocument
import tempfile
Expand Down
2 changes: 1 addition & 1 deletion test/stdlib/components/test_transform.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from mellea.core import TemplateRepresentation
from mellea.stdlib.components.docs import TableTransform
from mellea.stdlib.components.docs.richdocument import TableTransform
from mellea.stdlib.components import MObject, Query, Transform

custom_mobject_description = "custom mobject description"
Expand Down