Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,35 @@ Standalone on PyPI, and portable across training and inference stacks (transform
uv add renderers
```

`transformers` is an optional extra. The base install is lightweight — bring
your own tokenizer (e.g. a `tokenizers`-backed one) and you can render and parse
text without `transformers` in your environment at all. Install the extra when
you want the `load_tokenizer` / `create_renderer` convenience helpers, or any of
the vision-language renderers (they need a HuggingFace image processor):

```bash
uv add 'renderers[transformers]'
```

Two caveats for the lightweight path: a bring-your-own tokenizer must satisfy the
[`Tokenizer`](renderers/base.py) protocol (`encode` / `decode` /
`convert_tokens_to_ids` / `__call__`, plus `name_or_path`, `unk_token_id`,
`eos_token_id`; `DefaultRenderer` additionally needs `apply_chat_template`, i.e.
the `ChatTemplateTokenizer` protocol); and per-token training attribution
(`attribute_text_segments`) additionally needs `tokenizer(..., return_offsets_mapping=True)`
— without it, attribution falls back to a vanilla HuggingFace tokenizer, which
requires the extra.

`renderers.client` — the generate client for vLLM's `/inference/v1/generate` — is
also opt-in. It's the only piece that needs the `openai` SDK and `httpx`, so
`import renderers` and the renderers themselves stay free of HTTP/engine deps.
Install the extra to use it:

```bash
uv add 'renderers[vllm]'
```

The extras compose, e.g. `uv add 'renderers[transformers,vllm]'`.

## At a glance

```python
Expand Down
47 changes: 39 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@ license-files = ["LICENSE"]
requires-python = ">=3.10,<3.14"
dependencies = [
"numpy",
"openai>=1.108.1",
"tiktoken",
"jinja2",
"transformers>=4.50.0",
# Used by GptOssRenderer to render and parse harmony tokens. Vendoring
# OpenAI's reference implementation keeps us byte-identical with vLLM
# (which also uses it) and saves us mirroring a 330-line Jinja template.
Expand All @@ -31,19 +29,42 @@ dependencies = [
# against 0.0.8) and ``tests/test_gpt_oss_harmony_parity.py`` passes on it,
# so the older harmony is safe.
"openai-harmony>=0.0.4",
# Crusoe's Rust BPE tokenizer; ~10x faster encode vs HF's tokenizers.
# ``load_tokenizer`` patches it in by default for every supported model
# except a small denylist (DeepSeek-V3 family). The patch is bracketed
# around ``from_pretrained``, so subsequent ``AutoTokenizer`` calls
# outside the renderers package stay vanilla.
"fastokens>=0.2.0",
# ``BaseRendererConfig`` inherits from ``pydantic_config.BaseConfig`` so
# the typed-config surface stays uniform with prime-rl / verifiers config
# bases. Transitively brings pydantic, which ``renderers.configs`` also
# imports directly.
"prime-pydantic-config>=0.3.0.dev83",
]

[project.optional-dependencies]
# ``transformers`` is heavy, and text-only renderers don't need it: construct
# a renderer with your own tokenizer object (e.g. a ``tokenizers``-backed one)
# and render / parse with no ``transformers`` import at all (issue #31). It is
# required only by the ``load_tokenizer`` / ``create_renderer*`` convenience
# helpers, the offset-attribution fallback in ``attribute_text_segments``, and
# the VLM renderers (which need ``AutoProcessor`` image processors). The lazy
# import points raise a clear "install renderers[transformers]" error when it's
# missing — see ``renderers.base._require_transformers``.
#
# ``fastokens`` (Crusoe's Rust BPE; ~10x faster encode) patches
# ``transformers`` inside ``load_tokenizer``, so it travels in the same extra.
transformers = [
"transformers>=4.50.0",
"fastokens>=0.2.0",
]

# ``renderers.client`` is a generate client for vLLM's
# ``/inference/v1/generate`` — engine-specific glue, not part of the core
# message↔token contract. It's the only thing that needs the ``openai`` SDK
# (``AsyncOpenAI``) and ``httpx``, so it's opt-in: ``import renderers`` and the
# renderers themselves stay free of HTTP/engine deps. Install this extra to use
# ``from renderers.client import generate``. (A future ``sglang`` client would
# live behind its own extra.)
vllm = [
"openai>=1.108.1",
"httpx",
]

[tool.hatch.version]
source = "vcs"
# Tags look like ``renderers-v0.1.8`` (prefix matches the publish.yml
Expand Down Expand Up @@ -76,6 +97,16 @@ dev = [
"torch>=2.11.0",
"torchvision>=0.26.0",
"ty>=0.0.1a29,<0.0.22",
# ``transformers`` is an optional extra for consumers (issue #31), but the
# test suite + type-check exercise the ``load_tokenizer`` / VLM paths, so
# the dev env always installs it. Mirror of ``[project.optional-dependencies].transformers``.
"transformers>=4.50.0",
"fastokens>=0.2.0",
# ``renderers.client`` deps — opt-in via the ``[vllm]`` extra for consumers,
# but ``tests/test_client.py`` exercises the client, so the dev env always
# installs them. Mirror of ``[project.optional-dependencies].vllm``.
"openai>=1.108.1",
"httpx",
]

[tool.uv]
Expand Down
38 changes: 27 additions & 11 deletions renderers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
__version__ = "0+unknown"

from renderers.base import (
ChatTemplateTokenizer,
Content,
ContentPart,
ImagePart,
Expand All @@ -17,12 +18,14 @@
ParsedResponse,
ParsedToolCall,
PlaceholderRange,
Processor,
RenderedConversation,
RenderedTokens,
Renderer,
RendererPool,
TextPart,
ThinkingPart,
Tokenizer,
ToolCall,
ToolCallFunction,
ToolCallParseStatus,
Expand All @@ -37,7 +40,6 @@
reject_assistant_in_extension,
trim_to_turn_close,
)
from renderers.client import OverlongPromptError
from renderers.configs import (
AutoRendererConfig,
BaseRendererConfig,
Expand All @@ -62,15 +64,27 @@

# Concrete renderer classes are lazy-loaded so that consumers needing
# only the config layer (``RendererConfig`` discriminated union) don't
# pay the ``transformers`` import cost. Each renderer module does
# ``from transformers.tokenization_utils import PreTrainedTokenizer``
# at module level, so eager imports here would drag ``transformers``
# into every downstream ``import renderers``. ``__getattr__`` (PEP 562)
# resolves the names on first attribute access, so ``from renderers
# import DefaultRenderer`` and ``renderers.DefaultRenderer`` both work
# transparently. ``create_renderer`` doesn't depend on these eager
# imports — ``renderers.base._populate_registry`` lazy-imports the
# concrete classes itself when a renderer is instantiated.
# pay the cost of importing every renderer module up front. ``__getattr__``
# (PEP 562) resolves the names on first attribute access, so ``from
# renderers import DefaultRenderer`` and ``renderers.DefaultRenderer`` both
# work transparently. ``create_renderer`` doesn't depend on eager imports
# here either — ``renderers.base._populate_registry`` lazy-imports the
# concrete classes itself when a renderer is instantiated.
#
# As of issue #31, ``transformers`` is an optional extra: the renderer
# modules type their ``tokenizer`` / ``processor`` params against the
# ``Tokenizer`` / ``Processor`` protocols in ``renderers.base`` rather than
# ``transformers.PreTrainedTokenizer``, so ``import renderers`` (and
# constructing a text renderer with your own tokenizer) no longer pulls in
# ``transformers`` at all. It's loaded lazily only by ``load_tokenizer`` /
# ``create_renderer*``, the offset-attribution fallback in
# ``attribute_text_segments``, and the VLM renderers — see
# ``renderers.base._require_transformers``.
#
# ``renderers.client`` (the vLLM ``/inference/v1/generate`` client) is
# likewise opt-in: it depends on the ``openai`` SDK + ``httpx`` (the
# ``renderers[vllm]`` extra) and is deliberately *not* imported here, so
# ``import renderers`` stays free of HTTP/engine deps. Import it explicitly
# (``from renderers.client import generate, OverlongPromptError``) when you
# want it.
_LAZY_RENDERERS: dict[str, str] = {
"DeepSeekV3Renderer": "renderers.deepseek_v3",
"DefaultRenderer": "renderers.default",
Expand Down Expand Up @@ -108,6 +122,7 @@ def __dir__() -> list[str]:
__all__ = [
"AutoRendererConfig",
"BaseRendererConfig",
"ChatTemplateTokenizer",
"Content",
"ContentPart",
"DeepSeekV3Renderer",
Expand Down Expand Up @@ -137,10 +152,10 @@ def __dir__() -> list[str]:
"MultimodalRenderer",
"Nemotron3Renderer",
"Nemotron3RendererConfig",
"OverlongPromptError",
"ParsedResponse",
"ParsedToolCall",
"PlaceholderRange",
"Processor",
"Qwen35Renderer",
"Qwen35RendererConfig",
"Qwen36Renderer",
Expand All @@ -156,6 +171,7 @@ def __dir__() -> list[str]:
"RendererPool",
"TextPart",
"ThinkingPart",
"Tokenizer",
"ToolCall",
"ToolCallFunction",
"ToolCallParseStatus",
Expand Down
108 changes: 107 additions & 1 deletion renderers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,78 @@ def with_completion(
)


@runtime_checkable
class Tokenizer(Protocol):
"""Structural type for the tokenizer a renderer needs.

Satisfied by HuggingFace ``PreTrainedTokenizerBase`` and by any
bring-your-own wrapper (e.g. around a ``tokenizers.Tokenizer``) that
exposes this surface. Defining it here — rather than annotating with
``transformers.PreTrainedTokenizer`` — keeps ``transformers`` out of
the import graph for text-only renderers (issue #31): the heavy
package becomes an optional extra, needed only by the
``load_tokenizer`` / ``create_renderer`` convenience helpers and by
the VLM renderers.

The hand-coded renderers only need ``encode`` / ``decode`` /
``convert_tokens_to_ids`` (plus the id attributes), so a plain
``tokenizers.Tokenizer`` wrapper satisfies this protocol.
``apply_chat_template`` is deliberately *not* required here — only
:class:`DefaultRenderer` needs it, via the :class:`ChatTemplateTokenizer`
subtype.

``__call__`` is consumed only by ``attribute_text_segments`` for
character-offset attribution (``return_offsets_mapping=True``). A
tokenizer that doesn't support offsets still renders and parses fine;
offset attribution then falls back to a vanilla HuggingFace tokenizer
(which requires the ``transformers`` extra) — see
``_get_offset_tokenizer``.
"""

name_or_path: str
unk_token_id: int | None
eos_token_id: int | None

def encode(self, text: str, *args: Any, **kwargs: Any) -> list[int]: ...

def decode(self, token_ids: Any, *args: Any, **kwargs: Any) -> str: ...

def convert_tokens_to_ids(self, tokens: Any) -> Any: ...

def __call__(self, *args: Any, **kwargs: Any) -> Any: ...


@runtime_checkable
class ChatTemplateTokenizer(Tokenizer, Protocol):
    """:class:`Tokenizer` extended with ``apply_chat_template``.

    :class:`DefaultRenderer` is the only consumer: it is the generic
    fallback that hands rendering over to the tokenizer's Jinja chat
    template. Hand-coded renderers re-implement their model's template in
    Python and stay within the base :class:`Tokenizer` surface, so they
    never ask for this subtype.
    """

    def apply_chat_template(self, *args: Any, **kwargs: Any) -> Any:
        """Render a conversation through the tokenizer's chat template."""


@runtime_checkable
class Processor(Protocol):
    """Duck-typed view of the HuggingFace processor used by VLM renderers.

    ``AutoProcessor`` instances (Qwen-VL, Kimi-VL, ...) match this
    protocol. VLM rendering inherently needs ``transformers`` at render
    time; the point of this Protocol is only that a ``processor:`` type
    hint can be written without importing ``transformers`` when the module
    is loaded.
    """

    # Typed loosely on purpose: the sub-object's API varies by model family
    # (Qwen: ``__call__(images=...)`` + ``merge_size``; Kimi:
    # ``preprocess(...)`` + ``media_tokens_calculator``).
    image_processor: Any

    def apply_chat_template(self, *args: Any, **kwargs: Any) -> Any:
        """Render a conversation through the processor's chat template."""


@runtime_checkable
class Renderer(Protocol):
"""Owns message ↔ token conversion for a specific model family."""
Expand Down Expand Up @@ -1066,6 +1138,32 @@ def _model_has_vision_config(model_name: str) -> bool:
_FASTOKENS_ANNOUNCED = False


_TRANSFORMERS_INSTALL_HINT = (
    "This requires the optional `transformers` extra, which is not "
    "installed. Install it with `pip install 'renderers[transformers]'` "
    "(or `uv add 'renderers[transformers]'`). Text-only renderers work "
    "without it when you construct them with your own tokenizer object."
)


def _require_transformers():
    """Import and return the ``transformers`` module.

    ``transformers`` (and ``fastokens``, which patches it) is an optional
    dependency — see issue #31. The convenience helpers (``load_tokenizer``,
    ``create_renderer*``), the offset-attribution fallback, and all VLM
    renderers need it; text-only render/parse with a bring-your-own
    tokenizer does not.

    Returns:
        The imported ``transformers`` module.

    Raises:
        ImportError: With an actionable "install the extra" message when
            the ``transformers`` package itself cannot be found. Any other
            import failure (``transformers`` is installed but one of its
            own imports breaks) propagates unchanged, so a broken
            environment is not misreported as "not installed".
    """
    try:
        import transformers
    except ModuleNotFoundError as exc:
        # ``exc.name`` is the module that could not be located. Only swap
        # in the install hint when that module is ``transformers`` itself;
        # a missing transitive dependency needs its original message.
        if exc.name != "transformers":
            raise
        raise ImportError(_TRANSFORMERS_INSTALL_HINT) from exc
    return transformers


def _patched_load(model_name_or_path: str, **kwargs):
"""Run ``AutoTokenizer.from_pretrained`` with fastokens patched in
process-locally — patch around the load, unpatch right after.
Expand Down Expand Up @@ -1203,7 +1301,11 @@ def load_tokenizer(
validation for configs with nested ``rope_parameters``), we fall
back to loading the repo's self-contained ``tokenizer.json``
directly — see ``_load_tokenizer_via_auto``.

Requires the optional ``transformers`` extra; raises a clear
``ImportError`` with install instructions if it's missing.
"""
_require_transformers()
kwargs: dict[str, Any] = {}
revision = TRUSTED_REVISIONS.get(model_name_or_path)
if revision is not None:
Expand Down Expand Up @@ -1587,7 +1689,11 @@ def _get_offset_tokenizer(tokenizer):
cached = _offset_tokenizers.get(name_or_path)
if cached is not None:
return cached
from transformers import AutoTokenizer
# The supplied tokenizer can't produce offsets; fall back to a
# vanilla HuggingFace tokenizer, which needs the ``transformers``
# extra. A bring-your-own tokenizer that supports
# ``return_offsets_mapping=True`` skips this path entirely.
AutoTokenizer = _require_transformers().AutoTokenizer

kwargs: dict[str, Any] = {}
revision = TRUSTED_REVISIONS.get(name_or_path)
Expand Down
5 changes: 2 additions & 3 deletions renderers/deepseek_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@

import json

from transformers.tokenization_utils import PreTrainedTokenizer

from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
Tokenizer,
ToolSpec,
attribute_text_segments,
reject_assistant_in_extension,
Expand Down Expand Up @@ -54,7 +53,7 @@ class DeepSeekV3Renderer:

def __init__(
self,
tokenizer: PreTrainedTokenizer,
tokenizer: Tokenizer,
config: DeepSeekV3RendererConfig | None = None,
):
self._tokenizer = tokenizer
Expand Down
5 changes: 2 additions & 3 deletions renderers/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@
import json
from typing import Any

from transformers.tokenization_utils import PreTrainedTokenizer

from renderers.base import (
ChatTemplateTokenizer,
Message,
ParsedResponse,
RenderedTokens,
Expand Down Expand Up @@ -90,7 +89,7 @@ class DefaultRenderer:

def __init__(
self,
tokenizer: PreTrainedTokenizer,
tokenizer: ChatTemplateTokenizer,
config: DefaultRendererConfig | None = None,
):
cfg = config or DefaultRendererConfig()
Expand Down
Loading
Loading