Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions cli/alora/commands.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Typer sub-application for the ``m alora`` command group.
"""Typer sub-application for the `m alora` command group.

Provides three commands: ``train`` (fine-tune a base causal language model on a JSONL
dataset to produce a LoRA or aLoRA adapter), ``upload`` (push adapter weights to
Provides three commands: `train` (fine-tune a base causal language model on a JSONL
dataset to produce a LoRA or aLoRA adapter), `upload` (push adapter weights to
Hugging Face Hub, optionally packaging the adapter as an intrinsic with an
``io.yaml`` configuration), and ``add-readme`` (use an LLM to auto-generate and
upload an ``INTRINSIC_README.md`` for the trained adapter).
`io.yaml` configuration), and `add-readme` (use an LLM to auto-generate and
upload an `INTRINSIC_README.md` for the trained adapter).
"""

import json
@@ -38,8 +38,8 @@ def alora_train(
basemodel: Base model ID or path.
outfile: Path to save adapter weights.
promptfile: Path to load the prompt format file.
adapter: Adapter type; ``"alora"`` or ``"lora"``.
device: Device to train on: ``"auto"``, ``"cpu"``, ``"cuda"``, or ``"mps"``.
adapter: Adapter type; `"alora"` or `"lora"`.
device: Device to train on: `"auto"`, `"cpu"`, `"cuda"`, or `"mps"`.
epochs: Number of training epochs.
learning_rate: Learning rate for the optimizer.
batch_size: Per-device training batch size.
@@ -84,10 +84,10 @@ def alora_upload(
Args:
weight_path: Path to saved adapter weights directory.
name: Destination model name on Hugging Face Hub
(e.g. ``"acme/carbchecker-alora"``).
intrinsic: If ``True``, the adapter implements an intrinsic and an
``io.yaml`` file must also be provided.
io_yaml: Path to the ``io.yaml`` file configuring input/output processing
(e.g. `"acme/carbchecker-alora"`).
intrinsic: If `True`, the adapter implements an intrinsic and an
`io.yaml` file must also be provided.
io_yaml: Path to the `io.yaml` file configuring input/output processing
when the model is invoked as an intrinsic.
"""
from cli.alora.intrinsic_uploader import upload_intrinsic
@@ -143,10 +143,10 @@ def alora_add_readme(
Args:
datafile: JSONL file with item/label pairs used to train the adapter.
basemodel: Base model ID or path.
promptfile: Path to the prompt format file, or ``None``.
promptfile: Path to the prompt format file, or `None`.
name: Destination model name on Hugging Face Hub.
hints: Path to a file containing additional domain hints, or ``None``.
io_yaml: Path to the ``io.yaml`` intrinsic configuration file, or ``None``.
hints: Path to a file containing additional domain hints, or `None`.
io_yaml: Path to the `io.yaml` intrinsic configuration file, or `None`.

Raises:
OSError: If no Hugging Face authentication token is found.
Expand Down
36 changes: 18 additions & 18 deletions cli/alora/intrinsic_uploader.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Upload a trained adapter to Hugging Face Hub in the intrinsic directory layout.

Creates or updates a private Hugging Face repository and uploads adapter weights
into a ``<intrinsic_name>/<base_model>/<adapter_type>`` sub-directory, together with
the required ``io.yaml`` configuration file. If an ``INTRINSIC_README.md`` exists in
the weight directory it is also uploaded as the repository's root ``README.md``.
Requires an authenticated Hugging Face token obtained via ``huggingface-cli login``.
into a `<intrinsic_name>/<base_model>/<adapter_type>` sub-directory, together with
the required `io.yaml` configuration file. If an `INTRINSIC_README.md` exists in
the weight directory it is also uploaded as the repository's root `README.md`.
Requires an authenticated Hugging Face token obtained via `huggingface-cli login`.
"""

import os
@@ -27,31 +27,31 @@ def upload_intrinsic(
"""Upload an adapter to Hugging Face Hub using the intrinsic directory layout.

Creates or updates a private Hugging Face repository and uploads adapter
weights into a ``<intrinsic_name>/<base_model>/<adapter_type>`` sub-directory,
together with the ``io.yaml`` configuration file. If an
``INTRINSIC_README.md`` exists in the weight directory it is also uploaded
as the repository root ``README.md``.
weights into a `<intrinsic_name>/<base_model>/<adapter_type>` sub-directory,
together with the `io.yaml` configuration file. If an
`INTRINSIC_README.md` exists in the weight directory it is also uploaded
as the repository root `README.md`.

Args:
weight_path (str): Local directory containing the adapter weights
(output of ``save_pretrained``).
(output of `save_pretrained`).
model_name (str): Target Hugging Face repository name in
``"<userid>/<intrinsic_name>"`` format (e.g. ``"acme/carbchecker-alora"``).
`"<userid>/<intrinsic_name>"` format (e.g. `"acme/carbchecker-alora"`).
base_model (str): Base model ID or path (e.g.
``"ibm-granite/granite-3.3-2b-instruct"``). Must contain at most
one ``"/"`` separator.
`"ibm-granite/granite-3.3-2b-instruct"`). Must contain at most
one `"/"` separator.
type (Literal["lora", "alora"]): Adapter type, used as the leaf
directory name in the repository layout.
io_yaml (str): Path to the ``io.yaml`` configuration file for
io_yaml (str): Path to the `io.yaml` configuration file for
intrinsic input/output processing.
private (bool): Whether the repository should be private. Currently
only ``True`` is supported.
only `True` is supported.

Raises:
AssertionError: If ``weight_path`` or ``io_yaml`` do not exist, if
``private`` is ``False``, if ``base_model`` contains more than one
``"/"`` separator, or if ``model_name`` does not contain exactly
one ``"/"`` separator.
AssertionError: If `weight_path` or `io_yaml` does not exist, if
`private` is `False`, if `base_model` contains more than one
`"/"` separator, or if `model_name` does not contain exactly
one `"/"` separator.
OSError: If no Hugging Face authentication token is found.
"""
try:
Expand Down
28 changes: 14 additions & 14 deletions cli/alora/readme_generator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""LLM-assisted generator for adapter intrinsic README files.

Uses a ``MelleaSession`` with rejection sampling to derive README template variables
Uses a `MelleaSession` with rejection sampling to derive README template variables
from a JSONL training dataset — including a high-level description, the inferred
Python argument list, and Jinja2-renderable sample rows. Validates the generated
output with deterministic requirements (correct naming conventions, syntactically
valid argument lists) before rendering the final ``INTRINSIC_README.md`` via a
valid argument lists) before rendering the final `INTRINSIC_README.md` via a
Jinja2 template.
"""

@@ -28,10 +28,10 @@ class ReadmeTemplateVars(BaseModel):
high_level_description (str): A 2-3 sentence description of what the intrinsic adapter does.
dataset_description (str): Brief description of the training dataset contents and format.
userid (str): Hugging Face user ID (the namespace portion of the model name).
intrinsic_name (str): Short snake_case identifier for the intrinsic (e.g. ``"carbchecker"``).
intrinsic_name_camelcase (str): CamelCase version of ``intrinsic_name`` (e.g. ``"CarbChecker"``).
arglist (str): Python function argument list with type hints (e.g. ``"description: str"``).
arglist_without_type_annotations (str): Argument list without type hints (e.g. ``"description"``).
intrinsic_name (str): Short snake_case identifier for the intrinsic (e.g. `"carbchecker"`).
intrinsic_name_camelcase (str): CamelCase version of `intrinsic_name` (e.g. `"CarbChecker"`).
arglist (str): Python function argument list with type hints (e.g. `"description: str"`).
arglist_without_type_annotations (str): Argument list without type hints (e.g. `"description"`).
"""

high_level_description: str
@@ -141,20 +141,20 @@ def make_readme_jinja_dict(
"""Generate all template variables for the intrinsic README using an LLM.

Loads the first five lines of the JSONL dataset, determines the input structure,
and uses ``m.instruct`` with deterministic requirements and rejection sampling to
and uses `m.instruct` with deterministic requirements and rejection sampling to
generate README template variables.

Args:
m: Active ``MelleaSession`` to use for LLM generation.
m: Active `MelleaSession` to use for LLM generation.
dataset_path: Path to the JSONL training dataset file.
base_model: Base model ID or path used to train the adapter.
prompt_file: Path to the prompt format file (empty string if not provided).
name: Destination model name on Hugging Face Hub
(e.g. ``"acme/carbchecker-alora"``).
(e.g. `"acme/carbchecker-alora"`).
hints: Optional string of additional domain hints to include in the prompt.

Returns:
Dict of Jinja2 template variables for rendering the ``INTRINSIC_README.md``.
Dict of Jinja2 template variables for rendering the `INTRINSIC_README.md`.
"""
# Load first 5 lines of the dataset.
samples = []
@@ -294,19 +294,19 @@ def generate_readme(
) -> str:
"""Generate an INTRINSIC_README.md file from the dataset and template.

Creates a ``MelleaSession``, uses the LLM to generate template variables,
renders the Jinja template, and writes the result to ``output_path``.
Creates a `MelleaSession`, uses the LLM to generate template variables,
renders the Jinja template, and writes the result to `output_path`.

Args:
dataset_path: Path to the JSONL training dataset file.
base_model: Base model ID or path used to train the adapter.
prompt_file: Path to the prompt format file, or ``None``.
prompt_file: Path to the prompt format file, or `None`.
output_path: Destination path for the generated README file.
name: Destination model name on Hugging Face Hub.
hints: Optional string of additional domain hints for the LLM.

Returns:
The path to the written output file (same as ``output_path``).
The path to the written output file (same as `output_path`).
"""
from jinja2 import Environment, FileSystemLoader

Expand Down
70 changes: 35 additions & 35 deletions cli/alora/train.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Fine-tune a causal language model to produce a LoRA or aLoRA adapter.

Loads a JSONL dataset of ``item``/``label`` pairs, applies an 80/20 train/validation
split, and trains using HuggingFace PEFT and TRL's ``SFTTrainer`` — saving the
Loads a JSONL dataset of `item`/`label` pairs, applies an 80/20 train/validation
split, and trains using HuggingFace PEFT and TRL's `SFTTrainer` — saving the
checkpoint with the lowest validation loss. Supports CUDA, MPS (macOS,
PyTorch ≥ 2.8), and CPU device selection, and handles the
``alora_invocation_tokens`` configuration required for aLoRA training.
`alora_invocation_tokens` configuration required for aLoRA training.
"""

import json
@@ -42,18 +42,18 @@
def load_dataset_from_json(json_path, tokenizer, invocation_prompt):
"""Load a JSONL dataset and format it for SFT training.

Reads ``item``/``label`` pairs from a JSONL file and builds a HuggingFace
``Dataset`` with ``input`` and ``target`` columns. Each input is formatted as
``"{item}\\nRequirement: <|end_of_text|>\\n{invocation_prompt}"``.
Reads `item`/`label` pairs from a JSONL file and builds a HuggingFace
`Dataset` with `input` and `target` columns. Each input is formatted as
`"{item}\\nRequirement: <|end_of_text|>\\n{invocation_prompt}"`.

Args:
json_path: Path to the JSONL file containing ``item``/``label`` pairs.
json_path: Path to the JSONL file containing `item`/`label` pairs.
tokenizer: HuggingFace tokenizer instance (currently unused, reserved for
future tokenization steps).
invocation_prompt: Invocation string appended to each input prompt.

Returns:
A HuggingFace ``Dataset`` with ``"input"`` and ``"target"`` string columns.
A HuggingFace `Dataset` with `"input"` and `"target"` string columns.
"""
data = []
with open(json_path, encoding="utf-8") as f:
@@ -77,12 +77,12 @@ def formatting_prompts_func(example):
"""Concatenate input and target columns for SFT prompt formatting.

Args:
example: A batch dict with ``"input"`` and ``"target"`` list fields, as
produced by HuggingFace ``Dataset.map`` in batched mode.
example: A batch dict with `"input"` and `"target"` list fields, as
produced by HuggingFace `Dataset.map` in batched mode.

Returns:
A list of strings, each formed by concatenating the ``input`` and
``target`` values for a single example in the batch.
A list of strings, each formed by concatenating the `input` and
`target` values for a single example in the batch.
"""
return [
f"{example['input'][i]}{example['target'][i]}"
@@ -95,7 +95,7 @@ class SaveBestModelCallback(TrainerCallback):

Attributes:
best_eval_loss (float): Lowest evaluation loss seen so far across all
evaluation steps. Initialised to ``float("inf")``.
evaluation steps. Initialised to `float("inf")`.
"""

def __init__(self):
@@ -105,18 +105,18 @@ def on_evaluate(self, args, state, control, **kwargs):
"""Save the adapter weights if the current evaluation loss is a new best.

Called automatically by the HuggingFace Trainer after each evaluation
step. Compares the current ``eval_loss`` from ``metrics`` against
``best_eval_loss`` and, if lower, updates the stored best and saves the
model to ``args.output_dir``.
step. Compares the current `eval_loss` from `metrics` against
`best_eval_loss` and, if lower, updates the stored best and saves the
model to `args.output_dir`.

Args:
args: ``TrainingArguments`` instance with training configuration,
including ``output_dir``.
state: ``TrainerState`` instance with the current training state.
control: ``TrainerControl`` instance for controlling training flow.
args: `TrainingArguments` instance with training configuration,
including `output_dir`.
state: `TrainerState` instance with the current training state.
control: `TrainerControl` instance for controlling training flow.
**kwargs: Additional keyword arguments provided by the Trainer,
including ``"model"`` (the current PEFT model) and
``"metrics"`` (a dict containing ``"eval_loss"``).
including `"model"` (the current PEFT model) and
`"metrics"` (a dict containing `"eval_loss"`).
"""
model = kwargs["model"]
metrics = kwargs["metrics"]
@@ -132,13 +132,13 @@ class SafeSaveTrainer(SFTTrainer):
def save_model(self, output_dir: str | None = None, _internal_call: bool = False):
"""Save the model and tokenizer with safe serialization always enabled.

Overrides ``SFTTrainer.save_model`` to call ``save_pretrained`` with
``safe_serialization=True``, ensuring weights are saved in safetensors
Overrides `SFTTrainer.save_model` to call `save_pretrained` with
`safe_serialization=True`, ensuring weights are saved in safetensors
format rather than the legacy pickle-based format.

Args:
output_dir (str | None): Directory to save the model into. If
``None``, the trainer's configured ``output_dir`` is used.
`None`, the trainer's configured `output_dir` is used.
_internal_call (bool): Internal flag passed through from the Trainer
base class; not used by this override.
"""
@@ -165,31 +165,31 @@ def train_model(
"""Fine-tune a causal language model to produce a LoRA or aLoRA adapter.

Loads and 80/20-splits the JSONL dataset, configures PEFT with the specified
adapter type, trains using ``SFTTrainer`` with a best-checkpoint callback, saves
the adapter weights, and removes the PEFT-generated ``README.md`` from the output
adapter type, trains using `SFTTrainer` with a best-checkpoint callback, saves
the adapter weights, and removes the PEFT-generated `README.md` from the output
directory.

Args:
dataset_path: Path to the JSONL training dataset file.
base_model: Hugging Face model ID or local path to the base model.
output_file: Destination path for the trained adapter weights.
prompt_file: Optional path to a JSON config file with an
``"invocation_prompt"`` key. Defaults to the aLoRA invocation token.
adapter: Adapter type to train -- ``"alora"`` (default) or ``"lora"``.
device: Device selection -- ``"auto"``, ``"cpu"``, ``"cuda"``, or
``"mps"``.
run_name: Name of the training run (passed to ``SFTConfig``).
`"invocation_prompt"` key. Defaults to the aLoRA invocation token.
adapter: Adapter type to train -- `"alora"` (default) or `"lora"`.
device: Device selection -- `"auto"`, `"cpu"`, `"cuda"`, or
`"mps"`.
run_name: Name of the training run (passed to `SFTConfig`).
epochs: Number of training epochs.
learning_rate: Optimizer learning rate.
batch_size: Per-device training batch size.
max_length: Maximum token sequence length.
grad_accum: Gradient accumulation steps.

Raises:
ValueError: If ``device`` is not one of ``"auto"``, ``"cpu"``,
``"cuda"``, or ``"mps"``.
ValueError: If `device` is not one of `"auto"`, `"cpu"`,
`"cuda"`, or `"mps"`.
RuntimeError: If the GPU has insufficient VRAM to load the model
(wraps ``NotImplementedError`` for meta tensor errors).
(wraps `NotImplementedError` for meta tensor errors).
"""
if prompt_file:
# load the configurable variable invocation_prompt
Expand Down
8 changes: 4 additions & 4 deletions cli/alora/upload.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Upload a trained LoRA or aLoRA adapter to Hugging Face Hub.

Creates the target repository if it does not already exist and pushes the entire
adapter weights directory (output of ``save_pretrained``) to the repository root.
Requires an authenticated Hugging Face token set via the ``HF_TOKEN`` environment
variable or ``huggingface-cli login``.
adapter weights directory (output of `save_pretrained`) to the repository root.
Requires an authenticated Hugging Face token set via the `HF_TOKEN` environment
variable or `huggingface-cli login`.
"""

import os
@@ -20,7 +20,7 @@ def upload_model(weight_path: str, model_name: str, private: bool = True):
private (bool): Whether the repo should be private. Default: True.

Raises:
FileNotFoundError: If ``weight_path`` does not exist on disk.
FileNotFoundError: If `weight_path` does not exist on disk.
OSError: If no Hugging Face authentication token is found.
RuntimeError: If creating or accessing the Hugging Face repository fails.
"""
Expand Down
6 changes: 3 additions & 3 deletions cli/decompose/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Typer sub-application for the ``m decompose`` command group.
"""Typer sub-application for the `m decompose` command group.

Exposes a single ``run`` command that takes a task prompt (from a file or
Exposes a single `run` command that takes a task prompt (from a file or
interactively), calls the LLM-based decomposition pipeline to break it into
structured subtasks with constraints and dependency ordering, and writes the results
as a JSON data file and a ready-to-run Python script. Invoke via
``m decompose run --help`` for full option documentation.
`m decompose run --help` for full option documentation.
"""

import typer
Expand Down
Loading
Loading