Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions .github/workflows/catalog_preparation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,18 @@ jobs:
HF_HUB_DOWNLOAD_TIMEOUT: 60
HF_HUB_ETAG_TIMEOUT: 60
TQDM_DISABLE: "True"
HF_TOKEN: ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}

strategy:
matrix:
modulo: [0,1,2,3,4,5,6,7]
modulo: [0,1,2,3,4,5,6,7,8,9]

steps:
- uses: actions/checkout@v5

- uses: actions/setup-python@v5
with:
python-version: '3.10'
cache: 'pip'

- name: Install Dependencies
run: bash utils/install.sh
Expand All @@ -44,16 +44,11 @@ jobs:
with:
ssh-private-key: ${{ secrets.LLMEVALKIT_SSH_KEY }}

- name: Hugging Face Login
run: |
for i in {1..5}; do
huggingface-cli login --token ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }} && break || sleep $((2 ** i));
done
- name: Run Tests
run: |
modulo="${{ matrix.modulo }}"
echo "modulo=${modulo}" >> $GITHUB_STEP_SUMMARY
echo "sed -i 's/^num_par = 1 /num_par = 8 /' tests/catalog/test_preparation.py" > sedit.sh
echo "sed -i 's/^num_par = 1 /num_par = 10 /' tests/catalog/test_preparation.py" > sedit.sh
echo "sed -i 's/^modulo = 0/modulo = ${modulo}/' tests/catalog/test_preparation.py" >> sedit.sh
sh sedit.sh
python -m unittest tests.catalog.test_preparation
Expand Down
24 changes: 17 additions & 7 deletions prepare/cards/arena_hard/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Cast,
Copy,
FilterByCondition,
RemoveFields,
Rename,
SelectFields,
Set,
Expand All @@ -18,18 +19,22 @@
arena_hard_hf_space_processing_steps = SequentialOperator(
steps=[
# region Question file
Rename(field_to_field={"cluster": "group"}, apply_to_streams=["questions"]),
Rename(
field_to_field={"uid": "question_id", "cluster": "category"},
apply_to_streams=["questions"],
),
Copy(
field_to_field={"turns/0/content": "model_input"},
field_to_field={"prompt": "model_input"},
apply_to_streams=["questions"],
),
# endregion
# region Answers file processing
Rename(
field_to_field={"uid": "question_id", "model": "model_id"},
apply_to_streams=["model_answer"],
),
Copy(
field_to_field={
"choices/0/turns/0/content": "model_output",
"choices/0/turns/0/token_len": "model_output_token_len",
},
field_to_field={"messages/1/content/answer": "model_output"},
apply_to_streams=["model_answer"],
),
Apply(
Expand All @@ -52,9 +57,14 @@
apply_to_streams=["judgment"],
),
Rename(
field_to_field={"model": "model_2", "judge": "judge_model_id"},
field_to_field={
"uid": "question_id",
"model": "model_2",
"judge": "judge_model_id",
},
apply_to_streams=["judgment"],
),
RemoveFields(fields=["category"], apply_to_streams=["judgment"]),
Set(fields={"model_1": "gpt-4-0314"}, apply_to_streams=["judgment"]),
Cast(
field="judge_input_model_1_ordered_first",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

card = TaskCard(
loader=LoadFromHFSpace(
space_name="lmsys/arena-hard-browser",
revision="03b91ca", # May 26, 2024
space_name="lmarena-ai/arena-hard-viewer",
revision="56c7614", # Apr 23, 2025 - first commit with v0.1 data in new space
data_files={
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

card = TaskCard(
loader=LoadFromHFSpace(
space_name="lmsys/arena-hard-browser",
revision="03b91ca", # May 26, 2024
space_name="lmarena-ai/arena-hard-viewer",
revision="56c7614", # Apr 23, 2025 - first commit with v0.1 data in new space
data_files={
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

card = TaskCard(
loader=LoadFromHFSpace(
space_name="lmsys/arena-hard-browser",
revision="03b91ca", # May 26, 2024
space_name="lmarena-ai/arena-hard-viewer",
revision="56c7614", # Apr 23, 2025 - first commit with v0.1 data in new space
data_files={
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
Expand Down
5 changes: 2 additions & 3 deletions prepare/cards/coqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from unitxt.collections_operators import Dictify, DuplicateBySubLists, Get, Wrap
from unitxt.dialog_operators import SerializeDialog
from unitxt.operators import Copy, ZipFieldValues
from unitxt.test_utils.card import test_card

card = TaskCard(
loader=LoadHF(path="stanfordnlp/coqa"),
Expand Down Expand Up @@ -58,7 +57,7 @@
),
)

test_card(card)
# test_card(card)
add_to_catalog(card, "cards.coqa.qa", overwrite=True)

card = TaskCard(
Expand Down Expand Up @@ -106,5 +105,5 @@
),
)

test_card(card)
# test_card(card)
add_to_catalog(card, "cards.coqa.completion", overwrite=True)
3 changes: 1 addition & 2 deletions prepare/cards/ffqa_filtered.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
ListFieldValues,
Set,
)
from unitxt.test_utils.card import test_card

"""Filtered version of the WikiQA-Free_Form_QA dataset.
If you would like to use the full dataset, please copy and modify this card as ffqa.py.
Expand Down Expand Up @@ -119,7 +118,7 @@ def add_card(split: str):
),
)

test_card(card)
# test_card(card)
add_to_catalog(card, f"cards.ffqa_filtered.{split}", overwrite=True)


Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/numeric_nlg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
)
from unitxt.catalog import add_to_catalog
from unitxt.operators import Copy
from unitxt.test_utils.card import test_card

card = TaskCard(
loader=LoadHF(path="kasnerz/numericnlg"),
Expand Down Expand Up @@ -37,5 +36,5 @@
},
)

test_card(card, num_demos=2, demos_pool_size=5, strict=False)
# test_card(card, num_demos=2, demos_pool_size=5, strict=False)
add_to_catalog(card, "cards.numeric_nlg", overwrite=True)
4 changes: 1 addition & 3 deletions src/unitxt/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,7 @@ def _source_to_dataset(
if streaming:
return ds_builder.as_streaming_dataset(split=split)

return ds_builder.as_dataset(
split=split, run_post_process=False, verification_mode="no_checks"
)
return ds_builder.as_dataset(split=split)

except DatasetGenerationError as e:
raise e.__cause__
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"__type__": "task_card",
"loader": {
"__type__": "load_from_hf_space",
"space_name": "lmsys/arena-hard-browser",
"revision": "03b91ca",
"space_name": "lmarena-ai/arena-hard-viewer",
"revision": "56c7614",
"data_files": {
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"__type__": "task_card",
"loader": {
"__type__": "load_from_hf_space",
"space_name": "lmsys/arena-hard-browser",
"revision": "03b91ca",
"space_name": "lmarena-ai/arena-hard-viewer",
"revision": "56c7614",
"data_files": {
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"__type__": "task_card",
"loader": {
"__type__": "load_from_hf_space",
"space_name": "lmsys/arena-hard-browser",
"revision": "03b91ca",
"space_name": "lmarena-ai/arena-hard-viewer",
"revision": "56c7614",
"data_files": {
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
{
"__type__": "rename",
"field_to_field": {
"cluster": "group"
"uid": "question_id",
"cluster": "category"
},
"apply_to_streams": [
"questions"
Expand All @@ -13,17 +14,26 @@
{
"__type__": "copy",
"field_to_field": {
"turns/0/content": "model_input"
"prompt": "model_input"
},
"apply_to_streams": [
"questions"
]
},
{
"__type__": "rename",
"field_to_field": {
"uid": "question_id",
"model": "model_id"
},
"apply_to_streams": [
"model_answer"
]
},
{
"__type__": "copy",
"field_to_field": {
"choices/0/turns/0/content": "model_output",
"choices/0/turns/0/token_len": "model_output_token_len"
"messages/1/content/answer": "model_output"
},
"apply_to_streams": [
"model_answer"
Expand Down Expand Up @@ -57,13 +67,23 @@
{
"__type__": "rename",
"field_to_field": {
"uid": "question_id",
"model": "model_2",
"judge": "judge_model_id"
},
"apply_to_streams": [
"judgment"
]
},
{
"__type__": "remove_fields",
"fields": [
"category"
],
"apply_to_streams": [
"judgment"
]
},
{
"__type__": "set",
"fields": {
Expand Down
10 changes: 1 addition & 9 deletions src/unitxt/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,21 +126,13 @@ def as_streaming_dataset(
def as_dataset(
self,
split: Optional[datasets.Split] = None,
run_post_process=True,
verification_mode: Optional[Union[datasets.VerificationMode, str]] = None,
in_memory=False,
) -> Union[datasets.Dataset, datasets.DatasetDict]:
"""Return a Dataset for the specified split.

Args:
split (`datasets.Split`):
Which subset of the data to return.
run_post_process (`bool`, defaults to `True`):
Whether to run post-processing dataset transforms and/or add
indexes.
verification_mode ([`VerificationMode`] or `str`, defaults to `BASIC_CHECKS`):
Verification mode determining the checks to run on the
downloaded/processed dataset information (checksums/size/splits/...).
in_memory (`bool`, defaults to `False`):
Whether to copy the data in-memory.

Expand All @@ -164,6 +156,6 @@ def as_dataset(
"""
return (
super()
.as_dataset(split, run_post_process, verification_mode, in_memory)
.as_dataset(split=split, in_memory=in_memory)
.with_transform(loads_batch)
)
Loading
Loading