Merged
3 changes: 1 addition & 2 deletions .github/hooks/.pre-commit-config.yaml
@@ -12,8 +12,7 @@ repos:
args:
- --fix=lf
- id: name-tests-test
args:
- --pytest
exclude: "hyperbench/tests/mock/"
- id: pretty-format-json
args:
- --autofix
35 changes: 35 additions & 0 deletions .github/workflows/chore.yaml
@@ -0,0 +1,35 @@
name: Run tests and upload coverage

on:
push:
branches: [main]

jobs:
test:
name: Run tests and collect coverage
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 2

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.14"

- name: Install uv
uses: astral-sh/setup-uv@v6
- name: Install dependencies
run: |
uv venv
uv pip install -e .

- name: Run tests
run: uv run pytest --cov --cov-branch --cov-report=xml

- name: Upload results to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
96 changes: 87 additions & 9 deletions README.md
@@ -1,25 +1,78 @@
# HyperBench

## Contribution guide
[![Contributors][contributors-shield]][contributors-url]
[![Forks][forks-shield]][forks-url]
[![Stargazers][stars-shield]][stars-url]
[![Issues][issues-shield]][issues-url]
[![project_license][license-shield]][license-url]
[![codecov](https://codecov.io/github/hypernetwork-research-group/hyperbench/graph/badge.svg?token=XE0TB5JMOS)](https://codecov.io/github/hypernetwork-research-group/hyperbench)

<!-- TABLE OF CONTENTS -->
<details>
<summary>Table of Contents</summary>
<ol>
<li>
<a href="#about-the-project">About the project</a>
</li>
<li>
<a href="#getting-started">Getting started</a>
<ul>
<li><a href="#prerequisites">Prerequisites</a></li>
<li><a href="#installation">Installation</a></li>
<ul>
<li><a href="#sync-dependencies">Sync dependencies</a></li>
</ul>
</ul>
</li>
<li><a href="#usage">Usage</a></li>
<li>
<a href="#contributing">Contributing</a>
<ul>
<li><a href="#pre-commit-hooks">Pre-commit hooks</a></li>
<li><a href="#linter">Linter</a></li>
<li><a href="#type-checker">Type checker</a></li>
<li><a href="#tests">Tests</a></li>
<li><a href="#utilities">Utilities</a></li>
</ul>
</li>
<li><a href="#license">License</a></li>
<li><a href="#contact">Contact</a></li>
<li><a href="#acknowledgments">Acknowledgments</a></li>
</ol>
</details>

## About The Project

## Getting Started

### Prerequisites

WIP

### Installation

#### Sync dependencies

### Pre-commit hooks

Run the following command to install the pre-commit hook:
Use [uv](https://docs.astral.sh/uv/reference/cli/) to sync dependencies:

```bash
uv sync

pre-commit install --config .github/hooks/.pre-commit-config.yaml --hook-type pre-commit --install-hooks --overwrite
```

## Commands
## Usage

### Sync dependencies
## Contributing

Use [uv](https://docs.astral.sh/uv/reference/cli/) to sync dependencies:
See [CONTRIBUTING.md](CONTRIBUTING.md) for details.

### Pre-commit hooks

Run the following command to install the pre-commit hook:

```bash
uv sync

pre-commit install --config .github/hooks/.pre-commit-config.yaml --hook-type pre-commit --install-hooks --overwrite
```

### Linter
@@ -58,9 +111,34 @@ uv run pytest --cov=hyperbench --cov-report=html
Before committing code, run the following command to ensure code quality:

```bash
uv pip uninstall . && \
uv sync && \
uv pip install -e . && \
uv run ruff format && \
uvx ty check && \
uv run pytest --cov=hyperbench --cov-report=term-missing
```

## License

WIP

## Contact

WIP

## Acknowledgments




[contributors-shield]: https://img.shields.io/github/contributors/hypernetwork-research-group/hyperbench.svg?style=for-the-badge
[contributors-url]: https://github.com/hypernetwork-research-group/hyperbench/graphs/contributors
[forks-shield]: https://img.shields.io/github/forks/hypernetwork-research-group/hyperbench.svg?style=for-the-badge
[forks-url]: https://github.com/hypernetwork-research-group/hyperbench/network/members
[stars-shield]: https://img.shields.io/github/stars/hypernetwork-research-group/hyperbench.svg?style=for-the-badge
[stars-url]: https://github.com/hypernetwork-research-group/hyperbench/stargazers
[issues-shield]: https://img.shields.io/github/issues/hypernetwork-research-group/hyperbench.svg?style=for-the-badge
[issues-url]: https://github.com/hypernetwork-research-group/hyperbench/issues
[license-shield]: https://img.shields.io/github/license/hypernetwork-research-group/hyperbench.svg?style=for-the-badge
[license-url]: https://github.com/hypernetwork-research-group/hyperbench/blob/master/LICENSE.txt
92 changes: 56 additions & 36 deletions hyperbench/data/dataset.py
@@ -1,16 +1,17 @@
"""Example usage of the Hypergraph class with HIF data."""

import json
import os
import gdown
import tempfile
import torch
import zstandard as zstd

from enum import Enum
from typing import Any
from torch.utils.data import Dataset as TorchDataset
from hyperbench.types.hypergraph import HIFHypergraph
from hyperbench.types.hdata import HData
from hyperbench.utils.hif import validate_hif_json
from hyperbench.utils.hif_utils import validate_hif_json


class DatasetNames(Enum):
@@ -38,13 +39,30 @@ def load_from_hif(dataset_name: str | None, file_id: str | None) -> HIFHypergrap
if dataset_name not in DatasetNames.__members__:
raise ValueError(f"Dataset '{dataset_name}' not found.")

url = f"https://drive.google.com/uc?id={file_id}"
dataset_name_lower = dataset_name.lower()
current_dir = os.path.dirname(os.path.abspath(__file__))
zst_filename = os.path.join(
current_dir, "datasets", f"{dataset_name_lower}.json.zst"
)

with tempfile.NamedTemporaryFile(
mode="w+", suffix=".json", delete=False
) as tmp_file:
output = tmp_file.name
gdown.download(url=url, output=output, quiet=False, fuzzy=True)
if os.path.exists(zst_filename):
dctx = zstd.ZstdDecompressor()
with (
open(zst_filename, "rb") as input_f,
tempfile.NamedTemporaryFile(
mode="wb", suffix=".json", delete=False
) as tmp_file,
):
dctx.copy_stream(input_f, tmp_file)
output = tmp_file.name
else:
url = f"https://drive.google.com/uc?id={file_id}"

with tempfile.NamedTemporaryFile(
mode="w+", suffix=".json", delete=False
) as tmp_file:
output = tmp_file.name
gdown.download(url=url, output=output, quiet=False, fuzzy=True)

with open(output, "r") as f:
hiftext = json.load(f)
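The branch above prefers a bundled `.json.zst` file and streams it through a decompressor into a temporary `.json` file before parsing. A minimal sketch of that decompress-to-tempfile pattern follows; it substitutes the stdlib's `gzip` and `shutil.copyfileobj` for `zstandard`'s `dctx.copy_stream` so it runs without third-party packages, and the payload is a made-up stand-in, not a real HIF dataset:

```python
# Sketch of the decompress-to-tempfile pattern in load_from_hif, using gzip
# from the stdlib in place of zstandard (the stream-copy shape is the same).
import gzip
import json
import shutil
import tempfile

payload = {"nodes": [{"node": 0}], "edges": [{"edge": 10}], "incidences": []}

# Write a compressed JSON file, mimicking datasets/<name>.json.zst.
with tempfile.NamedTemporaryFile(suffix=".json.gz", delete=False) as src:
    src.write(gzip.compress(json.dumps(payload).encode()))
    compressed_path = src.name

# Stream-decompress into a temporary .json file, as the diff does with
# dctx.copy_stream(input_f, tmp_file).
with gzip.open(compressed_path, "rb") as input_f, tempfile.NamedTemporaryFile(
    mode="wb", suffix=".json", delete=False
) as tmp_file:
    shutil.copyfileobj(input_f, tmp_file)
    output = tmp_file.name

with open(output) as f:
    hiftext = json.load(f)

print(hiftext["nodes"])
```

Streaming through a tempfile keeps memory flat for large datasets and lets the downstream `json.load` path stay identical for both the local and the downloaded branch.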
@@ -67,27 +85,29 @@ class Dataset(TorchDataset):
process(): Processes the hypergraph into HData format.
"""

# TODO: move to __init__() arguments so users can supply their own dataset names and file IDs for HIF-formatted datasets
GDRIVE_FILE_ID = None
DATASET_NAME = None

def __init__(self) -> None:
self.hypergraph = None
self.hypergraph: HIFHypergraph = self.download()
self.hdata: HData = self.process()

def __len__(self) -> int:
if self.hypergraph is None:
return 0
return len(self.hypergraph.nodes)

def __getitem__(self, index: int) -> HData:
# TODO: implement sampling of nodes with given index
return self.hdata

def download(self) -> HIFHypergraph:
"""
Load the hypergraph from HIF format using HIFConverter class.
"""
if self.hypergraph is not None:
if hasattr(self, "hypergraph") and self.hypergraph is not None:
return self.hypergraph
self.hypergraph = HIFConverter.load_from_hif(
self.DATASET_NAME, self.GDRIVE_FILE_ID
)
return self.hypergraph
hypergraph = HIFConverter.load_from_hif(self.DATASET_NAME, self.GDRIVE_FILE_ID)
return hypergraph

def process(self) -> HData:
"""
@@ -96,27 +116,35 @@ def process(self) -> HData:
HData: Processed hypergraph data.
"""

if self.hypergraph is None:
raise ValueError("Hypergraph is not loaded. Call download() first.")

num_nodes = len(self.hypergraph.nodes)
num_edges = len(self.hypergraph.edges)

x = torch.arange(num_nodes).unsqueeze(1)

node_ids = []
edge_ids = []
for incidence in self.hypergraph.incidences:
node_id = int(incidence.get("node", 0))
edge_id = int(incidence.get("edge", 0))
node_ids.append(node_id)
edge_ids.append(edge_id)
node_set = []
edge_set = []
incidences_tuples = []

for inc in self.hypergraph.incidences:
node = inc.get("node", 0)
edge = inc.get("edge", 0)
if node not in node_set:
node_set.append(node)
if edge not in edge_set:
edge_set.append(edge)
incidences_tuples.append((node, edge))

node_id_mapping = {node_id: idx for idx, node_id in enumerate(node_set)}
edge_id_mapping = {edge_id: idx for idx, edge_id in enumerate(edge_set)}

node_ids = [node_id_mapping[node] for node, _ in incidences_tuples]
edge_ids = [edge_id_mapping[edge] for _, edge in incidences_tuples]

edge_index = None
if len(node_ids) < 1:
raise ValueError("Hypergraph has no incidences.")

# edge_index: shape [2, M] where M is number of incidences
# edge_index: shape [2, E] where E is number of incidences
# First row: node IDs, Second row: hyperedge IDs
edge_index = torch.tensor([node_ids, edge_ids])

@@ -130,17 +158,9 @@ def process(self) -> HData:
edge_attrs.append(len(attrs))
edge_attr = torch.tensor(edge_attrs).unsqueeze(1)

hdata = HData(x, edge_index, edge_attr, num_nodes, num_edges)

return hdata

def __getitem__(self, index: int) -> Any:
pass
return HData(x, edge_index, edge_attr, num_nodes, num_edges)
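The remapping step in `process()` exists because raw HIF node and edge identifiers may be sparse or non-numeric, so they are first mapped to contiguous `0..N-1` indices before `edge_index` is built. A self-contained sketch of that remapping, with invented incidence data and plain lists standing in for the torch tensor:

```python
# Contiguous-ID remapping, mirroring the loop in process(). The incidence
# records here are hypothetical; real ones come from HIFHypergraph.
incidences = [
    {"node": "a", "edge": 10},
    {"node": "c", "edge": 10},
    {"node": "a", "edge": 42},
]

node_set, edge_set, pairs = [], [], []
for inc in incidences:
    node, edge = inc.get("node", 0), inc.get("edge", 0)
    if node not in node_set:
        node_set.append(node)  # first-seen order, as in the diff
    if edge not in edge_set:
        edge_set.append(edge)
    pairs.append((node, edge))

node_id_mapping = {n: i for i, n in enumerate(node_set)}
edge_id_mapping = {e: i for i, e in enumerate(edge_set)}

# Two parallel rows, the same layout torch.tensor([node_ids, edge_ids])
# produces: row 0 is node indices, row 1 is hyperedge indices.
node_ids = [node_id_mapping[n] for n, _ in pairs]
edge_ids = [edge_id_mapping[e] for _, e in pairs]
print(node_ids, edge_ids)  # → [0, 1, 0] [0, 0, 1]
```

First-seen ordering makes the mapping deterministic for a given incidence list, which keeps `edge_index` stable across runs.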


class AlgebraDataset(Dataset):
DATASET_NAME = "ALGEBRA"
GDRIVE_FILE_ID = "1-H21_mZTcbbae4U_yM3xzXX19VhbCZ9C"

def __init__(self) -> None:
super().__init__()
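`AlgebraDataset` shows the registration pattern: a subclass that only overrides the two class attributes. A hypothetical sketch of adding another dataset the same way — the base class below is a stub standing in for `hyperbench.data.dataset.Dataset`, and both the dataset name and file ID are placeholders, not real entries:

```python
# Hypothetical sketch of registering a new HIF dataset. The real base class
# also triggers download() and process() in __init__; this stub only carries
# the class attributes the pattern relies on.
class Dataset:  # stub for hyperbench's Dataset
    GDRIVE_FILE_ID = None
    DATASET_NAME = None


class GeometryDataset(Dataset):
    DATASET_NAME = "GEOMETRY"          # would need a matching DatasetNames member
    GDRIVE_FILE_ID = "PLACEHOLDER_ID"  # hypothetical Drive file ID

print(GeometryDataset.DATASET_NAME)
```

With the TODO in the diff resolved, these could instead become `__init__()` arguments, avoiding one subclass per dataset.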
Binary file added hyperbench/data/datasets/algebra.json.zst
4 changes: 1 addition & 3 deletions hyperbench/tests/__init__.py
@@ -1,3 +1 @@
from .mock import MOCK_BASE_PATH

__all__ = ["MOCK_BASE_PATH"]
from .mock import *