Merged

Dev #29

Commits
19 commits
d3c0756
Update README.md
nroope Dec 19, 2025
b31bf2c
Add removed property at training model (#22)
nastiapetrovych Jan 12, 2026
b370209
Modified 'post_round' function condition (#23)
nastiapetrovych Jan 12, 2026
fae6df9
Add minor modifications to the core functionality
nastiapetrovych Feb 3, 2026
30ff9ef
Add minor modifications to the core functionality
nastiapetrovych Feb 3, 2026
3c93c79
Add parameter
nastiapetrovych Feb 3, 2026
577cd47
Add loss function modifications
nastiapetrovych Feb 3, 2026
11b3889
Add support for different fixed granularities
nastiapetrovych Feb 3, 2026
6867e73
Add weight and biasses parameters registration and hgq quantization l…
nastiapetrovych Feb 12, 2026
084e62d
Modified hgq constraint comment
nastiapetrovych Feb 12, 2026
f639d11
Modified default values for pruning_first and use_relu_multiplier
nastiapetrovych Feb 13, 2026
9495b47
Add overflow modes for data and parameters seperately; fixed if self.…
nastiapetrovych Feb 13, 2026
10a3dd9
Fixed model logging error
nastiapetrovych Feb 16, 2026
2b32844
Add Torch HGQ serialization. Add Keras serialization for dense
nroope Feb 16, 2026
4ba7f42
fixed dynamic bit calculation bug, added torch fixed point quantizer
nroope Feb 17, 2026
21a3011
fix fitcompress, make pdp structured do output channel pruning, renam…
nroope Feb 22, 2026
b515217
rename rewind options to match paper
nroope Feb 22, 2026
c571b91
remove extras from configs, fix pdp bug with Keras, add input_shape b…
nroope Feb 22, 2026
f0e7da3
add hpo_params to all configs, make hpo return best_trials in case of…
nroope Feb 22, 2026
13 changes: 11 additions & 2 deletions README.md
@@ -3,8 +3,11 @@
## Prune and Quantize ML models
PQuant is a library for training compressed machine learning models, developed at CERN as part of the [Next Generation Triggers](https://nextgentriggers.web.cern.ch/t13/) project.

-Installation via pip: ```pip install pquant-ml```.
-To run the code, [HGQ2](https://github.com/calad0i/HGQ2) is also needed.
+Installation via pip: ```pip install pquant-ml```.
+
+With TensorFlow ```pip install pquant-ml[tensorflow]```.
+
+With PyTorch ```pip install pquant-ml[torch]```.

PQuant replaces the layers and activations it finds with a Compressed (in the case of layers) or Quantized (in the case of activations) variant. These automatically handle the quantization of the weights, biases and activations, and the pruning of the weights.
Both PyTorch and TensorFlow models are supported.
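The replacement mechanism described above can be pictured with a toy model. The classes below are stand-ins invented for illustration, not PQuant's actual layer types or API:

```python
class Dense:
    """Stand-in for a framework layer."""
    def __init__(self, units):
        self.units = units

class CompressedDense(Dense):
    """Stand-in for PQuant's compressed variant of a Dense layer."""
    def __init__(self, units):
        super().__init__(units)
        self.prunes_weights = True
        self.quantizes_weights = True

def replace_layers(layers):
    """Swap each recognized layer for its compressed variant; leave the rest alone."""
    replaced = []
    for layer in layers:
        if type(layer) is Dense:
            replaced.append(CompressedDense(layer.units))
        else:
            replaced.append(layer)
    return replaced
```

Activations would presumably get the same treatment with Quantized variants, and the real library has to carry weights and configuration over into the new layers as well.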
@@ -47,6 +50,12 @@ For detailed documentation check this page: [PQuantML documentation](https://pqu
### Authors
- Roope Niemi (CERN)
- Anastasiia Petrovych (CERN)
+- Arghya Das (Purdue University)
+- Enrico Lupi (CERN)
+- Chang Sun (Caltech)
+- Dimitrios Danopoulos (CERN)
+- Marlon Joshua Helbing
+- Mia Liu (Purdue University)
- Michael Kagan (SLAC National Accelerator Laboratory)
- Vladimir Loncar (CERN)
- Maurizio Pierini (CERN)
3 changes: 2 additions & 1 deletion docs/source/reference.md
@@ -38,7 +38,8 @@ If you require additional parameters for the training or optimization loops, ple
| `layer_specific` | dict | `{}` | Dictionary for per-layer quantization overrides. |
| `use_hgq` | bool | `false` | Enable or disable High Granularity Quantization (HGQ). |
| `use_real_tanh` | bool | `false` | Use a real `tanh` instead of hard/approximate `tanh`. |
-| `overflow` | str | `"SAT"` | Overflow handling mode (`SAT`, `SAT_SYM`, `WRAP`, `WRAP_SM`). |
+| `overflow_mode_data` | str | `"SAT"` | Overflow handling mode for input and output quantizers (`SAT`, `SAT_SYM`, `WRAP`, `WRAP_SM`). |
+| `overflow_mode_parameters` | str | `"SAT"` | Overflow handling mode for weight and bias quantizers (`SAT`, `SAT_SYM`, `WRAP`, `WRAP_SM`). |
| `round_mode` | str | `"RND"` | Rounding mode (`TRN`, `RND`, `RND_CONV`, `RND_ZERO`, etc.). |
| `use_relu_multiplier` | bool | `true` | Enable a learned bit-shift multiplier inside ReLU layers. |

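The split of `overflow` into `overflow_mode_data` and `overflow_mode_parameters` makes saturate-vs-wrap behavior configurable separately for activations and weights. A minimal, self-contained sketch of what `SAT` and `WRAP` mean for a signed fixed-point value (illustrative only — PQuant/HGQ2's actual quantizers are more involved, and the bit-width convention here is an assumption):

```python
def quantize_fixed(x, integer_bits, fractional_bits, overflow="SAT"):
    """Toy signed fixed-point quantizer: 1 sign bit + integer_bits + fractional_bits."""
    scale = 2 ** fractional_bits
    total_bits = 1 + integer_bits + fractional_bits
    lo = -(2 ** (total_bits - 1))        # most negative representable integer
    hi = 2 ** (total_bits - 1) - 1       # most positive representable integer
    q = round(x * scale)                 # "RND": round to nearest
    if overflow == "SAT":
        q = max(lo, min(hi, q))          # clip into the representable range
    elif overflow == "WRAP":
        q = (q - lo) % (2 ** total_bits) + lo  # two's-complement wrap-around
    return q / scale

# With 0 integer bits and 7 fractional bits (the default_data_* values in the
# shipped configs), an out-of-range input saturates under SAT but wraps under WRAP:
print(quantize_fixed(1.5, 0, 7, "SAT"))   # 0.9921875 (= 127/128)
print(quantize_fixed(1.5, 0, 7, "WRAP"))  # -0.5
```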
2 changes: 1 addition & 1 deletion examples/example_finetuning.ipynb
@@ -150,7 +150,7 @@
" enable_quantization: true\n",
" hgq_gamma: 0.0003\n",
" hgq_heterogeneous: true\n",
-" layer_specific: []\n",
+" layer_specific: {}\n",
" use_high_granularity_quantization: false\n",
" use_real_tanh: false\n",
" use_symmetric_quantization: false\n",
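The `layer_specific: []` → `layer_specific: {}` change recurs throughout this PR: the reference table documents `layer_specific` as a dict of per-layer overrides, and an empty sequence deserializes to a list, not a mapping, which breaks name-based lookup. The same type distinction, sketched with stdlib JSON (the lookup pattern is illustrative, not PQuant's code):

```python
import json

empty_list = json.loads("[]")   # a sequence literal gives a list
empty_dict = json.loads("{}")   # a mapping literal gives a dict

# Per-layer overrides need a mapping so they can be looked up by layer name;
# with a dict, a missing layer simply falls back to the defaults.
overrides = empty_dict
layer_cfg = overrides.get("dense_1", {"fractional_bits": 7})
print(layer_cfg)  # {'fractional_bits': 7}
```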
611 changes: 570 additions & 41 deletions examples/example_jet_tagging.ipynb

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions src/pquant/__init__.py
@@ -6,11 +6,13 @@
backend = os.getenv("KERAS_BACKEND", "tensorflow")
if backend == "torch":
from . import configs, pruning_methods
-from .core.finetuning import (
+from .core.hyperparameter_optimization import (
+PQConfig,
ap_config,
autosparse_config,
cs_config,
dst_config,
+fitcompress_config,
load_from_dictionary,
load_from_file,
mdmm_config,
@@ -24,6 +26,7 @@
get_ebops,
get_layer_keep_ratio,
get_model_losses,
+load_torch_hgq_model,
post_training_prune,
)
from .core.torch.train import train_model
@@ -49,14 +52,18 @@
_forwards.append("mdmm_config")
_forwards.append("pdp_config")
_forwards.append("wanda_config")
+_forwards.append("fitcompress_config")
_forwards.append("load_from_file")
_forwards.append("load_from_dictionary")
_forwards.append("get_ebops")
+_forwards.append("load_torch_hgq_model")
+_forwards.append("PQConfig")
__all__ = _forwards

else:
from . import configs, pruning_methods
-from .core.finetuning import (
+from .core.hyperparameter_optimization import (
+PQConfig,
ap_config,
autosparse_config,
cs_config,
@@ -101,4 +108,5 @@
_forwards.append("wanda_config")
_forwards.append("load_from_file")
_forwards.append("load_from_dictionary")
+_forwards.append("PQConfig")
__all__ = _forwards
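The module above gates its imports on Keras's backend selection. The dispatch pattern itself is simple; the functions below are stand-ins, not the real training loops:

```python
import os

# KERAS_BACKEND decides which implementation is exposed, defaulting to
# TensorFlow, mirroring the dispatch at the top of src/pquant/__init__.py.
backend = os.getenv("KERAS_BACKEND", "tensorflow")

if backend == "torch":
    def train_model():
        return "torch training loop"   # stand-in for pquant.core.torch.train
else:
    def train_model():
        return "keras training loop"   # stand-in for the Keras/TF path
```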
34 changes: 16 additions & 18 deletions src/pquant/configs/config_ap.yaml
@@ -1,5 +1,5 @@
pruning_parameters:
-disable_pruning_for_layers: [] # Disable pruning for these layers, even if enable_pruning is true
+disable_pruning_for_layers: []
enable_pruning: true
pruning_method: activation_pruning
threshold: 0.2
@@ -13,18 +13,20 @@ quantization_parameters:
default_data_keep_negatives: 0.
default_data_integer_bits: 0.
default_data_fractional_bits: 7.
+granularity: "per_tensor"
quantize_input: true
quantize_output: false
enable_quantization: true
hgq_beta: 1e-5
hgq_gamma: 0.0003
hgq_heterogeneous: True
-layer_specific: []
+layer_specific: {}
use_high_granularity_quantization: false
use_real_tanh: false
-use_relu_multiplier: true
+use_relu_multiplier: false
use_symmetric_quantization: false
-overflow: SAT
+overflow_mode_parameters: SAT
+overflow_mode_data: SAT
round_mode: RND
fitcompress_parameters:
enable_fitcompress : false
@@ -40,20 +42,16 @@ training_parameters:
epochs: 200
fine_tuning_epochs: 0
pretraining_epochs: 0
-pruning_first: false
+pruning_first: true
rewind: never
rounds: 1
save_weights_epoch: -1
-batch_size: 256
-cosine_tmax: 200
-gamma: 0.1
-l2_decay: 0.0001
-label_smoothing: 0.0
-lr: 0.01
-lr_schedule: cosine
-milestones:
-- -1
-- -1
-momentum: 0.9
-optimizer: sgd
-plot_frequency: 100
+hpo_parameters:
+experiment_name: experiment_name
+model_name: jet_tagger
+num_trials: 1
+sampler:
+type: RandomSampler
+hyperparameter_search:
+numerical: {}
+categorical: {}
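The new `hpo_parameters` block ships with an empty search space (`numerical: {}`, `categorical: {}`). How PQuant consumes a populated space is not shown in this diff; the shapes below (range pairs for `numerical`, choice lists for `categorical`) and the sampling logic are assumptions for illustration only, loosely modeled on the `RandomSampler` named in the config:

```python
import random

# Hypothetical populated search space; the key/value shapes are assumed.
search_space = {
    "numerical": {"lr": (0.001, 0.1)},
    "categorical": {"optimizer": ["sgd", "adam"]},
}

def sample_trial(space, rng):
    """Draw one random trial from the (assumed) search-space layout."""
    params = {}
    for name, (low, high) in space["numerical"].items():
        params[name] = rng.uniform(low, high)   # uniform draw in [low, high]
    for name, choices in space["categorical"].items():
        params[name] = rng.choice(choices)      # uniform pick from the choices
    return params

trial = sample_trial(search_space, random.Random(0))
```

With `num_trials: 1`, a loop over `sample_trial` would run exactly once; an Optuna-style backend would replace the random draws with its own suggest API.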
32 changes: 15 additions & 17 deletions src/pquant/configs/config_autosparse.yaml
@@ -16,18 +16,20 @@ quantization_parameters:
default_data_keep_negatives: 0.
default_data_integer_bits: 0.
default_data_fractional_bits: 7.
+granularity: "per_tensor"
quantize_input: true
quantize_output: false
enable_quantization: true
hgq_beta: 1e-5
hgq_gamma: 0.0003
hgq_heterogeneous: True
-layer_specific: []
+layer_specific: {}
use_high_granularity_quantization: false
use_real_tanh: false
-use_relu_multiplier: true
+use_relu_multiplier: false
use_symmetric_quantization: false
-overflow: SAT
+overflow_mode_parameters: SAT
+overflow_mode_data: SAT
round_mode: RND
fitcompress_parameters:
enable_fitcompress : false
Expand All @@ -43,20 +45,16 @@ training_parameters:
epochs: 100
fine_tuning_epochs: 0
pretraining_epochs: 0
pruning_first: false
pruning_first: true
rewind: never
rounds: 1
save_weights_epoch: -1.0
batch_size: 256
cosine_tmax: 200
gamma: 0.1
l2_decay: 3.0517578125e-05
label_smoothing: 0.1
lr: 0.01
lr_schedule: cosine
milestones:
- -1
- -1
momentum: 0.875
optimizer: sgd
plot_frequency: 100
hpo_parameters:
experiment_name: experiment_name
model_name: jet_tagger
num_trials: 1
sampler:
type: RandomSampler
hyperparameter_search:
numerical: {}
categorical: {}
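`granularity: "per_tensor"` is new in every config, matching the commit "Add support for different fixed granularities". The usual meaning of tensor- vs channel-level granularity is one quantization scale shared by the whole tensor versus one scale per output channel; a sketch of that distinction (illustrative only — any granularity name besides `per_tensor` is an assumption here):

```python
def max_abs(values):
    return max(abs(v) for v in values)

def quant_scales(weights, granularity):
    """weights: list of channels, each a list of floats."""
    if granularity == "per_tensor":
        s = max_abs([w for channel in weights for w in channel])
        return [s] * len(weights)               # one shared scale for all channels
    if granularity == "per_channel":
        return [max_abs(channel) for channel in weights]  # independent scales
    raise ValueError(f"unknown granularity: {granularity}")

w = [[0.5, -2.0], [0.25, 0.1]]
print(quant_scales(w, "per_tensor"))   # [2.0, 2.0]
print(quant_scales(w, "per_channel"))  # [2.0, 0.25]
```

Finer granularity fits each channel's range more tightly at the cost of extra scale parameters.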
34 changes: 16 additions & 18 deletions src/pquant/configs/config_cs.yaml
@@ -12,18 +12,20 @@ quantization_parameters:
default_data_keep_negatives: 0.
default_data_integer_bits: 0.
default_data_fractional_bits: 7.
+granularity: "per_tensor"
quantize_input: true
quantize_output: false
enable_quantization: true
hgq_beta: 1e-5
hgq_gamma: 0.0003
hgq_heterogeneous: True
-layer_specific: []
+layer_specific: {}
use_high_granularity_quantization: false
use_real_tanh: false
-use_relu_multiplier: true
+use_relu_multiplier: false
use_symmetric_quantization: false
-overflow: SAT
+overflow_mode_parameters: SAT
+overflow_mode_data: SAT
round_mode: RND
fitcompress_parameters:
enable_fitcompress : false
Expand All @@ -39,20 +41,16 @@ training_parameters:
epochs: 85
fine_tuning_epochs: 85
pretraining_epochs: 0
pruning_first: false
rewind: post-ticket-search
pruning_first: true
rewind: post-training-stage
rounds: 3
save_weights_epoch: 2
batch_size: 256
cosine_tmax: 200
gamma: 0.1
l2_decay: 0.0001
label_smoothing: 0.0
lr: 0.1
lr_schedule: multistep
milestones:
- 56
- 71
momentum: 0.9
optimizer: sgd
plot_frequency: 100
hpo_parameters:
experiment_name: experiment_name
model_name: jet_tagger
num_trials: 1
sampler:
type: RandomSampler
hyperparameter_search:
numerical: {}
categorical: {}
32 changes: 15 additions & 17 deletions src/pquant/configs/config_dst.yaml
@@ -14,18 +14,20 @@ quantization_parameters:
default_data_keep_negatives: 0.
default_data_integer_bits: 0.
default_data_fractional_bits: 7.
+granularity: "per_tensor"
quantize_input: true
quantize_output: false
enable_quantization: true
hgq_beta: 1e-5
hgq_gamma: 0.0003
hgq_heterogeneous: True
-layer_specific: []
+layer_specific: {}
use_high_granularity_quantization: false
use_real_tanh: false
-use_relu_multiplier: true
+use_relu_multiplier: false
use_symmetric_quantization: false
-overflow: SAT
+overflow_mode_parameters: SAT
+overflow_mode_data: SAT
round_mode: RND
fitcompress_parameters:
enable_fitcompress : false
Expand All @@ -41,20 +43,16 @@ training_parameters:
epochs: 160
fine_tuning_epochs: 0
pretraining_epochs: 0
pruning_first: false
pruning_first: true
rewind: never
rounds: 1
save_weights_epoch: -1
batch_size: 64
cosine_tmax: 200
gamma: 0.1
l2_decay: 0.0001
label_smoothing: 0.0
lr: 0.01
lr_schedule: multistep
milestones:
- 80
- 120
momentum: 0.9
optimizer: sgd
plot_frequency: 100
hpo_parameters:
experiment_name: experiment_name
model_name: jet_tagger
num_trials: 1
sampler:
type: RandomSampler
hyperparameter_search:
numerical: {}
categorical: {}
32 changes: 15 additions & 17 deletions src/pquant/configs/config_fitcompress.yaml
@@ -11,18 +11,20 @@ quantization_parameters:
default_data_keep_negatives: 0.
default_data_integer_bits: 0.
default_data_fractional_bits: 7.
+granularity: "per_tensor"
quantize_input: true
quantize_output: false
enable_quantization: true
hgq_beta: 1e-5
hgq_gamma: 0.0003
hgq_heterogeneous: True
-layer_specific: []
+layer_specific: {}
use_high_granularity_quantization: false
use_real_tanh: false
-use_relu_multiplier: true
+use_relu_multiplier: false
use_symmetric_quantization: false
-overflow: SAT
+overflow_mode_parameters: SAT
+overflow_mode_data: SAT
round_mode: RND
fitcompress_parameters:
enable_fitcompress : true
Expand All @@ -38,20 +40,16 @@ training_parameters:
epochs: 200
fine_tuning_epochs: 0
pretraining_epochs: 100
pruning_first: false
pruning_first: true
rewind: never
rounds: 1
save_weights_epoch: -1
batch_size: 256
cosine_tmax: 200
gamma: 0.1
l2_decay: 0.0001
label_smoothing: 0.0
lr: 0.1
lr_schedule: cosine
milestones:
- -1
- -1
momentum: 0.9
optimizer: adam
plot_frequency: 100
hpo_parameters:
experiment_name: experiment_name
model_name: jet_tagger
num_trials: 1
sampler:
type: RandomSampler
hyperparameter_search:
numerical: {}
categorical: {}