make activation pruning a continuous method instead of a single one-shot pass

nroope · nroope · commit 89d7eaffc9a4 · 2025-12-15T18:39:23.000+01:00
diff --git a/src/pquant/pruning_methods/activation_pruning.py b/src/pquant/pruning_methods/activation_pruning.py
@@ -18,7 +18,6 @@ def __init__(self, config, layer_type, *args, **kwargs):
         self.activations = None
         self.total = 0.0
         self.is_pretraining = True
-        self.done = False
         self.threshold = ops.convert_to_tensor(config.pruning_parameters.threshold)
         self.t_start_collecting_batch = self.config.pruning_parameters.t_start_collecting_batch
 
@@ -37,7 +36,7 @@ def collect_output(self, output, training):
         linear/convolution layer are over 0. Every t_delta steps, uses these values to update
         the mask to prune those channels and neurons that are active less than a given threshold
         """
-        if self.done or not training or self.is_pretraining:
+        if not training or self.is_pretraining:
             # Don't collect during validation
             return
         if self.activations is None:
@@ -54,6 +53,7 @@ def collect_output(self, output, training):
             pct_active = self.activations / self.total
             self.t = 0
             self.total = 0
+            self.batches_collected = 0
             if self.layer_type == "linear":
                 self.mask = ops.expand_dims(ops.cast((pct_active > self.threshold), pct_active.dtype), 1)
             else:
@@ -65,7 +65,6 @@ def collect_output(self, output, training):
                 else:
                     self.mask = ops.reshape(pct_active_above_threshold, list(pct_active_above_threshold.shape) + [1, 1, 1])
             self.activations *= 0.0
-            self.done = True
 
     def call(self, weight):  # Mask is only updated every t_delta step, using collect_output
         if self.is_pretraining: