Arm backend: Fix inf/-inf handling in comparison function

martinlsm · martinlsm · commit c65273693d7d · 2026-03-23T15:06:57.000+01:00
`compare_rel_frobenius_and_cosine_similarity` was not handling inf/-inf
correctly for cases where zero points were non-zero. This patch
addresses this issue by using qmin/qmax together with the zero point
when translating inf/-inf in the floating-point reference before
comparison.

Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com>
Change-Id: I498783bdf2065eae22c262a6179534682aa7c5ec
diff --git a/backends/arm/test/ops/test_masked_fill.py b/backends/arm/test/ops/test_masked_fill.py
@@ -106,13 +106,7 @@ def test_masked_fill_scalar_tosa_FP(test_module):
     pipeline.run()
 
 
-@common.parametrize(
-    "test_module",
-    test_modules,
-    xfails={
-        "masked_fill_8_extreme_scalar_inf": "MLETORCH-1812 - Quantization inaccurate on inf-values in masked fill"
-    },
-)
+@common.parametrize("test_module", test_modules)
 def test_masked_fill_scalar_tosa_INT(test_module):
     module, inputs = test_module()
     pipeline = TosaPipelineINT[input_t](
diff --git a/backends/arm/test/tester/analyze_output_utils.py b/backends/arm/test/tester/analyze_output_utils.py
@@ -350,8 +350,8 @@ def compare_rel_frobenius_and_cosine_similarity(
     Cosine similarity test: The cosine similiarity of the flattened reference and test tensor. Closer to 1 is better.
 
     If clean_reference is set to True the following is done to the reference :
-        - NaN-values will be set to 0
-        - Inf values will be set to max/min representable by the dtype * quantization scale
+        - NaN-values will be set to 0.0
+        - Inf values will be set to (qmax - zp) * scale and -Inf values to (qmin - zp) * scale
         - Values lower than the scale will be set to 0.0
     If the reference is all zeros, the function returns without testing.
 
@@ -374,10 +374,15 @@ def compare_rel_frobenius_and_cosine_similarity(
                 if isinstance(scale, torch.Tensor)
                 else float(scale)
             )
-            dtype_info = torch.iinfo(quantization_parameters.dtype)
             assert quant_scale_for_guards is not None
-            posinf_value = float(dtype_info.max) * quant_scale_for_guards
-            neginf_value = float(dtype_info.min) * quant_scale_for_guards
+            posinf_value = (
+                float(quantization_parameters.qmax - quantization_parameters.zp)
+                * quant_scale_for_guards
+            )
+            neginf_value = (
+                float(quantization_parameters.qmin - quantization_parameters.zp)
+                * quant_scale_for_guards
+            )
             reference_output = reference_output.where(
                 torch.abs(reference_output) >= scale, 0.0
             )