Arm backend: Add limited boolean mask support to index_put (pytorch#18396)

Erik-Lundell · web-flow · commit 59838fc3c9b1 · 2026-03-26T15:40:23.000+01:00
index_put with a single boolean mask and scalar values can be normalized
to a where operator.
This is also an important case that has shown up in models, e.g.
x[mask] = 0.

Since the mask is not guaranteed to be constant, require the value to be
a scalar, which can be broadcast for any mask.

With multiple boolean masks, or boolean masks mixed with integer indices,
shapes and ranks become data dependent.


Signed-off-by: Erik Lundell <erik.lundell@arm.com>
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
@@ -126,6 +126,9 @@
 from .match_arg_dtype_pass import MatchArgDtypePass  # noqa
 from .match_arg_ranks_pass import MatchArgRanksPass  # noqa
 from .mm_to_bmm_pass import ConvertMmToBmmPass  # noqa
+from .normalize_index_put_bool_index_tensor_pass import (  # noqa
+    NormalizeIndexPutBoolIndexTensorPass,
+)
 from .normalize_index_put_none_indices_pass import (  # noqa
     NormalizeIndexPutNoneIndicesPass,
 )
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -113,6 +113,7 @@
     InsertTableOpsPass,
     MatchArgDtypePass,
     MatchArgRanksPass,
+    NormalizeIndexPutBoolIndexTensorPass,
     NormalizeIndexPutNoneIndicesPass,
     NormalizeWhileInitialArgsPass,
     PromoteBoolOperandsPass,
@@ -450,6 +451,7 @@ def _tosa_pipeline(
         self.add_passes(
             [
                 NormalizeIndexPutNoneIndicesPass(),
+                NormalizeIndexPutBoolIndexTensorPass(),
                 RewriteIndexPutPass(),
                 RewriteBoolBitwiseToLogicalPass(),
                 DecomposeRemainderPass(),
diff --git a/backends/arm/_passes/normalize_index_put_bool_index_tensor_pass.py b/backends/arm/_passes/normalize_index_put_bool_index_tensor_pass.py
@@ -0,0 +1,101 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Set, Type
+
+import torch
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.rewrite_index_put_pass import RewriteIndexPutPass
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+
+class NormalizeIndexPutBoolIndexTensorPass(ArmPass):
+    """Normalize a single-boolean-mask, scalar-value index_put to a where op.
+    In the general case, boolean masks are complex and data dependent. The simple
+    case ``x[mask] = scalar`` can however be translated directly to a where operation:
+
+    out = index_put(destination, [mask], data, accumulate=False)
+    becomes
+    mask = reshape(mask, mask_shape_padded)
+    data = reshape(data, data_shape_padded)
+    out = where(mask, data, destination)
+
+    The padded mask shape is the mask shape right-padded with ones up to the rank of
+    destination, and data is reshaped to all ones of that rank (both only if needed).
+    `data` must hold exactly one element so that it can be broadcast to any
+    destination shape, whatever the (non-constant) mask selects.
+    """
+
+    _passes_required_after: Set[Type[ExportPass]] = {RewriteIndexPutPass}
+
+    def __init__(self):
+        super().__init__()
+        self.reshape_op = exir_ops.edge.aten.view_copy.default
+        self.where_op = exir_ops.edge.aten.where.self
+
+    def _is_valid_bool_mask(
+        self,
+        indices_tensor_list,
+        data,
+        accumulate: bool,
+    ) -> bool:
+        """Tell if the first index is a bool mask; raise if its args are unsupported."""
+        indices = indices_tensor_list[0]
+        if indices is None or indices.data.dtype != torch.bool:
+            return False
+
+        # We have a boolean mask; unsupported args here mean the op was wrongly delegated.
+        if accumulate or len(indices_tensor_list) != 1 or data.data.numel() != 1:
+            raise RuntimeError(
+                f"Got unsupported args for bool mask index_put: {accumulate=}, num indices={len(indices_tensor_list)}!=1, data shape {data.data.shape} not scalar.\n"
+                "This is a bug, the operator should not have been delegated."
+            )
+
+        return True
+
+    def call_operator(self, op, args, kwargs, meta, updated: bool | None = False):
+        if op not in (exir_ops.edge.aten.index_put.default,):
+            return super().call_operator(op, args, kwargs, meta, updated)
+        # args are (destination, indices, data[, accumulate]); a missing flag counts as False.
+        destination, indices_tensor_list, data = args[:3]
+        accumulate = len(args) > 3 and bool(args[3])
+        indices_tensor_list = list(indices_tensor_list)
+        if not self._is_valid_bool_mask(indices_tensor_list, data, accumulate):
+            return super().call_operator(op, args, kwargs, meta, updated)
+        # Right-pad the mask shape with ones up to the rank of destination.
+        mask = indices_tensor_list[0]
+        destination_shape = tuple(destination.data.shape)
+        mask_shape = tuple(mask.data.shape)
+        padded_mask_shape = (
+            *mask_shape,
+            *([1] * (len(destination_shape) - len(mask_shape))),
+        )
+
+        if len(mask_shape) < len(destination_shape):
+            mask = super().call_operator(
+                self.reshape_op,
+                (mask, padded_mask_shape),
+                {},
+                meta,
+                True,
+            )
+
+        if len(destination_shape) != len(data.data.shape):
+            data = super().call_operator(
+                self.reshape_op,
+                (data, [1] * len(destination_shape)),
+                {},
+                meta,
+                True,
+            )
+
+        return super().call_operator(
+            self.where_op,
+            (mask, data, destination),
+            kwargs,
+            meta,
+            True,
+        )
diff --git a/backends/arm/operator_support/__init__.py b/backends/arm/operator_support/__init__.py
@@ -13,6 +13,7 @@
     embedding_support,
     ethos_u55_support,
     gather_support,
+    index_put_support,
     index_select_support,
     index_tensor_support,
     minmax_support,
diff --git a/backends/arm/operator_support/index_put_support.py b/backends/arm/operator_support/index_put_support.py
@@ -0,0 +1,86 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Declare operator support for ``aten.index_put``."""
+
+from typing import cast
+
+import torch
+import torch.fx as fx
+from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
+from executorch.backends.arm.operator_support.tosa_supported_operators import (
+    register_tosa_support_check,
+    SupportedTOSAOperatorCheck,
+)
+
+from executorch.backends.arm.tosa import TosaSpecification
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+@register_tosa_support_check
+class IndexPutSupported(SupportedTOSAOperatorCheck):
+    """Reject unsupported ``index_put`` cases.
+
+    Nodes with only integer (non-bool) indices are accepted by this check.
+
+    Boolean-mask indexing is limited; the node is rejected unless:
+    - there is exactly one non-None index, and it is the bool mask
+    - the ``values`` tensor holds exactly one element
+    - ``accumulate`` is absent or falsy.
+    Mixing bool and non-bool indices is always rejected.
+    """
+
+    targets = [exir_ops.edge.aten.index_put.default]
+
+    def is_node_tosa_supported(
+        self, node: fx.Node, tosa_spec: TosaSpecification
+    ) -> bool:
+        indices_tensors = cast(list[fx.Node], node.args[1])
+
+        # A None index means "select the whole dim"; that is handled, so skip those here.
+        explicit_indices = [index for index in indices_tensors if index is not None]
+        has_bool_index = any(
+            get_first_fake_tensor(index).dtype == torch.bool
+            for index in explicit_indices
+        )
+        has_non_bool_index = any(
+            get_first_fake_tensor(index).dtype != torch.bool
+            for index in explicit_indices
+        )
+        # A bool mask may not be combined with non-bool indices.
+        if has_bool_index and has_non_bool_index:
+            self.reporter.report_reject(
+                node,
+                (
+                    "Mixed boolean mask and integer indices in "
+                    "index_put are not supported."
+                ),
+            )
+            return False
+        # A bool mask must be the only explicit (non-None) index.
+        if has_bool_index and len(explicit_indices) != 1:
+            self.reporter.report_reject(
+                node,
+                "Boolean mask index_put only supports a single explicit bool index.",
+            )
+            return False
+        # Remaining bool-mask limits: single-element values and no accumulate.
+        if has_bool_index:
+            values = cast(fx.Node, node.args[2])
+            values_tensor = get_first_fake_tensor(values)
+            if values_tensor.numel() != 1:
+                self.reporter.report_reject(
+                    node,
+                    "Boolean mask index_put only supports scalar values.",
+                )
+                return False
+
+            if len(node.args) > 3 and node.args[3]:
+                self.reporter.report_reject(
+                    node,
+                    "Bool-mask index_put not supported with accumulate = True.",
+                )
+                return False
+
+        return True
diff --git a/backends/arm/operator_support/tosa_profile_supported_op_lists.py b/backends/arm/operator_support/tosa_profile_supported_op_lists.py
@@ -124,7 +124,6 @@
     exir_ops.edge.aten.bitwise_not.default,
     exir_ops.edge.aten.copy.default,
     exir_ops.edge.aten.tan.default,
-    exir_ops.edge.aten.index_put.default,
     exir_ops.edge.aten.silu.default,
     exir_ops.edge.aten.detach_copy.default,
 }
@@ -249,7 +248,6 @@
     exir_ops.edge.aten.copy.default,
     exir_ops.edge.aten.floor_divide.default,
     exir_ops.edge.aten.tan.default,
-    exir_ops.edge.aten.index_put.default,
     exir_ops.edge.aten.detach_copy.default,
 }
 
diff --git a/backends/arm/test/ops/test_index_put.py b/backends/arm/test/ops/test_index_put.py
@@ -156,6 +156,18 @@
         ),
         0,
     ),
+    "bool_mask_scalar": (
+        lambda: (
+            torch.randn((2, 3, 4), dtype=torch.float32),
+            (
+                torch.arange(3).expand(2, 3)
+                >= torch.tensor([3, 2], dtype=torch.int64)[:, None],
+            ),
+            torch.tensor(0.0, dtype=torch.float32),
+            False,
+        ),
+        0,
+    ),
     "none_indices": (
         lambda: (
             torch.ones((5, 3, 2, 2), dtype=torch.float32),
@@ -210,6 +222,62 @@
         ),
         0,
     ),
+    "none_and_bool_indices_scalar": (
+        lambda: (
+            torch.randn((2, 3, 4), dtype=torch.float32),
+            (None, torch.tensor([True, False, True]), None),
+            torch.tensor(0.0, dtype=torch.float32),
+            False,
+        ),
+        0,
+    ),
+}
+mixed_indices_not_supported = {  # bool-mask cases used by the *_not_delegated tests
+    "bool_and_tensor_indices_scalar": (  # bool mask combined with an int64 index
+        lambda: (
+            torch.randn((2, 3, 4), dtype=torch.float32),
+            (
+                torch.tensor([True, False]),
+                torch.tensor([1, 2], dtype=torch.int64),
+            ),
+            torch.tensor(0.0, dtype=torch.float32),
+            False,
+        ),
+        0,
+    ),
+    "bool_mask_tensor": (  # single bool mask, but values has more than one element
+        lambda: (
+            torch.randn((2, 3, 4), dtype=torch.float32),
+            (torch.tensor([True, False]),),
+            torch.rand((1, 3, 4), dtype=torch.float32),
+            False,
+        ),
+        0,
+    ),
+    "two_bool_mask_scalar": (  # two bool masks with a scalar value
+        lambda: (
+            torch.randn((2, 3, 4), dtype=torch.float32),
+            (
+                torch.tensor([False, True]),
+                torch.tensor([True, False, False]),
+            ),
+            torch.tensor(0.0, dtype=torch.float32),
+            False,
+        ),
+        0,
+    ),
+    "two_bool_mask_tensor": (  # two bool masks and a multi-element values tensor
+        lambda: (
+            torch.randn((2, 3, 4), dtype=torch.float32),
+            (
+                torch.tensor([False, True]),
+                torch.tensor([True, False, False]),
+            ),
+            torch.rand((1, 4), dtype=torch.float32),
+            False,
+        ),
+        0,
+    ),
 }
 test_data_int = {
     "rank3_zeros_int8": (
@@ -385,3 +453,28 @@ def test_index_put_vgf_quant(test_module: input_t):
         exir_op=IndexPut.exir_op,
     )
     pipeline.run()
+
+
+@common.parametrize("test_module", mixed_indices_not_supported)
+def test_index_put_tosa_FP_not_delegated(test_module: input_t):
+    pipeline = OpNotSupportedPipeline[input_t](
+        IndexPut(),
+        test_module[0](),  # element 0 of each case is a lambda that builds the inputs
+        {IndexPut.exir_op: 1},  # presumably the expected non-delegated op count — confirm
+        quantize=False,  # unquantized variant of the check
+        u55_subset=False,
+        n_expected_delegates=0,  # no delegate is expected for these cases
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", mixed_indices_not_supported)
+def test_index_put_tosa_INT_not_delegated(test_module: input_t):
+    pipeline = OpNotSupportedPipeline[input_t](
+        IndexPut(),
+        test_module[0](),  # element 0 of each case is a lambda that builds the inputs
+        {IndexPut.exir_op: 1},  # presumably the expected non-delegated op count — confirm
+        quantize=True,  # quantized variant of the check
+        n_expected_delegates=0,  # no delegate is expected for these cases
+    )
+    pipeline.run()

Original file line number	Diff line number	Diff line change
`@@ -124,7 +124,6 @@`
`124`	`124`	`exir_ops.edge.aten.bitwise_not.default,`
`125`	`125`	`exir_ops.edge.aten.copy.default,`
`126`	`126`	`exir_ops.edge.aten.tan.default,`
`127`		`- exir_ops.edge.aten.index_put.default,`
`128`	`127`	`exir_ops.edge.aten.silu.default,`
`129`	`128`	`exir_ops.edge.aten.detach_copy.default,`
`130`	`129`	`}`
`@@ -249,7 +248,6 @@`
`249`	`248`	`exir_ops.edge.aten.copy.default,`
`250`	`249`	`exir_ops.edge.aten.floor_divide.default,`
`251`	`250`	`exir_ops.edge.aten.tan.default,`
`252`		`- exir_ops.edge.aten.index_put.default,`
`253`	`251`	`exir_ops.edge.aten.detach_copy.default,`
`254`	`252`	`}`
`255`	`253`