Arm backend: Add multiple get_attr folding crash workaround (pytorch#19663)

AdrianLundell · web-flow · commit 41a38d8a8e1a · 2026-05-19T21:29:39.000+02:00
See description in the added test. The workaround implemented is to
create multiple attributes pointing to the same data source.


Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
@@ -97,6 +97,7 @@
 from .decompose_var_pass import DecomposeVarPass  # noqa
 from .decompose_where_scalar_other_pass import DecomposeWhereScalarOtherPass  # noqa
 from .decorate_fp32_to_int32_casting_pass import DecorateFp32toInt32CastingPass  # noqa
+from .deduplicate_get_attr_pass import DeduplicateGetAttrPass  # noqa
 from .ensure_unique_output_nodes_pass import EnsureUniqueOutputNodesPass  # noqa
 from .fold_qdq_with_annotated_qparams_pass import (  # noqa
     FoldAndAnnotateQParamsPass,
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -97,6 +97,7 @@
     DecomposeVarPass,
     DecomposeWhereScalarOtherPass,
     DecorateFp32toInt32CastingPass,
+    DeduplicateGetAttrPass,
     EnsureUniqueOutputNodesPass,
     FoldAndAnnotateQParamsPass,
     FuseBatchNorm2dPass,
@@ -651,6 +652,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
                 [
                     ReplaceInfAndLimitValuesPass(tfa_pass=True),
                     DecomposeMaskedFillPass(tfa_pass=True),
+                    DeduplicateGetAttrPass(tfa_pass=True),
                 ]
             )
 
diff --git a/backends/arm/_passes/deduplicate_get_attr_pass.py b/backends/arm/_passes/deduplicate_get_attr_pass.py
@@ -0,0 +1,71 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Set, Type
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.fx import GraphModule, Node
+from torchao.quantization.pt2e.utils import get_new_attr_name_with_prefix
+
+
+class DeduplicateGetAttrPass(ArmPass):
+    """Give duplicate get_attr nodes distinct backing attributes.
+
+    Torchao's constant folder can delete a shared backing attribute while
+    another get_attr node still refers to it. Keep separate graph nodes so PT2E
+    can attach per-use observers and backend lowering can process constants per
+    use.
+
+    The first get_attr node encountered for a given target keeps that target.
+    Every later get_attr node with the same target is retargeted to a newly
+    registered attribute that holds the very same object, so no data is
+    copied.
+
+    """
+
+    # Empty set: this pass declares no passes that must run after it.
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
+    def _get_attr(self, graph_module: GraphModule, target: str) -> Any:
+        """Resolve a dotted attribute path starting from ``graph_module``.
+
+        Args:
+            graph_module: Object the lookup starts from.
+            target: Attribute path whose components are separated by ".".
+
+        Returns:
+            The object found after following every component with getattr.
+
+        """
+        attr: Any = graph_module
+        for target_atom in target.split("."):
+            attr = getattr(attr, target_atom)
+        return attr
+
+    def _copy_attr(self, graph_module: GraphModule, node: Node) -> str:
+        """Register a new attribute referring to the same data as the original
+        one.
+
+        Args:
+            graph_module: Module that owns the attribute named by
+                ``node.target`` and that receives the new attribute.
+            node: get_attr node whose target is to be duplicated.
+
+        Returns:
+            The name of the newly registered attribute.
+
+        """
+
+        assert isinstance(node.target, str)
+        attr = self._get_attr(graph_module, node.target)
+        # NOTE(review): get_new_attr_name_with_prefix presumably returns a
+        # callable that yields an attribute name not yet used on the module -
+        # confirm against torchao.
+        get_new_attr_name = get_new_attr_name_with_prefix(
+            f"_deduplicated_get_attr_{node.name}_"
+        )
+        attr_name = get_new_attr_name(graph_module)
+
+        # Check Parameter before Tensor: nn.Parameter is a Tensor subclass, so
+        # the order decides whether it is registered as a parameter or buffer.
+        if isinstance(attr, torch.nn.Parameter):
+            graph_module.register_parameter(attr_name, attr)
+        elif isinstance(attr, torch.Tensor):
+            graph_module.register_buffer(attr_name, attr)
+        else:
+            # Anything that is not a tensor is attached as a plain attribute.
+            setattr(graph_module, attr_name, attr)
+
+        return attr_name
+
+    def call(self, graph_module: GraphModule) -> PassResult:
+        """Retarget every repeated get_attr node to its own attribute.
+
+        Args:
+            graph_module: Graph module whose get_attr nodes are inspected and,
+                when targets repeat, modified in place.
+
+        Returns:
+            A PassResult holding the graph module and a flag telling whether
+            any node was retargeted.
+
+        """
+        seen_targets: set[str] = set()
+        modified = False
+
+        for node in graph_module.graph.find_nodes(op="get_attr"):

+            # First use of a target: remember it and leave the node untouched.
+            if node.target not in seen_targets:
+                seen_targets.add(node.target)
+                continue
+
+            # Repeated use: point this node at a fresh attribute name.
+            node.target = self._copy_attr(graph_module, node)
+            modified = True
+
+        # Only lint and recompile when the graph actually changed.
+        if modified:
+            graph_module.graph.lint()
+            graph_module.recompile()
+
+        return PassResult(graph_module, modified)
diff --git a/backends/arm/test/quantizer/test_selective_quantization.py b/backends/arm/test/quantizer/test_selective_quantization.py
@@ -17,6 +17,7 @@
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import QuantizationPipeline
 from executorch.backends.arm.tosa import TosaSpecification
+from executorch.backends.cortex_m.test.tester import ramp_tensor
 from executorch.backends.test.harness.stages import StageType
 from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
 from torchvision import models, transforms  # type: ignore[import-untyped]
@@ -229,6 +230,20 @@ def test_composable_global_none_linear_graph_tail_tosa_INT():
 normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 
 
+class SharedBufferEmbeddingLinearConstantFold(torch.nn.Module):
+    """Module whose embedding and bias-free linear layer share one weight.
+
+    The forward pass adds the embedding lookup of ``ids``, summed over dim 1,
+    to the linear projection of ``x``.
+
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.shared = torch.nn.Embedding(4, 4)
+        self.lm_head = torch.nn.Linear(4, 4, bias=False)
+        # Tie the weights: both layers now hold the same Parameter object.
+        self.lm_head.weight = self.shared.weight
+
+    def forward(self, ids, x):
+        # Embedding lookup reduced over dim 1.
+        y0 = self.shared(ids).sum(dim=1)
+        # Linear projection that reuses the embedding weight.
+        z = self.lm_head(x)
+        return y0 + z
+
+
 def test_mv3_selective_quant_int16_tosa_INT():
     model = mv3
     inputs = (normalize(torch.randn(1, 3, 224, 224)),)
@@ -302,3 +317,33 @@ def test_mv3_io_quant_tosa_INT():
     )
 
     pipeline.run()
+
+
+def test_multiple_folded_get_attr():
+    """In torchao/quantization/pt2e/constant_fold.py:constant_fold, get_attr
+    node targets are deleted as soon as there is one get_attr node without
+    users using the target.
+
+    If there are multiple get_attr nodes referring to the same target such as
+    in this test, the function crashes if no workaround is present.
+
+    """
+
+    model = SharedBufferEmbeddingLinearConstantFold()
+    # Inputs: integer indices for the embedding and a float tensor for the
+    # linear layer.
+    # NOTE(review): ramp_tensor(-2, 2, (1, 4)) presumably yields a (1, 4)
+    # tensor ramping from -2 to 2 - confirm against the helper.
+    example_inputs = (
+        torch.tensor([[0, 1]], dtype=torch.long),
+        ramp_tensor(-2, 2, (1, 4)),
+    )
+
+    quantizer = get_quantizer()
+    # NOTE(review): passing None presumably leaves Embedding modules
+    # unquantized - confirm against the quantizer API.
+    quantizer.set_module_type(torch.nn.Embedding, None)
+
+    pipeline = QuantizationPipeline(
+        model,
+        example_inputs,
+        quantizer=quantizer,
+        qspecs=None,
+        input_qspecs=None,
+        output_qspecs=None,
+    )
+    # No explicit assertions follow; the test relies on run() not raising.
+    pipeline.run()
diff --git a/backends/cortex_m/passes/cortex_m_pass_manager.py b/backends/cortex_m/passes/cortex_m_pass_manager.py
@@ -8,6 +8,7 @@
 from typing import Any, Optional, Type
 
 from executorch.backends.arm._passes import (
+    DeduplicateGetAttrPass,
     FoldAndAnnotateQParamsPass,
     ScalarsToAttributePass,
 )
@@ -52,6 +53,7 @@ class CortexMPassManager(PassManager):
         ReplaceScalarWithTensorArgPass,
         ClampHardswishPass,
         DecomposeMeanPass,
+        DeduplicateGetAttrPass,
     ]
 
     def __init__(

Original file line number	Diff line number	Diff line change
`@@ -97,6 +97,7 @@`
`97`	`97`	`DecomposeVarPass,`
`98`	`98`	`DecomposeWhereScalarOtherPass,`
`99`	`99`	`DecorateFp32toInt32CastingPass,`
	`100`	`+ DeduplicateGetAttrPass,`
`100`	`101`	`EnsureUniqueOutputNodesPass,`
`101`	`102`	`FoldAndAnnotateQParamsPass,`
`102`	`103`	`FuseBatchNorm2dPass,`
`@@ -651,6 +652,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):`
`651`	`652`	`[`
`652`	`653`	`ReplaceInfAndLimitValuesPass(tfa_pass=True),`
`653`	`654`	`DecomposeMaskedFillPass(tfa_pass=True),`
	`655`	`+ DeduplicateGetAttrPass(tfa_pass=True),`
`654`	`656`	`]`
`655`	`657`	`)`
`656`	`658`
Original file line number	Diff line number	Diff line change
`@@ -8,6 +8,7 @@`
`8`	`8`	`from typing import Any, Optional, Type`
`9`	`9`
`10`	`10`	`from executorch.backends.arm._passes import (`
	`11`	`+ DeduplicateGetAttrPass,`
`11`	`12`	`FoldAndAnnotateQParamsPass,`
`12`	`13`	`ScalarsToAttributePass,`
`13`	`14`	`)`
`@@ -52,6 +53,7 @@ class CortexMPassManager(PassManager):`
`52`	`53`	`ReplaceScalarWithTensorArgPass,`
`53`	`54`	`ClampHardswishPass,`
`54`	`55`	`DecomposeMeanPass,`
	`56`	`+ DeduplicateGetAttrPass,`
`55`	`57`	`]`
`56`	`58`
`57`	`59`	`def __init__(`