Qualcomm AI Engine Direct - Fix RMSNorm handling when the weight is absent (elementwise_affine=False) (pytorch#18219)

shewu-quic · web-flow · commit 75c85e7e0682 · 2026-03-17T12:49:53.000-05:00
diff --git a/backends/qualcomm/builders/op_rms_norm.py b/backends/qualcomm/builders/op_rms_norm.py
@@ -61,8 +61,25 @@ def define_node(
         axes = [node.args[0].meta["val"].dim() - 1]
         axes_shape = [len(axes)]
 
-        weight_node = self.get_node(node.args[2])
-        weight_tensor = get_parameter(weight_node, self.edge_program)
+        has_weight = len(node.args) > 2 and node.args[2] is not None
+        if has_weight:
+            weight_node = self.get_node(node.args[2])
+            weight_tensor = get_parameter(weight_node, self.edge_program)
+        else:
+            # elementwise_affine=False: use all-ones weight as identity
+            weight_tensor = torch.ones(normalized_shapes, dtype=torch.float32)
+            weight_node = torch.fx.Node(
+                node.graph,
+                node.name + "_runtime_weight",
+                "call_function",
+                exir_ops.edge.aten.tensor.default,
+                (),
+                {},
+            )
+            if quant_attrs := node.meta.get(QCOM_QUANT_ATTRS):
+                quant_attrs = quant_attrs.copy()
+                quant_attrs[QCOM_ZERO_POINT] = 0
+                weight_node.meta[QCOM_QUANT_ATTRS] = quant_attrs
         weight_tensor_wrapper = self.define_tensor(
             weight_node,
             node,
diff --git a/backends/qualcomm/quantizer/annotators/htp_rules.py b/backends/qualcomm/quantizer/annotators/htp_rules.py
@@ -1289,7 +1289,6 @@ def annotate(node: Node, quantization_config: QuantizationConfig) -> None:
             return
 
         act_node = node.args[0]
-        weight_node = node.args[2]
 
         # TODO current only support 16a16w
         annotate_input_qspec_map(
@@ -1298,11 +1297,13 @@ def annotate(node: Node, quantization_config: QuantizationConfig) -> None:
             quantization_config.input_activation,
         )
 
-        annotate_input_qspec_map(
-            node,
-            weight_node,
-            quantization_config.input_activation,
-        )
+        if len(node.args) > 2 and node.args[2] is not None:
+            weight_node = node.args[2]
+            annotate_input_qspec_map(
+                node,
+                weight_node,
+                quantization_config.input_activation,
+            )
         nodes_to_mark_annotated = [node]
         annotate_output_qspec(node, quantization_config.output_activation)
         _mark_nodes_as_annotated(nodes_to_mark_annotated)
diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py
@@ -1897,11 +1897,11 @@ def forward(self, x):
 
 
 class RmsNorm(torch.nn.Module):
-    def __init__(self, eps=None):
+    def __init__(self, eps=None, elementwise_affine=True):
         super().__init__()
-        self.rms = torch.nn.RMSNorm([4])
+        self.rms = torch.nn.RMSNorm([4], elementwise_affine=elementwise_affine)
         if eps:
-            self.rms = torch.nn.RMSNorm([4], eps)
+            self.rms = torch.nn.RMSNorm([4], eps, elementwise_affine=elementwise_affine)
 
     def forward(self, x):
         return self.rms(x)
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -1630,6 +1630,7 @@ def test_qnn_backend_rms_norm(self):
         modules = [
             RmsNorm(),  # noqa: F405
             RmsNorm(eps=1e-5),  # noqa: F405
+            RmsNorm(elementwise_affine=False),  # noqa: F405
         ]
         sample_input = (torch.randn([1, 1, 1, 4]),)
         for i, module in enumerate(modules):
@@ -3958,6 +3959,7 @@ def test_qnn_backend_rms_norm(self):
         modules = [
             RmsNorm(),  # noqa: F405
             RmsNorm(eps=1e-5),  # noqa: F405
+            RmsNorm(elementwise_affine=False),  # noqa: F405
         ]
         sample_input = (torch.randn([1, 1, 1, 4]),)
         for i, module in enumerate(modules):