CodeLinaro
diff --git a/‎.ci/scripts/test_wheel_package_qnn.sh‎
Lines changed: 4 additions & 1 deletion b/‎.ci/scripts/test_wheel_package_qnn.sh‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py‎
Lines changed: 14 additions & 3 deletions b/‎backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py‎
Lines changed: 14 additions & 3 deletions
diff --git a/‎backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp‎
Lines changed: 3 additions & 1 deletion b/‎backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎backends/qualcomm/builders/qnn_constants.py‎
Lines changed: 5 additions & 0 deletions b/‎backends/qualcomm/builders/qnn_constants.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎backends/qualcomm/quantizer/README.md‎
Lines changed: 169 additions & 46 deletions b/‎backends/qualcomm/quantizer/README.md‎
Lines changed: 169 additions & 46 deletions
@@ -18,6 +18,9 @@ import argparse
 
 import torch
 from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
+from executorch.backends.qualcomm.serialization.qc_schema import (
+    QnnExecuTorchBackendType,
+)
 from executorch.backends.qualcomm.utils.utils import (
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
@@ -50,7 +53,7 @@ def main() -> None:
     example_inputs = model.get_example_inputs()
 
     if args.quantization:
-        quantizer = QnnQuantizer()
+        quantizer = QnnQuantizer(backend=QnnExecuTorchBackendType.kHtpBackend, soc_model=get_soc_to_chipset_map()[args.soc])
         m = torch.export.export(model.eval(), example_inputs, strict=True).module()
         if args.quantization == "qat":
             m = prepare_qat_pt2e(m, quantizer)
 
@@ -4,8 +4,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import torch
-from executorch.backends.qualcomm.builders.node_visitor import q_ops
+from executorch.backends.qualcomm.builders.node_visitor import dq_ops, q_ops
 from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
+from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
 
@@ -25,17 +26,27 @@ def __init__(self, edge_program: torch.export.ExportedProgram):
 
     def _annotate_adaptive_avg_pool1d(self, graph_module: torch.fx.GraphModule):
         partitions = get_source_partitions(
-            graph_module.graph, [torch.ops.aten.adaptive_avg_pool1d.default]
+            graph_module.graph,
+            [torch.ops.aten.adaptive_avg_pool1d.default, torch.adaptive_avg_pool1d],
         )
         for src_partitions in partitions.values():
             for src_partition in src_partitions:
+                input_node = src_partition.input_nodes[0]
+                if input_node.target in dq_ops:
+                    quant_attrs = get_quant_attrs(self.edge_program, input_node)
+                    for n in src_partition.nodes:
+                        if n.target == exir_ops.edge.aten.unsqueeze_copy.default:
+                            n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()
+
                 output = src_partition.output_nodes[0]
                 if (list(output.users)[0].target) in q_ops:
                     quant_attrs = get_quant_attrs(
                         self.edge_program, list(output.users)[0]
                     )
                     for n in src_partition.nodes:
-                        n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()
+                        # For adaptive_avg_pool2d and squeeze
+                        if n.target != exir_ops.edge.aten.unsqueeze_copy.default:
+                            n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()
 
     def call(self, graph_module: torch.fx.GraphModule):
         self._annotate_adaptive_avg_pool1d(graph_module)
 
@@ -163,8 +163,10 @@ std::string GetQnnSdkBuildId(std::string library_path) {
   if (err != QNN_SUCCESS || id == nullptr) {
     throw std::runtime_error("Failed to get QNN backend build ID");
   }
+  // Copy id to avoid dangling pointer.
+  std::string build_id(id);
   qnn_loaded_backend.Unload();
-  return std::string(id);
+  return build_id;
 }
 
 py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
 
@@ -167,6 +167,11 @@ class OpElementWiseFloor:
     op_name: str = "ElementWiseFloor"
 
 
+@dataclass(init=False, frozen=True)
+class OpElementWiseFloorDiv:
+    op_name: str = "ElementWiseFloorDiv"
+
+
 @dataclass(init=False, frozen=True)
 class OpElementWiseGreater:
     op_name: str = "ElementWiseGreater"
 
@@ -43,24 +43,61 @@ In order to conduct PTQ for floating point precision graph, observers are requir
 Qualcomm backend will consume the generated encodings and lower operators with fixed precision. This tutorial will guide you through the details of inserting observer and some useful utilities.
 
 ### Register Annotation via Operator Type
-Let's start with hooking callback for designated operator target:
+Let's start with hooking callback for designated operator target in `annotators/{backend}_rules.py`:
 ```python
-def register_annotator(ops: List[OpOverload]):
-    def decorator(annotator: Callable):
-        for op in ops:
-            OP_ANNOTATOR[op] = annotator
+def register_annotator(aten_ops: List[OpOverload], qnn_op: Optional[str]):
+    def _wrap(op_def: GeneralOpDef):
+        for aten_op in aten_ops:
+            annotate_fn = op_def.annotate
+            validate_fn = op_def.validate
+            rule = OpQuantRule(
+                aten_op=aten_op,
+                qnn_op=qnn_op,
+                annotate_fn=annotate_fn,
+                validate_fn=validate_fn,
+            )
+            _RULES[rule.aten_op] = rule
+        return rule
 
-    return decorator
+    return _wrap
 ```
-The `register_annotator` decorator provides a convenient way to attach your own annotation logic, which requires list of operator type as its input argument.<br/> For example, the torch activation functions have `copy`, `in-place` implementation with small difference appears in naming (an extra `_` postfix), which will map to the same [Core ATen](https://pytorch.org/docs/stable/torch.compiler_ir.html) operators after `to_edge`:
+The `register_annotator` decorator provides a convenient way to attach your own annotation and validation logic. It takes a list of operator types and a QNN operation name as its input arguments.<br/> For example, the torch activation functions have `copy` and `in-place` implementations whose names differ only slightly (an extra `_` postfix), and both map to the same [Core ATen](https://pytorch.org/docs/stable/torch.compiler_ir.html) operators after `to_edge`:
 ```python
-@register_annotator([torch.ops.aten.relu.default, torch.ops.aten.relu_.default])
+@register_annotator(
+    [torch.ops.aten.relu.default, torch.ops.aten.relu_.default],
+    QnnConstants.OpRelu.op_name,
+)
+```
+Where `torch.ops.aten.relu.default` / `torch.ops.aten.relu_.default` map to `copy` / `in-place` version and both will be converted into `torch.ops.aten.relu.default` ultimately.<br/>
+The `qnn_op` is used to specify quantization constraints for validation with the `BackendOpInfo` library. If an operator doesn’t directly correspond to a QNN operator, you can set its value to `None`, which will skip validation for that operator.
+```python
+@register_annotator([operator.getitem], qnn_op=None)
+```
+The `operator.getitem` function acts as a skip operator in the QNN backend and does not correspond to any QNN operator. Therefore, we assign `qnn_op=None`.<br/><br>
+
+Create a base class `GeneralOpDef` that establishes the standard annotation and validation function behaviors.
+```python
+class GeneralOpDef:
+    @staticmethod
+    def annotate(node: Node, quantization_config: QuantizationConfig):
+        annotate_single_in_single_out(node, quantization_config)
+
+    @staticmethod
+    def validate(
+        node: Node, constraints_list: List[NormalizedConstraints], soc_info: SocInfo
+    ) -> bool:
+        valid = True
+        # If there's no quantization annotation, we can't validate against constraints.
+        if not _is_annotated([node]):
+            return valid
+        valid &= validate_against_backend_constraints(node, constraints_list)
+        return valid
 ```
-Where `torch.ops.aten.relu.default` / `torch.ops.aten.relu_.default` map to `copy` / `in-place` version and both will be converted into `torch.ops.aten.relu.default` ultimately.<br/><br>
 
-The function signature is defined as follow with two arguments:
+The `annotate` function signature is defined as follows, with two arguments:
 ```python
-def annotate_xxx(node: Node, quantization_config: QuantizationConfig) -> None:
+@staticmethod
+def annotate(node: Node, quantization_config: QuantizationConfig) -> None:
 ```
 - __node__: graph node required to be observed
 - __quantization_config__: data structure describing quantization configurations for IO activation / weight / bias
@@ -112,75 +149,161 @@ Now, we can start to fill in the function body:
     ```python
     @register_annotator(
         [
-            torch.ops.aten.conv2d.default,
             torch.ops.aten.conv1d.default,
-            torch.ops.aten.conv_transpose2d.input,
+            torch.ops.aten.conv2d.default,
+            torch.ops.aten.conv2d.padding,
+            torch.ops.aten.convolution.default,
         ]
     )
-    def annotate_conv2d(node: Node, quantization_config: QuantizationConfig) -> None:
+    class Conv2d(GeneralOpDef):
     ```
     Multiple targets are expected to meet the same annotation criteria, so registering them together is encouraged for code reuse.
-
+- Define an annotation function interface
+    ```python
+        @staticmethod
+        def annotate(node: Node, quantization_config: QuantizationConfig) -> None:
+    ```
 - Define map of input quantization spec
     ```python
-        if _is_annotated([node]):
-            return
+            if _is_annotated([node]):
+                return
+
+            # block quantization
+            if quantization_config.block_size is not None:
+                quantization_config.weight.observer_or_fake_quant_ctr.p.keywords.update(
+                    {QCOM_BLOCK_SIZE: quantization_config.block_size}
+                )
 
-        input_qspec_map = {}
+            input_qspec_map = {}
 
-        # annotate input activation
-        input_act = node.args[0]
-        input_spec = quantization_config.input_activation
-        input_qspec_map[input_act] = input_spec
+            # annotate input activation
+            input_act = node.args[0]
+            input_spec = quantization_config.input_activation
+            input_qspec_map[input_act] = input_spec
 
-        # annotate kernel
-        kernel = node.args[1]
-        input_qspec_map[kernel] = quantization_config.weight
+            # annotate kernel
+            kernel = node.args[1]
+            input_qspec_map[kernel] = quantization_config.weight
 
-        # annotate bias
-        if len(node.args) > 2:
-            bias = node.args[2]
-            input_qspec_map[bias] = quantization_config.bias(node)
+            # annotate bias
+            if len(node.args) > 2:
+                bias = node.args[2]
+                input_qspec_map[bias] = quantization_config.bias(node)
     ```
     We first check if current graph node has been annotated. If not, an `input_qspec_map` dictionary required by PyTorch framework will be declared for providing mapping between graph nodes and their configurations.<br/>
     The parameters' order could be found [here](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/Convolution.cpp) mentioned in [ATen Operator Definitions](#pytorch). Since bias node is optional, the implementation will invoke `_derived_bias_quant_spec` to calculate the per-channel bias encoding only if it exists.
 
 - Update node's meta with framework compatible data structure
     ```python
-        node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation(
-            input_qspec_map=input_qspec_map,
-            output_qspec=quantization_config.output_activation,
-            _annotated=True,
-        )
+            node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation(
+                input_qspec_map=input_qspec_map,
+                output_qspec=quantization_config.output_activation,
+                _annotated=True,
+            )
     ```
     Once `input_qspec_map` has been populated, it must be stored in the node's meta under the special tag (`Q_ANNOTATION_KEY`) for `convert_pt2e` to properly insert observers.
 
+- Define a validation function interface
+    ```python
+        @staticmethod
+        def validate(
+            node: Node, constraints_list: List[NormalizedConstraints], soc_info: SocInfo
+        ) -> bool:
+    ```
+- Check if current node is annotated
+    ```python
+            valid = True
+            if not _is_annotated([node]):
+                return valid
+    ```
+- Check if current node supports LPBQ
+    ```python
+            weight_node = node.args[1]
+            weight_qspec = node.meta[Q_ANNOTATION_KEY].input_qspec_map.get(
+                weight_node, None
+            )
+            if (
+                weight_qspec
+                and weight_qspec.observer_or_fake_quant_ctr.p.keywords.get(
+                    QCOM_BLOCK_SIZE, None
+                )
+                is not None
+            ):
+                valid &= validate_lpbq_support(soc_info)
+                if not valid:
+                    logging.warning(
+                        f"LPBQ (16a4w block-wise quantization) requires V69 or newer for {node.name}"
+                    )
+    ```
+- Check if current node supports 16a16w quantization
+    ```python
+        act_node = node.args[0]
+        act_qspec = node.meta[Q_ANNOTATION_KEY].input_qspec_map.get(act_node, None)
+        if (
+            act_qspec
+            and act_qspec.dtype == torch.int32
+            and weight_qspec
+            and weight_qspec.dtype == torch.int32
+        ):
+            valid &= validate_16a16w_support(soc_info)
+            if not valid:
+                logging.warning(
+                    f"16-bit activations + 16-bit weights requires V73 or newer for {node.name}"
+                )
+    ```
+- Validate the current node against the backend constraints obtained from `BackendOpInfo` based on the `qnn_op`.
+    ```python
+        valid &= validate_against_backend_constraints(node, constraints_list)
+        return valid
+    ```
+    - Validate against the backend constraints by doing the following:
+      - Make sure that `SharedQuantizationSpec` is applied for `is_math_invariant` operators, such as view operations.
+      - Check the `scale` and `zero_point` values for specific operations. For example, sigmoid op requires `scale = 1 / (q_max - q_min + 1)` and `zero_point = 0`.
+      - Ensure that the `qscheme` satisfies symmetric constraints.
+      - Verify that the input and output `dtype` are supported.
 ### Common Annotators
 For operators without extra parameters to be observed, there are pre-defined annotation methods for convenience:
 - Single in single out operators, e.g.:
     ```python
-    @register_annotator([torch.ops.aten.relu.default, torch.ops.aten.relu_.default])
-    def annotate_relu(node: Node, quantization_config: QuantizationConfig) -> None:
-        annotate_single_in_single_out(node, quantization_config)
+    @register_annotator(
+        [torch.ops.aten.relu.default, torch.ops.aten.relu_.default],
+        QnnConstants.OpRelu.op_name,
+    )
+    class Relu(GeneralOpDef):
+        pass
     ```
 
 - Binary in single out operators, e.g.:
     ```python
-    @register_annotator([torch.ops.aten.add, torch.ops.aten.add.Tensor])
-    def annotate_add(node: Node, quantization_config: QuantizationConfig) -> None:
-        annotate_binary(node, quantization_config)
+    @register_annotator(
+        [torch.ops.aten.add, torch.ops.aten.add.Tensor, torch.ops.aten.add_.Tensor],
+        QnnConstants.OpElementWiseAdd.op_name,
+    )
+    class Add(GeneralOpDef):
+        @staticmethod
+        def annotate(node: Node, quantization_config: QuantizationConfig) -> None:
+            annotate_binary(node, quantization_config)
     ```
 
 - Shared encodings between input / output, e.g.:<br/>
     ```python
     # For operators without arithmetical function, IOs are expected to own the same encodings.
-    @register_annotator([torch.ops.aten.transpose.int])
-    def annotate_transpose(node: Node, quantization_config: QuantizationConfig) -> None:
-        annotate_in_out_obs_sharing_op(node, quantization_config)
-        if not _is_annotated([node]):
-            annotate_single_in_single_out(node, quantization_config)
+   @register_annotator(
+        [
+            torch.ops.aten.permute.default,
+            torch.ops.aten.swapaxes.default,
+            torch.ops.aten.transpose.int,
+        ],
+        QnnConstants.OpTranspose.op_name,
+    )
+    class Permute(GeneralOpDef):
+        @staticmethod
+        def annotate(node: Node, quantization_config: QuantizationConfig) -> None:
+            annotate_in_out_obs_sharing_op(node, quantization_config)
+            if not _is_annotated([node]):
+                annotate_single_in_share_out(node, quantization_config)
     ```
-    This annotator only works for single-in-single-out scenario with node's input that has already been annotated. If not, we still need to invoke `annotate_single_in_single_out` again (this path should be less likely).
+    This annotator only works for the single-in-single-out scenario where the node's input has already been annotated. If it has not, we still need to fall back to `annotate_single_in_share_out` (this path should be less likely).
 
 ## Issues
 Please refer to the [issue section](../README.md#issues) for more information.
Original file line number	Diff line number	Diff line change
`@@ -163,8 +163,10 @@ std::string GetQnnSdkBuildId(std::string library_path) {`
`163`	`163`	`if (err != QNN_SUCCESS \|\| id == nullptr) {`
`164`	`164`	`throw std::runtime_error("Failed to get QNN backend build ID");`
`165`	`165`	`}`
	`166`	`+ // Copy id to avoid dangling pointer.`
	`167`	`+ std::string build_id(id);`
`166`	`168`	`qnn_loaded_backend.Unload();`
`167`		`- return std::string(id);`
	`169`	`+ return build_id;`
`168`	`170`	`}`
`169`	`171`
`170`	`172`	`py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {`