CodeLinaro
diff --git a/‎backends/qualcomm/debugger/README.md‎
Lines changed: 77 additions & 1 deletion b/‎backends/qualcomm/debugger/README.md‎
Lines changed: 77 additions & 1 deletion
diff --git a/‎backends/qualcomm/export_utils.py‎
Lines changed: 5 additions & 0 deletions b/‎backends/qualcomm/export_utils.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎backends/qualcomm/runtime/QnnBackendOptions.cpp‎
Lines changed: 8 additions & 0 deletions b/‎backends/qualcomm/runtime/QnnBackendOptions.cpp‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎backends/qualcomm/runtime/QnnBackendOptions.h‎
Lines changed: 13 additions & 0 deletions b/‎backends/qualcomm/runtime/QnnBackendOptions.h‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎backends/qualcomm/runtime/QnnExecuTorch.h‎
Lines changed: 1 addition & 0 deletions b/‎backends/qualcomm/runtime/QnnExecuTorch.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/qualcomm/runtime/QnnExecuTorchBackend.cpp‎
Lines changed: 12 additions & 0 deletions b/‎backends/qualcomm/runtime/QnnExecuTorchBackend.cpp‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎backends/qualcomm/runtime/QnnExecuTorchBackend.h‎
Lines changed: 1 addition & 0 deletions b/‎backends/qualcomm/runtime/QnnExecuTorchBackend.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/qualcomm/runtime/backends/QnnBackendFactory.cpp‎
Lines changed: 2 additions & 1 deletion b/‎backends/qualcomm/runtime/backends/QnnBackendFactory.cpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/qualcomm/runtime/backends/QnnContextCommon.cpp‎
Lines changed: 70 additions & 3 deletions b/‎backends/qualcomm/runtime/backends/QnnContextCommon.cpp‎
Lines changed: 70 additions & 3 deletions
diff --git a/‎backends/qualcomm/runtime/backends/QnnContextCommon.h‎
Lines changed: 21 additions & 2 deletions b/‎backends/qualcomm/runtime/backends/QnnContextCommon.h‎
Lines changed: 21 additions & 2 deletions
@@ -78,7 +78,7 @@ qairt_visualizer.view(reports=[optrace, qhas])
 - `model`: Path to your QNN model file (e.g., `path_to_your_model.dlc`).
 - **`reports`**: List of report file paths, including the optrace (`optrace.json`) and QHAS (`optrace_qnn_htp_analysis_summary.json`).
 
-Note: Files ending with `.bin ` do not support graph visualization in qairt_visualizer.
+Note: Files ending with `.bin` do not support graph visualization in qairt_visualizer.
 
 ## Demo
 
@@ -266,3 +266,79 @@ python -m examples.qualcomm.util_scripts.qnn_intermediate_debugger_demo -b build
 3. Does not support graphs with partitions (partial delegation).
 4. Does not support LLM models.
 5. Does not support graphs with multiple methods.
+
+
+## ExecuTorch QNN HTP Heap Profiling
+
+Measures DSP memory usage when using context binary models on the HTP backend.
+
+### Introduction
+
+DSP heap profiling is available for `QnnContext_createFromBinary` use-cases. It captures total DSP heap usage at two checkpoints:
+
+- **Before the first context is created** (`before_context_created`)
+- **After the last context is freed** (`after_context_freed`)
+
+The difference between the two values represents heap consumed during context execution. The value after freeing is typically equal to or greater than before creation.
+
+### Instructions
+
+#### Run the example test
+
+```bash
+python backends/qualcomm/tests/test_qnn_delegate.py \
+    TestQNNQuantizedUtils.test_qnn_backend_runtime_option_heap_profile \
+    -b build-android -H ${HOST} -s ${SN} -m ${SOC_MODEL}
+```
+
+See [test_qnn_delegate.py](../tests/test_qnn_delegate.py) for the full test implementation.
+
+#### Setting
+
+```python
+from executorch.backends.qualcomm.utils.utils import generate_htp_compiler_spec
+from executorch.backends.qualcomm.utils.utils import generate_qnn_executorch_compiler_spec
+
+backend_options = generate_htp_compiler_spec(
+    use_multi_contexts=True,
+)
+
+compiler_specs = generate_qnn_executorch_compiler_spec(
+    soc_model=self.chipset_table[TestQNN.soc_model],
+    backend_options=backend_options,
+    profile_level=2,
+)
+
+# ...
+
+self.verify_output(
+    module,
+    sample_input,
+    exec_prog,
+    save_heap_result=True,
+)
+```
+
+#### Output file format
+
+The result is written to a text file (default: `htp_heap_usage.txt`) with two lines:
+
+```
+DSP:before_context_created (bytes), <value>
+DSP:after_context_freed (bytes), <value>
+```
+
+#### Reference result
+
+Measured on SM8850. A difference of 0 means no additional heap is consumed during context binary execution.
+
+```console
+First value (before_context_created): 928212 bytes
+Second value (after_context_freed): 928212 bytes
+difference: 0.00 bytes
+```
+
+### Limitations
+
+1. Only supported on the HTP backend on Android and QNX platforms.
+2. Enabling this feature might impact initialization and cleanup time.
@@ -493,6 +493,11 @@ def pull_debug_output(self, etdump_path, debug_ouput_path, callback=None):
         if callback:
             callback()
 
+    def pull_heap_output(self, src_file_path, dst_folder, callback=None):
+        """Issue a ``pull`` of ``src_file_path`` into ``dst_folder`` via ``self._adb``.
+
+        Args:
+            src_file_path: Source path handed to the ``pull`` command.
+            dst_folder: Destination handed to the ``pull`` command.
+            callback: Optional callable; when truthy it is invoked with no
+                arguments after the pull has been issued.
+        """
+        pull_cmd = ["pull", src_file_path, dst_folder]
+        self._adb(pull_cmd)
+        if not callback:
+            return
+        callback()
+
 
 def build_executorch_binary(
     model: torch.nn.Module,  # noqa: B006
 
@@ -52,6 +52,14 @@ template QnnExecuTorchProfileLevel get_option<QnnExecuTorchProfileLevel>(
     QnnExecuTorchProfileLevel,
     const char*);
 
+// Stores `key` in `backend_option.key` (truncated to fit the buffer and always
+// NUL-terminated), then returns the result of
+// get_option(QNN_BACKEND, backend_option).
+executorch::runtime::Error get_runtime_option(
+    const char* key,
+    executorch::runtime::BackendOption& backend_option) {
+  // The final slot of the key buffer is reserved for the terminator, so copy
+  // at most everything before it; strncpy zero-fills any unused remainder.
+  const auto last_index = runtime::kMaxOptionKeyLength - 1;
+  std::strncpy(backend_option.key, key, last_index);
+  backend_option.key[last_index] = '\0';
+  return get_option(QNN_BACKEND, backend_option);
+}
+
 } // namespace qnn
 } // namespace backends
 } // namespace executorch
@@ -37,6 +37,19 @@ struct RuntimeOption {
 template <typename T>
 T get_option(T aot_option, const char* aot_key);
 
+/**
+ * @brief
+ * Get a backend option that was set at runtime.
+ * This method checks runtime options only.
+ *
+ * @param key The key of the runtime option.
+ * @param backend_option Receives the key and the value stored at runtime.
+ */
+
+executorch::runtime::Error get_runtime_option(
+    const char* key,
+    executorch::runtime::BackendOption& backend_option);
+
 } // namespace qnn
 } // namespace backends
 } // namespace executorch
@@ -25,6 +25,7 @@
 #define QNN_RUNTIME_LPAI_CLIENT_PERF_TYPE "qnn_runtime_lpai_client_perf_type"
 #define QNN_RUNTIME_LPAI_AFFINITY "qnn_runtime_lpai_affinity"
 #define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection"
+#define QNN_RUNTIME_HEAP_PROFILING_PATH "qnn_runtime_heap_profiling_path"
 
 #ifdef __cplusplus
 extern "C" {
 
@@ -245,6 +245,13 @@ executorch::runtime::Error QnnExecuTorchBackend::set_option(
         qnn_runtime_lpai_core_selection_.value = *val;
         qnn_runtime_lpai_core_selection_.is_set = true;
       }
+    } else if (strcmp(option.key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0) {
+      if (auto* val =
+              std::get_if<std::array<char, runtime::kMaxOptionValueLength>>(
+                  &option.value)) {
+        qnn_runtime_heap_profiling_path_.value = *val;
+        qnn_runtime_heap_profiling_path_.is_set = true;
+      }
     } else {
       ET_LOG(
           Error,
@@ -268,6 +275,7 @@ executorch::runtime::Error QnnExecuTorchBackend::get_option(
     executorch::runtime::BackendOptionContext& context,
     executorch::runtime::Span<executorch::runtime::BackendOption>&
         backend_options) {
+  std::lock_guard<std::mutex> guard(runtime_option_mutex_);
   size_t matches = backend_options.size();
   for (size_t i = 0; i < backend_options.size(); ++i) {
     // Set the value to what was stored by set_option
@@ -303,6 +311,10 @@ executorch::runtime::Error QnnExecuTorchBackend::get_option(
         strcmp(backend_options[i].key, QNN_RUNTIME_LPAI_CORE_SELECTION) == 0 &&
         qnn_runtime_lpai_core_selection_.is_set) {
       backend_options[i].value = qnn_runtime_lpai_core_selection_.value;
+    } else if (
+        strcmp(backend_options[i].key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0 &&
+        qnn_runtime_heap_profiling_path_.is_set) {
+      backend_options[i].value = qnn_runtime_heap_profiling_path_.value;
     } else {
       // either runtime never called set_option or key does not exist
       matches--;
 
@@ -71,6 +71,7 @@ class QnnExecuTorchBackend final
   RuntimeOption qnn_runtime_lpai_client_perf_type_{false, 0};
   RuntimeOption qnn_runtime_lpai_affinity_{false, 0};
   RuntimeOption qnn_runtime_lpai_core_selection_{false, 0};
+  RuntimeOption qnn_runtime_heap_profiling_path_{false, {}};
 };
 
 } // namespace qnn
 
@@ -71,7 +71,8 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
           qnn_device_ptr,
           backend_params->qnn_backend_cache_ptr_.get(),
           htp_options,
-          qnn_dlc_manager);
+          qnn_dlc_manager,
+          get_option(options->profile_level(), QNN_RUNTIME_PROFILE_LEVEL));
 
       backend_params->qnn_graph_ptr_ = std::make_unique<HtpGraph>(
           implementation_ptr,
 
@@ -6,26 +6,63 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
 
 namespace executorch {
 namespace backends {
 namespace qnn {
 
+// Guards htp_context_count_; it is locked around each read/update of the
+// counter in the destructor and in Configure().
+std::mutex QnnContext::htp_context_mutex_;
+// Counter shared by all QnnContext instances. Configure() increments it when
+// an HTP context is created from a context binary; the destructor decrements
+// it. Heap profiling is triggered on the 0 -> 1 and 1 -> 0 transitions.
+// NOTE(review): Configure() increments for every HTP context, but the
+// destructor only decrements when need_to_profile_ is set, so mixing
+// profiled and non-profiled contexts can leave the count above zero —
+// confirm whether that is intended.
+int QnnContext::htp_context_count_{0};
+
+// Looks up the QNN_RUNTIME_HEAP_PROFILING_PATH runtime option and hands the
+// resulting path to qnn_profiler_->ProfileDataToFile(). The path stays empty
+// when the lookup fails or the option does not hold a character array. A
+// failure reported by ProfileDataToFile() is logged, not propagated.
+void QnnContext::WriteHeapProfile() {
+  executorch::runtime::BackendOption backend_option;
+  std::string heap_profiling_path;
+  if (get_runtime_option(QNN_RUNTIME_HEAP_PROFILING_PATH, backend_option) ==
+      Error::Ok) {
+    const auto* arr =
+        std::get_if<std::array<char, runtime::kMaxOptionValueLength>>(
+            &backend_option.value);
+    if (arr) {
+      // The value lives in a fixed-size buffer. Bound the read by the buffer
+      // size so a value without a NUL terminator cannot be read past its end,
+      // then cut the string at the first NUL (if any).
+      heap_profiling_path.assign(arr->data(), arr->size());
+      const auto terminator = heap_profiling_path.find('\0');
+      if (terminator != std::string::npos) {
+        heap_profiling_path.resize(terminator);
+      }
+    }
+  }
+  Qnn_ErrorHandle_t error_profile =
+      qnn_profiler_->ProfileDataToFile(heap_profiling_path);
+  if (error_profile != QNN_SUCCESS) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Failed to profile. Cannot get profile from handle. Error %d",
+        QNN_GET_ERROR_CODE(error_profile));
+  }
+}
+
 QnnContext::~QnnContext() {
   const QnnInterface& qnn_interface = implementation_->GetQnnInterface();
   Qnn_ErrorHandle_t error = QNN_SUCCESS;
+
   if (handle_ != nullptr) {
     QNN_EXECUTORCH_LOG_INFO("Destroy Qnn context");
-    error = qnn_interface.qnn_context_free(handle_, /*profile=*/nullptr);
+
+    bool do_heap_profile = false;
+    {
+      std::lock_guard<std::mutex> lock(htp_context_mutex_);
+      if (is_htp_backend_ && htp_context_count_ > 0 && need_to_profile_) {
+        --htp_context_count_;
+        do_heap_profile = (htp_context_count_ == 0);
+      }
+    }
+    error = qnn_interface.qnn_context_free(
+        handle_, do_heap_profile ? qnn_profiler_->GetHandle() : nullptr);
     if (error != QNN_SUCCESS) {
       QNN_EXECUTORCH_LOG_ERROR(
           "Failed to free QNN "
           "context_handle_. Backend "
           "ID %u, error %d",
           qnn_interface.GetBackendId(),
           QNN_GET_ERROR_CODE(error));
+    } else if (do_heap_profile) {
+      WriteHeapProfile();
     }
     handle_ = nullptr;
   }
@@ -45,21 +82,51 @@ Error QnnContext::Configure() {
   if (cache_->GetCacheState() == QnnBackendCache::DESERIALIZE) {
     const QnnExecuTorchContextBinary& qnn_context_blob =
         cache_->GetQnnContextBlob();
+    /*
+    Total DSP heap usage is measured at two points: when the first context is
+    created and when the last context is freed. Per the QNN documentation, the
+    profile handle must be passed to qnn_context_create_from_binary for the
+    first context and to qnn_context_free for the last context.
+
+    There are two limitations:
+    1. Only supported on Android and QNX platforms.
+    2. Enabling this feature might impact initialization and cleanup
+    time.
+    */
+
+    bool do_heap_profile = false;
+    {
+      std::lock_guard<std::mutex> lock(htp_context_mutex_);
+      do_heap_profile =
+          is_htp_backend_ && (htp_context_count_ == 0) && need_to_profile_;
+      if (is_htp_backend_) {
+        ++htp_context_count_;
+      }
+    }
 
     error = qnn_interface.qnn_context_create_from_binary(
         backend_->GetHandle(),
         device_->GetHandle(),
-        temp_context_config.empty() ? nullptr : temp_context_config.data(),
+        (temp_context_config.empty() ? nullptr : temp_context_config.data()),
         static_cast<uint8_t*>(qnn_context_blob.buffer),
         qnn_context_blob.nbytes,
         &handle_,
-        /*profile=*/nullptr);
+        do_heap_profile ? qnn_profiler_->GetHandle() : nullptr);
     if (error != QNN_SUCCESS) {
       QNN_EXECUTORCH_LOG_ERROR(
           "Can't create context from "
           "binary. Error %d.",
           QNN_GET_ERROR_CODE(error));
+      // Rollback the count since context creation failed
+      {
+        std::lock_guard<std::mutex> lock(htp_context_mutex_);
+        if (is_htp_backend_ && htp_context_count_ > 0) {
+          --htp_context_count_;
+        }
+      }
       return Error::Internal;
+    } else if (do_heap_profile) {
+      WriteHeapProfile();
     }
   } else if (
       cache_->GetCacheState() == QnnBackendCache::SERIALIZE ||
 
@@ -13,7 +13,10 @@
 #include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>
 
+#include <executorch/backends/qualcomm/runtime/backends/QnnProfiler.h>
+
 #include <memory>
+#include <mutex>
 
 namespace executorch {
 namespace backends {
@@ -28,13 +31,22 @@ class QnnContext {
       QnnBackend* backend,
       QnnDevice* device,
       QnnBackendCache* cache,
-      QnnDlcManager* qnn_dlc_manager)
+      QnnDlcManager* qnn_dlc_manager,
+      const QnnExecuTorchProfileLevel& profile_level)
       : handle_(nullptr),
         implementation_(implementation),
         backend_(backend),
         device_(device),
         cache_(cache),
-        qnn_dlc_manager_(qnn_dlc_manager) {}
+        qnn_dlc_manager_(qnn_dlc_manager),
+        is_htp_backend_(
+            implementation->GetQnnInterface().GetBackendId() ==
+            QNN_BACKEND_ID_HTP),
+        need_to_profile_(
+            profile_level != QnnExecuTorchProfileLevel::kProfileOff) {
+    qnn_profiler_ =
+        std::make_unique<QnnProfile>(implementation_, backend_, profile_level);
+  }
 
   virtual ~QnnContext();
 
@@ -73,13 +85,20 @@ class QnnContext {
   };
 
  private:
+  void WriteHeapProfile();
   Qnn_ContextHandle_t handle_;
   QnnImplementation* implementation_;
   QnnBackend* backend_;
   QnnDevice* device_;
   QnnBackendCache* cache_;
   QnnContextCustomProtocol qnn_context_custom_protocol_;
   QnnDlcManager* qnn_dlc_manager_;
+
+  std::unique_ptr<QnnProfile> qnn_profiler_;
+  bool is_htp_backend_;
+  bool need_to_profile_;
+  static std::mutex htp_context_mutex_;
+  static int htp_context_count_;
 };
 } // namespace qnn
 } // namespace backends