Skip to content

Commit 576ed77

Browse files
authored
Revert "Qualcomm AI Engine Direct - heap profiling at runtime with HTP backend" (pytorch#19705)
Reverts pytorch#19224 This is failing internal CI tests: buck test [@fbcode](https://www.internalfb.com/intern/profile/fbcode)//mode/dev fbcode//executorch/backends/qualcomm/tests/fb:test_qnn_delegate_simulator -- --exact 'fbcode//executorch/backends/qualcomm/tests/fb:test_qnn_delegate_simulator - test_qnn_backend_runtime_option_heap_profile (executorch.backends.qualcomm.tests.fb.test_qnn_delegate_simulator.TestQNNQuantizedUtilsSimulator)' buck test [@fbcode](https://www.internalfb.com/intern/profile/fbcode)//mode/dev fbcode//executorch/backends/qualcomm/tests/fb:test_qnn_delegate_simulator -- --exact 'fbcode//executorch/backends/qualcomm/tests/fb:test_qnn_delegate_simulator - test_qnn_backend_runtime_option_heap_profile (executorch.backends.qualcomm.tests.fb.test_qnn_delegate_simulator.TestQNNFloatingPointUtilsSimulator)' ``` ====================================================================== ERROR: test_qnn_backend_runtime_option_heap_profile (executorch.backends.qualcomm.tests.fb.test_qnn_delegate_simulator.TestQNNFloatingPointUtilsSimulator) ---------------------------------------------------------------------- Traceback (most recent call last): File "/data/sandcastle/boxes/trunk-hg-full-fbsource/buck-out/v2/art/fbcode/4c4ca07d1cf3712f/executorch/backends/qualcomm/tests/fb/__test_qnn_delegate_simulator__/test_qnn_delegate_simulator#link-tree/executorch/backends/qualcomm/tests/test_qnn_delegate.py", line 5851, in test_qnn_backend_runtime_option_heap_profile self.verify_output( TypeError: TestQNNFloatingPointUtilsSimulator.verify_output() got an unexpected keyword argument 'save_heap_result' ```
1 parent 4f4fb09 commit 576ed77

22 files changed

Lines changed: 70 additions & 539 deletions

backends/qualcomm/debugger/README.md

Lines changed: 1 addition & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ qairt_visualizer.view(reports=[optrace, qhas])
7878
- `model`: Path to your QNN model file (e.g., `path_to_your_model.dlc`).
7979
- **`reports`**: List of report file paths, including the optrace (`optrace.json`) and QHAS (`optrace_qnn_htp_analysis_summary.json`).
8080

81-
Note: Files ending with `.bin` do not support graph visualization in qairt_visualizer.
81+
Note: Files ending with `.bin ` do not support graph visualization in qairt_visualizer.
8282

8383
## Demo
8484

@@ -266,79 +266,3 @@ python -m examples.qualcomm.util_scripts.qnn_intermediate_debugger_demo -b build
266266
3. Does not support graphs with partitions (partial delegation).
267267
4. Does not support LLM models.
268268
5. Does not support graphs with multiple methods.
269-
270-
271-
## ExecuTorch QNN HTP Heap Profiling
272-
273-
Measures DSP memory usage when using context binary models on the HTP backend.
274-
275-
### Introduction
276-
277-
DSP heap profiling is available for `QnnContext_createFromBinary` use-cases. It captures total DSP heap usage at two checkpoints:
278-
279-
- **Before the first context is created** (`before_context_created`)
280-
- **After the last context is freed** (`after_context_freed`)
281-
282-
The difference between the two values represents heap consumed during context execution. The value after freeing is typically equal to or greater than before creation.
283-
284-
### Instructions
285-
286-
#### Run the example test
287-
288-
```bash
289-
python backends/qualcomm/tests/test_qnn_delegate.py \
290-
TestQNNQuantizedUtils.test_qnn_backend_runtime_option_heap_profile \
291-
-b build-android -H ${HOST} -s ${SN} -m ${SOC_MODEL}
292-
```
293-
294-
See [test_qnn_delegate.py](../tests/test_qnn_delegate.py) for the full test implementation.
295-
296-
#### Setting
297-
298-
```python
299-
from executorch.backends.qualcomm.utils.utils import generate_htp_compiler_spec
300-
from executorch.backends.qualcomm.utils.utils import generate_qnn_executorch_compiler_spec
301-
302-
backend_options = generate_htp_compiler_spec(
303-
use_multi_contexts=True,
304-
)
305-
306-
compiler_specs = generate_qnn_executorch_compiler_spec(
307-
soc_model=self.chipset_table[TestQNN.soc_model],
308-
backend_options=backend_options,
309-
profile_level=2,
310-
)
311-
312-
# ...
313-
314-
self.verify_output(
315-
module,
316-
sample_input,
317-
exec_prog,
318-
save_heap_result=True,
319-
)
320-
```
321-
322-
#### Output file format
323-
324-
The result is written to a text file (default: `htp_heap_usage.txt`) with two lines:
325-
326-
```
327-
DSP:before_context_created (bytes), <value>
328-
DSP:after_context_freed (bytes), <value>
329-
```
330-
331-
#### Reference result
332-
333-
Measured on SM8850. A difference of 0 means no additional heap is consumed during context binary execution.
334-
335-
```console
336-
First value (before_context_created): 928212 bytes
337-
Second value (after_context_freed): 928212 bytes
338-
difference: 0.00 bytes
339-
```
340-
341-
### Limitations
342-
343-
1. Only supported on the HTP backend on Android and QNX platforms.
344-
2. By enabling this feature, initialization and cleanup time might be impacted.

backends/qualcomm/export_utils.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -493,11 +493,6 @@ def pull_debug_output(self, etdump_path, debug_ouput_path, callback=None):
493493
if callback:
494494
callback()
495495

496-
def pull_heap_output(self, src_file_path, dst_folder, callback=None):
497-
self._adb(["pull", src_file_path, dst_folder])
498-
if callback:
499-
callback()
500-
501496

502497
def build_executorch_binary(
503498
model: torch.nn.Module, # noqa: B006

backends/qualcomm/runtime/QnnBackendOptions.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,6 @@ template QnnExecuTorchProfileLevel get_option<QnnExecuTorchProfileLevel>(
5252
QnnExecuTorchProfileLevel,
5353
const char*);
5454

55-
executorch::runtime::Error get_runtime_option(
56-
const char* key,
57-
executorch::runtime::BackendOption& backend_option) {
58-
std::strncpy(backend_option.key, key, runtime::kMaxOptionKeyLength);
59-
backend_option.key[runtime::kMaxOptionKeyLength - 1] = '\0';
60-
return get_option(QNN_BACKEND, backend_option);
61-
}
62-
6355
} // namespace qnn
6456
} // namespace backends
6557
} // namespace executorch

backends/qualcomm/runtime/QnnBackendOptions.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,6 @@ struct RuntimeOption {
3737
template <typename T>
3838
T get_option(T aot_option, const char* aot_key);
3939

40-
/**
41-
* @brief
42-
* Get the backend option.
43-
* This method checks runtime option only.
44-
*
45-
* @param key The key of runtime option.
46-
* @param backend_option The backend_option to be filled in with the value stored at runtime.
47-
*/
48-
49-
executorch::runtime::Error get_runtime_option(
50-
const char* key,
51-
executorch::runtime::BackendOption& backend_option);
52-
5340
} // namespace qnn
5441
} // namespace backends
5542
} // namespace executorch

backends/qualcomm/runtime/QnnExecuTorch.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#define QNN_RUNTIME_LPAI_CLIENT_PERF_TYPE "qnn_runtime_lpai_client_perf_type"
2626
#define QNN_RUNTIME_LPAI_AFFINITY "qnn_runtime_lpai_affinity"
2727
#define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection"
28-
#define QNN_RUNTIME_HEAP_PROFILING_PATH "qnn_runtime_heap_profiling_path"
2928

3029
#ifdef __cplusplus
3130
extern "C" {

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -245,13 +245,6 @@ executorch::runtime::Error QnnExecuTorchBackend::set_option(
245245
qnn_runtime_lpai_core_selection_.value = *val;
246246
qnn_runtime_lpai_core_selection_.is_set = true;
247247
}
248-
} else if (strcmp(option.key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0) {
249-
if (auto* val =
250-
std::get_if<std::array<char, runtime::kMaxOptionValueLength>>(
251-
&option.value)) {
252-
qnn_runtime_heap_profiling_path_.value = *val;
253-
qnn_runtime_heap_profiling_path_.is_set = true;
254-
}
255248
} else {
256249
ET_LOG(
257250
Error,
@@ -275,7 +268,6 @@ executorch::runtime::Error QnnExecuTorchBackend::get_option(
275268
executorch::runtime::BackendOptionContext& context,
276269
executorch::runtime::Span<executorch::runtime::BackendOption>&
277270
backend_options) {
278-
std::lock_guard<std::mutex> guard(runtime_option_mutex_);
279271
size_t matches = backend_options.size();
280272
for (size_t i = 0; i < backend_options.size(); ++i) {
281273
// Set the value to what was stored by set_option
@@ -311,10 +303,6 @@ executorch::runtime::Error QnnExecuTorchBackend::get_option(
311303
strcmp(backend_options[i].key, QNN_RUNTIME_LPAI_CORE_SELECTION) == 0 &&
312304
qnn_runtime_lpai_core_selection_.is_set) {
313305
backend_options[i].value = qnn_runtime_lpai_core_selection_.value;
314-
} else if (
315-
strcmp(backend_options[i].key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0 &&
316-
qnn_runtime_heap_profiling_path_.is_set) {
317-
backend_options[i].value = qnn_runtime_heap_profiling_path_.value;
318306
} else {
319307
// either runtime never called set_option or key does not exist
320308
matches--;

backends/qualcomm/runtime/QnnExecuTorchBackend.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ class QnnExecuTorchBackend final
7171
RuntimeOption qnn_runtime_lpai_client_perf_type_{false, 0};
7272
RuntimeOption qnn_runtime_lpai_affinity_{false, 0};
7373
RuntimeOption qnn_runtime_lpai_core_selection_{false, 0};
74-
RuntimeOption qnn_runtime_heap_profiling_path_{false, {}};
7574
};
7675

7776
} // namespace qnn

backends/qualcomm/runtime/backends/QnnBackendFactory.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
7171
qnn_device_ptr,
7272
backend_params->qnn_backend_cache_ptr_.get(),
7373
htp_options,
74-
qnn_dlc_manager,
75-
get_option(options->profile_level(), QNN_RUNTIME_PROFILE_LEVEL));
74+
qnn_dlc_manager);
7675

7776
backend_params->qnn_graph_ptr_ = std::make_unique<HtpGraph>(
7877
implementation_ptr,

backends/qualcomm/runtime/backends/QnnContextCommon.cpp

Lines changed: 3 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -6,63 +6,26 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
109
#include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
1110
#include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
1211

1312
namespace executorch {
1413
namespace backends {
1514
namespace qnn {
1615

17-
std::mutex QnnContext::htp_context_mutex_;
18-
int QnnContext::htp_context_count_{0};
19-
20-
void QnnContext::WriteHeapProfile() {
21-
executorch::runtime::BackendOption backend_option;
22-
std::string heap_profiling_path;
23-
if (get_runtime_option(QNN_RUNTIME_HEAP_PROFILING_PATH, backend_option) ==
24-
Error::Ok) {
25-
auto* arr = std::get_if<std::array<char, runtime::kMaxOptionValueLength>>(
26-
&backend_option.value);
27-
if (arr) {
28-
heap_profiling_path = arr->data();
29-
}
30-
}
31-
Qnn_ErrorHandle_t error_profile =
32-
qnn_profiler_->ProfileDataToFile(heap_profiling_path);
33-
if (error_profile != QNN_SUCCESS) {
34-
QNN_EXECUTORCH_LOG_ERROR(
35-
"Failed to profile. Cannot get profile from handle. Error %d",
36-
QNN_GET_ERROR_CODE(error_profile));
37-
}
38-
}
39-
4016
QnnContext::~QnnContext() {
4117
const QnnInterface& qnn_interface = implementation_->GetQnnInterface();
4218
Qnn_ErrorHandle_t error = QNN_SUCCESS;
43-
4419
if (handle_ != nullptr) {
4520
QNN_EXECUTORCH_LOG_INFO("Destroy Qnn context");
46-
47-
bool do_heap_profile = false;
48-
{
49-
std::lock_guard<std::mutex> lock(htp_context_mutex_);
50-
if (is_htp_backend_ && htp_context_count_ > 0 && need_to_profile_) {
51-
--htp_context_count_;
52-
do_heap_profile = (htp_context_count_ == 0);
53-
}
54-
}
55-
error = qnn_interface.qnn_context_free(
56-
handle_, do_heap_profile ? qnn_profiler_->GetHandle() : nullptr);
21+
error = qnn_interface.qnn_context_free(handle_, /*profile=*/nullptr);
5722
if (error != QNN_SUCCESS) {
5823
QNN_EXECUTORCH_LOG_ERROR(
5924
"Failed to free QNN "
6025
"context_handle_. Backend "
6126
"ID %u, error %d",
6227
qnn_interface.GetBackendId(),
6328
QNN_GET_ERROR_CODE(error));
64-
} else if (do_heap_profile) {
65-
WriteHeapProfile();
6629
}
6730
handle_ = nullptr;
6831
}
@@ -82,51 +45,21 @@ Error QnnContext::Configure() {
8245
if (cache_->GetCacheState() == QnnBackendCache::DESERIALIZE) {
8346
const QnnExecuTorchContextBinary& qnn_context_blob =
8447
cache_->GetQnnContextBlob();
85-
/*
86-
Total DSP heap usage can be measured in two conditions, first context
87-
creation and last context free. By the QNN documentation, we need to insert
88-
profileHandle in qnn_context_create_from_binary when creating first context
89-
and closing last context.
90-
91-
Limitations are two:
92-
1. Only supported on Android and QNX platforms.
93-
2.By enabling this feature initialization and cleanup time might be
94-
impacted.
95-
*/
96-
97-
bool do_heap_profile = false;
98-
{
99-
std::lock_guard<std::mutex> lock(htp_context_mutex_);
100-
do_heap_profile =
101-
is_htp_backend_ && (htp_context_count_ == 0) && need_to_profile_;
102-
if (is_htp_backend_) {
103-
++htp_context_count_;
104-
}
105-
}
10648

10749
error = qnn_interface.qnn_context_create_from_binary(
10850
backend_->GetHandle(),
10951
device_->GetHandle(),
110-
(temp_context_config.empty() ? nullptr : temp_context_config.data()),
52+
temp_context_config.empty() ? nullptr : temp_context_config.data(),
11153
static_cast<uint8_t*>(qnn_context_blob.buffer),
11254
qnn_context_blob.nbytes,
11355
&handle_,
114-
do_heap_profile ? qnn_profiler_->GetHandle() : nullptr);
56+
/*profile=*/nullptr);
11557
if (error != QNN_SUCCESS) {
11658
QNN_EXECUTORCH_LOG_ERROR(
11759
"Can't create context from "
11860
"binary. Error %d.",
11961
QNN_GET_ERROR_CODE(error));
120-
// Rollback the count since context creation failed
121-
{
122-
std::lock_guard<std::mutex> lock(htp_context_mutex_);
123-
if (is_htp_backend_ && htp_context_count_ > 0) {
124-
--htp_context_count_;
125-
}
126-
}
12762
return Error::Internal;
128-
} else if (do_heap_profile) {
129-
WriteHeapProfile();
13063
}
13164
} else if (
13265
cache_->GetCacheState() == QnnBackendCache::SERIALIZE ||

backends/qualcomm/runtime/backends/QnnContextCommon.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,7 @@
1313
#include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
1414
#include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>
1515

16-
#include <executorch/backends/qualcomm/runtime/backends/QnnProfiler.h>
17-
1816
#include <memory>
19-
#include <mutex>
2017

2118
namespace executorch {
2219
namespace backends {
@@ -31,22 +28,13 @@ class QnnContext {
3128
QnnBackend* backend,
3229
QnnDevice* device,
3330
QnnBackendCache* cache,
34-
QnnDlcManager* qnn_dlc_manager,
35-
const QnnExecuTorchProfileLevel& profile_level)
31+
QnnDlcManager* qnn_dlc_manager)
3632
: handle_(nullptr),
3733
implementation_(implementation),
3834
backend_(backend),
3935
device_(device),
4036
cache_(cache),
41-
qnn_dlc_manager_(qnn_dlc_manager),
42-
is_htp_backend_(
43-
implementation->GetQnnInterface().GetBackendId() ==
44-
QNN_BACKEND_ID_HTP),
45-
need_to_profile_(
46-
profile_level != QnnExecuTorchProfileLevel::kProfileOff) {
47-
qnn_profiler_ =
48-
std::make_unique<QnnProfile>(implementation_, backend_, profile_level);
49-
}
37+
qnn_dlc_manager_(qnn_dlc_manager) {}
5038

5139
virtual ~QnnContext();
5240

@@ -85,20 +73,13 @@ class QnnContext {
8573
};
8674

8775
private:
88-
void WriteHeapProfile();
8976
Qnn_ContextHandle_t handle_;
9077
QnnImplementation* implementation_;
9178
QnnBackend* backend_;
9279
QnnDevice* device_;
9380
QnnBackendCache* cache_;
9481
QnnContextCustomProtocol qnn_context_custom_protocol_;
9582
QnnDlcManager* qnn_dlc_manager_;
96-
97-
std::unique_ptr<QnnProfile> qnn_profiler_;
98-
bool is_htp_backend_;
99-
bool need_to_profile_;
100-
static std::mutex htp_context_mutex_;
101-
static int htp_context_count_;
10283
};
10384
} // namespace qnn
10485
} // namespace backends

0 commit comments

Comments
 (0)