Skip to content

Commit cd81156

Browse files
authored
Qualcomm AI Engine Direct - Support for LPAI in cli.py (pytorch#18995)
1 parent 063f9c9 commit cd81156

4 files changed

Lines changed: 92 additions & 16 deletions

File tree

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8746,6 +8746,9 @@ def required_envs(self, conditions=None) -> bool:
87468746
)
87478747

87488748
def test_cli(self):
8749+
# TODO: Add gpu support in cli.py
8750+
if get_backend_type(self.backend) == QnnExecuTorchBackendType.kGpuBackend:
8751+
self.skipTest("Currently, the GPU does not support CLI.")
87498752
with tempfile.TemporaryDirectory() as tmp_dir:
87508753
sample_input = torch.randn(1, 2, 3, 4)
87518754
ep = torch.export.export(Relu(), (sample_input,)) # noqa: F405
@@ -8768,6 +8771,8 @@ def test_cli(self):
87688771
f"{tmp_dir}/input_list",
87698772
"--soc_model",
87708773
self.soc_model,
8774+
"--backend",
8775+
self.backend,
87718776
]
87728777
subprocess.run(cmds, stdout=subprocess.DEVNULL)
87738778
self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/relu_quantized.pt2"))
@@ -8783,6 +8788,8 @@ def test_cli(self):
87838788
f"{tmp_dir}/c_out",
87848789
"--soc_model",
87858790
self.soc_model,
8791+
"--backend",
8792+
self.backend,
87868793
]
87878794
subprocess.run(cmds, stdout=subprocess.DEVNULL)
87888795
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.pte"))
@@ -8807,13 +8814,18 @@ def test_cli(self):
88078814
self.target,
88088815
"--device",
88098816
self.device,
8817+
"--backend",
8818+
self.backend,
88108819
]
88118820
if self.host:
88128821
cmds.extend(["--host", self.host])
88138822
subprocess.run(cmds, stdout=subprocess.DEVNULL)
88148823
self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/Result_0/output_0.pt"))
88158824

88168825
def test_cli_with_input_list_assignment(self):
8826+
# TODO: Add gpu support in cli.py
8827+
if get_backend_type(self.backend) == QnnExecuTorchBackendType.kGpuBackend:
8828+
self.skipTest("Currently, the GPU does not support CLI.")
88178829
with tempfile.TemporaryDirectory() as tmp_dir:
88188830
sample_input = torch.randn(1, 2, 3, 4)
88198831
sample_input2 = torch.randn(1, 2, 3, 4)
@@ -8840,6 +8852,8 @@ def test_cli_with_input_list_assignment(self):
88408852
f"{tmp_dir}/input_list",
88418853
"--soc_model",
88428854
self.soc_model,
8855+
"--backend",
8856+
self.backend,
88438857
]
88448858
subprocess.run(cmds, stdout=subprocess.DEVNULL)
88458859
self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/sub_quantized.pt2"))
@@ -8855,6 +8869,8 @@ def test_cli_with_input_list_assignment(self):
88558869
f"{tmp_dir}/c_out",
88568870
"--soc_model",
88578871
self.soc_model,
8872+
"--backend",
8873+
self.backend,
88588874
]
88598875
subprocess.run(cmds, stdout=subprocess.DEVNULL)
88608876
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.pte"))
@@ -8879,6 +8895,8 @@ def test_cli_with_input_list_assignment(self):
88798895
self.build_folder,
88808896
"--input_list",
88818897
f"{tmp_dir}/input_list",
8898+
"--backend",
8899+
self.backend,
88828900
]
88838901
if self.host:
88848902
cmds.extend(["--host", self.host])

backends/qualcomm/utils/utils.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1116,7 +1116,7 @@ def generate_lpai_compiler_spec(
11161116
)
11171117

11181118

1119-
def generate_qnn_executorch_compiler_spec(
1119+
def generate_qnn_executorch_compiler_spec( # noqa: C901
11201120
soc_model: QcomChipset,
11211121
backend_options: QnnExecuTorchBackendOptions,
11221122
debug: bool = False,
@@ -1224,6 +1224,21 @@ def generate_qnn_executorch_compiler_spec(
12241224
):
12251225
raise ValueError("LPAI does not support online prepare.")
12261226

1227+
if backend_options.backend_type == QnnExecuTorchBackendType.kLpaiBackend:
1228+
if soc_model.name not in get_soc_to_lpai_hw_ver_map():
1229+
raise ValueError(
1230+
f"Target soc_model({soc_model.name}) doesn't support LPAI backend. \n"
1231+
"Please choose the following SOC: "
1232+
f"{list(get_soc_to_lpai_hw_ver_map().keys())}"
1233+
)
1234+
elif get_soc_to_lpai_hw_ver_map()[
1235+
soc_model.name
1236+
] == LpaiHardwareVersion.V6 and is_qnn_sdk_version_less_than("2.39"):
1237+
raise ValueError(
1238+
f"Target soc_model({soc_model.name}) with LPAI backend v6 requires QNN SDK version >= 2.39. \n"
1239+
f"Current QNN SDK version: {get_sdk_build_id()}"
1240+
)
1241+
12271242
qnn_executorch_options.shared_buffer = shared_buffer
12281243
qnn_executorch_options.online_prepare = online_prepare
12291244
qnn_executorch_options.is_from_context_binary = is_from_context_binary

examples/qualcomm/util_scripts/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ This tool aims for users who want to deploy models with ExecuTorch runtime. It's
3636
current_input += f"{file_name} "
3737
input_list += f"{current_input.strip()}\n"
3838

39-
with open(f"{ws}/input_list", 'w') as f:
39+
with open(f"{ws}/input_list.txt", 'w') as f:
4040
f.write(input_list)
4141
```
4242

4343
* Quantize
4444
```bash
4545
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -h
46-
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -a cli_example/simple_model.pt2 -o cli_example/quantize_output -c use_8a8w -i cli_example/input_list.txt --per_channel
46+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -a cli_example/simple_model.pt2 -o cli_example/quantize_output -c use_8a8w -i cli_example/input_list.txt --per_channel -m SM8750 --backend htp
4747
```
4848
* Artifacts for quantized .pt2 file
4949
- `cli_example/quantize_output/simple_model_quantized.pt2`
@@ -61,7 +61,7 @@ This tool aims for users who want to deploy models with ExecuTorch runtime. It's
6161
```bash
6262
# `pip install pydot` if package is missing
6363
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
64-
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a model.bin -o path/to/model/output -m SM8750
64+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a model.bin -o path/to/model/output -m SM8750 --backend htp
6565
```
6666
* Artifacts for .pte file and figure of graph information
6767
- `cli_example/compile_output/simple_model_quantized.pte`
@@ -72,7 +72,7 @@ This tool aims for users who want to deploy models with ExecuTorch runtime. It's
7272
* Execute .pte program
7373
```bash
7474
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -h
75-
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -a cli_example/compile_output/simple_model_quantized.pte -o cli_example/execute_output -i cli_example/input_list.txt -H $HOST_NAME -s $DEVICE_SERIAL -b build-android -m SM8750
75+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -a cli_example/compile_output/simple_model_quantized.pte -o cli_example/execute_output -i cli_example/input_list.txt -H $HOST_NAME -s $DEVICE_SERIAL -b build-android -m SM8750 --backend htp
7676
```
7777
* Artifacts for inference outputs
7878
- `cli_example/execute_output/output_{data_index}_{output_index}.pt`.<br/>

examples/qualcomm/util_scripts/cli.py

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,18 @@
3030
SimpleADB,
3131
)
3232
from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype
33-
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
33+
from executorch.backends.qualcomm.serialization.qc_schema import (
34+
QcomChipset,
35+
QnnExecuTorchBackendType,
36+
QnnExecuTorchLpaiTargetEnv,
37+
)
3438
from executorch.backends.qualcomm.utils.constants import QCOM_PASS_ACTIVATE_KEY
3539
from executorch.backends.qualcomm.utils.utils import (
3640
draw_graph,
3741
dump_context_from_pte,
3842
from_context_binary,
3943
generate_htp_compiler_spec,
44+
generate_lpai_compiler_spec,
4045
generate_qnn_executorch_compiler_spec,
4146
generate_qnn_executorch_option,
4247
QNN_QUANT_TYPE_MAP,
@@ -104,7 +109,7 @@ def fill_tensor_info(info, qnn_tensors, category):
104109
qnn_mgr = PyQnnManagerAdaptor.QnnManager(
105110
generate_qnn_executorch_option(compiler_specs), ctx_bin
106111
)
107-
assert qnn_mgr.Init().value == 0, "failed to load context binary"
112+
assert qnn_mgr.Init().value == 0, "failed to initialize backend"
108113
graph_name = qnn_mgr.GetGraphNames()[0]
109114
qnn_mgr.AllocateTensor(graph_name)
110115
fill_tensor_info(tensor_info, qnn_mgr.GetGraphInputs(graph_name), in_key)
@@ -206,8 +211,17 @@ def compile(args):
206211

207212
file_name, extension = Path(args.artifact).stem, Path(args.artifact).suffix
208213
os.makedirs(args.output_folder, exist_ok=True)
209-
# setup compiler spec dedicated to QNN HTP backend
210-
backend_options = generate_htp_compiler_spec(use_fp16=True)
214+
# setup compiler spec
215+
backend_type = get_backend_type(args.backend)
216+
match backend_type:
217+
case QnnExecuTorchBackendType.kHtpBackend:
218+
backend_options = generate_htp_compiler_spec(use_fp16=True)
219+
case QnnExecuTorchBackendType.kLpaiBackend:
220+
backend_options = generate_lpai_compiler_spec(
221+
target_env=QnnExecuTorchLpaiTargetEnv.kArm
222+
)
223+
case _:
224+
raise ValueError("Backend is not implemented yet")
211225
# setup general compiler spec for QNN
212226
compiler_specs = generate_qnn_executorch_compiler_spec(
213227
soc_model=getattr(QcomChipset, args.soc_model),
@@ -305,8 +319,17 @@ def execute(args):
305319
user_inputs.append(inputs)
306320

307321
logger.info("retrieving graph I/O")
308-
# setup compiler spec dedicated to QNN HTP backend
309-
backend_options = generate_htp_compiler_spec(use_fp16=True)
322+
# setup compiler spec
323+
backend_type = get_backend_type(args.backend)
324+
match backend_type:
325+
case QnnExecuTorchBackendType.kHtpBackend:
326+
backend_options = generate_htp_compiler_spec(use_fp16=True)
327+
case QnnExecuTorchBackendType.kLpaiBackend:
328+
backend_options = generate_lpai_compiler_spec(
329+
target_env=QnnExecuTorchLpaiTargetEnv.kArm
330+
)
331+
case _:
332+
raise ValueError("Backend is not implemented yet")
310333
# setup general compiler spec for QNN
311334
compiler_specs = generate_qnn_executorch_compiler_spec(
312335
soc_model=getattr(QcomChipset, args.soc_model),
@@ -332,7 +355,7 @@ def execute(args):
332355

333356
logger.info("pushing QNN libraries & other artifacts")
334357

335-
adb.push(inputs=user_inputs)
358+
adb.push(inputs=user_inputs, backends=[backend_type])
336359

337360
logger.info("starting inference")
338361
adb.execute()
@@ -364,10 +387,16 @@ def post_process():
364387

365388
output_result_folder = f"{args.output_folder}/Result_{data_index}"
366389
os.makedirs(output_result_folder, exist_ok=True)
390+
# For the LPAI backend, a dequantize node will be retained for the output, ensuring that the output remains in float32 format.
391+
# TODO: add support for other dtypes for LPAI backend
367392
output = np.fromfile(
368393
filename,
369-
dtype=eval(
370-
f"np.{torch_to_numpy_dtype_dict[output_info[output_index]['dtype']]}"
394+
dtype=(
395+
eval(
396+
f"np.{torch_to_numpy_dtype_dict[output_info[output_index]['dtype']]}"
397+
)
398+
if backend_type != QnnExecuTorchBackendType.kLpaiBackend
399+
else np.float32
371400
),
372401
)
373402
output = torch.from_numpy(
@@ -460,9 +489,9 @@ def main():
460489
sub_quantize.add_argument(
461490
"--backend",
462491
type=str,
463-
choices=["htp", "gpu"],
492+
choices=["htp", "lpai"],
464493
default="htp",
465-
help="Backend to be deployed ('htp'/'gpu' are currently supported).",
494+
help="Backend to be deployed ('htp'/'lpai' are currently supported).",
466495
)
467496
sub_quantize.add_argument(
468497
"--eps",
@@ -514,6 +543,13 @@ def main():
514543
),
515544
action="store_true",
516545
)
546+
sub_compile.add_argument(
547+
"--backend",
548+
type=str,
549+
choices=["htp", "lpai"],
550+
default="htp",
551+
help="Backend to be deployed ('htp'/'lpai' are currently supported).",
552+
)
517553
sub_compile.set_defaults(callback=compile)
518554

519555
sub_execute = subparsers.add_parser(
@@ -590,6 +626,13 @@ def main():
590626
),
591627
action="store_true",
592628
)
629+
sub_execute.add_argument(
630+
"--backend",
631+
type=str,
632+
choices=["htp", "lpai"],
633+
default="htp",
634+
help="Backend to be deployed ('htp'/'lpai' are currently supported).",
635+
)
593636
sub_execute.set_defaults(callback=execute)
594637

595638
args = parser.parse_args()

0 commit comments

Comments
 (0)