Skip to content

Commit 8008cd2

Browse files
[QNN] Harden SDK download and skip auto-download in CI (pytorch#18416)
1 parent 2f496bb commit 8008cd2

2 files changed

Lines changed: 95 additions & 4 deletions

File tree

backends/qualcomm/scripts/download_qnn_sdk.py

Lines changed: 65 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import sys
1111
import tarfile
1212
import tempfile
13+
import time
1314
import urllib.request
1415
import zipfile
1516
from typing import Dict, List, Optional, Tuple
@@ -203,7 +204,7 @@ def _stream_to_file(
203204
downloaded = archive_path.stat().st_size if archive_path.exists() else 0
204205
headers = {"Range": f"bytes={downloaded}-"} if downloaded > 0 else {}
205206

206-
with session.get(url, stream=True, headers=headers) as r:
207+
with session.get(url, stream=True, headers=headers, timeout=(30, 60)) as r:
207208
if r.status_code == 200 and downloaded > 0:
208209
downloaded = 0 # Server doesn't support Range — restart
209210
r.raise_for_status()
@@ -235,12 +236,17 @@ def _stream_to_file(
235236
if total:
236237
_progress_newline()
237238

239+
if total > 0 and downloaded < total:
240+
raise requests.exceptions.ConnectionError(
241+
f"Incomplete download: {downloaded}/{total} bytes"
242+
)
243+
238244
logger.info("[QNN] Download complete.")
239245
return True
240246

241247

242248
def _download_archive(
243-
url: str, archive_path: pathlib.Path, max_retries: int = 3
249+
url: str, archive_path: pathlib.Path, max_retries: int = 5
244250
) -> bool:
245251
"""Streaming download with retry + resume on mid-stream failures."""
246252
logger.debug("Archive will be saved to: %s", archive_path)
@@ -265,10 +271,12 @@ def _download_archive(
265271
) as e:
266272
_progress_newline()
267273
if attempt < max_retries:
274+
backoff = min(2 ** (attempt - 1), 30)
268275
logger.warning(
269276
f"[QNN] Download interrupted: {type(e).__name__}. "
270-
f"Retrying ({attempt}/{max_retries})..."
277+
f"Retrying in {backoff}s ({attempt}/{max_retries})..."
271278
)
279+
time.sleep(backoff)
272280
else:
273281
logger.error(f"[QNN] Download failed after {max_retries} attempts: {e}")
274282
return False
@@ -281,6 +289,20 @@ def _download_archive(
281289
logger.error("[QNN] Downloaded file is empty or missing!")
282290
return False
283291

292+
# Validate archive integrity — catches truncation and corruption that
293+
# size checks alone would miss (e.g. no Content-Length, or bit flips).
294+
try:
295+
if url.endswith(".zip"):
296+
with zipfile.ZipFile(archive_path, "r"):
297+
pass # Reading central directory is enough to detect truncation
298+
elif url.endswith((".tar.gz", ".tgz")):
299+
with tarfile.open(archive_path, "r:gz"):
300+
pass
301+
except (zipfile.BadZipFile, tarfile.TarError) as e:
302+
logger.error(f"[QNN] Downloaded archive is corrupt: {e}")
303+
archive_path.unlink(missing_ok=True)
304+
return False
305+
284306
return True
285307

286308

@@ -748,6 +770,37 @@ def install_qnn_sdk() -> bool:
748770
return _ensure_libcxx_stack() and _ensure_qnn_sdk_lib()
749771

750772

773+
def _check_sdk_available() -> int:
    """Report whether a QNN SDK build can proceed without a blind download.

    Intended for use as a process exit code: 0 means "available", 1 means
    "not available".

    The result is 0 when either of these holds:

    * the directory returned by ``_get_sdk_dir()`` exists and contains at
      least one entry, or
    * a HEAD request to the Qualcomm software-center host completes with a
      status code below 500.

    The result is 1 when ``is_linux_x86()`` is false, or when neither of the
    conditions above holds.

    The network probe goes through ``requests.head()`` so that proxy
    settings such as ``HTTPS_PROXY`` are honored: hosts behind a configured
    proxy succeed, and hosts without one fail gracefully.

    Nothing is downloaded by this function.
    """
    if not is_linux_x86():
        return 1

    # Best-effort cache probe: any failure while locating or listing the
    # SDK directory is treated the same as "nothing cached".
    try:
        cache_root = _get_sdk_dir()
        has_cached_sdk = cache_root.exists() and any(cache_root.iterdir())
    except Exception:
        has_cached_sdk = False
    if has_cached_sdk:
        return 0

    # Reachability probe: any response below 5xx (including redirects that
    # were followed, or 4xx) counts as "the host can be reached".
    try:
        response = requests.head(
            "https://softwarecenter.qualcomm.com",
            timeout=5,
            allow_redirects=True,
        )
    except requests.exceptions.RequestException:
        return 1

    return 0 if response.status_code < 500 else 1
802+
803+
751804
def main(argv: Optional[List[str]] = None) -> int:
752805
parser = argparse.ArgumentParser(
753806
description="Helper utility for Qualcomm SDK staging."
@@ -768,8 +821,17 @@ def main(argv: Optional[List[str]] = None) -> int:
768821
action="store_true",
769822
help="Ensure the SDK and runtime libraries are staged and loaded.",
770823
)
824+
parser.add_argument(
825+
"--check",
826+
action="store_true",
827+
help="Exit 0 if the SDK is cached or the download host is reachable, "
828+
"1 otherwise. Does not download anything.",
829+
)
771830
args = parser.parse_args(argv)
772831

832+
if args.check:
833+
return _check_sdk_available()
834+
773835
# When --print-sdk-path is used, stdout must contain ONLY the SDK path.
774836
# Redirect all logger and progress output to stderr.
775837
if args.print_sdk_path:

tools/cmake/preset/pybind.cmake

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,36 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
3737
set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER ON)
3838
set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM ON)
3939
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64|i.86)$")
40-
set_overridable_option(EXECUTORCH_BUILD_QNN ON)
40+
# Auto-enable QNN on Linux x86 when the SDK is available: (1) QNN_SDK_ROOT set
41+
# explicitly → always enable; (2) GitHub Actions CI → skip (avoids flaky 1.3GB
42+
# downloads); (3) otherwise → probe the download server and skip gracefully
43+
# when it is unreachable (e.g. devvms without a proxy configured).
44+
if(DEFINED QNN_SDK_ROOT OR DEFINED ENV{QNN_SDK_ROOT})
45+
set_overridable_option(EXECUTORCH_BUILD_QNN ON)
46+
elseif("$ENV{GITHUB_ACTIONS}" STREQUAL "true")
47+
message(STATUS "GitHub Actions CI detected: skipping QNN auto-download. "
48+
"Set QNN_SDK_ROOT or -DEXECUTORCH_BUILD_QNN=ON to enable."
49+
)
50+
else()
51+
execute_process(
52+
COMMAND
53+
${PYTHON_EXECUTABLE}
54+
${CMAKE_CURRENT_LIST_DIR}/../../../backends/qualcomm/scripts/download_qnn_sdk.py
55+
--check
56+
RESULT_VARIABLE _qnn_available
57+
OUTPUT_QUIET ERROR_QUIET
58+
TIMEOUT 10
59+
)
60+
if(_qnn_available EQUAL 0)
61+
set_overridable_option(EXECUTORCH_BUILD_QNN ON)
62+
else()
63+
message(
64+
STATUS "QNN SDK not cached and download server unreachable. "
65+
"Skipping QNN backend. Set QNN_SDK_ROOT or use "
66+
"-DEXECUTORCH_BUILD_QNN=ON with network access to enable."
67+
)
68+
endif()
69+
endif()
4170
endif()
4271
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL
4372
"WIN32"

0 commit comments

Comments
 (0)