Skip to content

Commit 45fe55c

Browse files
Reapply PyTorch 2.12 pin with CI fixes (pytorch#19699)
Summary: Trying again! This time with more work on the CUDA side. cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils @Sebastian-Larsson @robell @rascani
1 parent 554aecf commit 45fe55c

36 files changed

Lines changed: 342 additions & 97 deletions

.ci/docker/build.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ case "${IMAGE_NAME}" in
8181
LINTRUNNER=""
8282
GCC_VERSION=11
8383
CUDA_WINDOWS_CROSS_COMPILE=yes
84-
CUDA_VERSION=12.8
84+
CUDA_VERSION=13.0
8585
SKIP_PYTORCH=yes
8686
;;
8787
executorch-ubuntu-24.04-gcc14)
@@ -97,6 +97,10 @@ esac
9797
TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt)
9898
BUILD_DOCS=1
9999

100+
if [[ "${GCC_VERSION:-}" == "11" && -z "${SKIP_PYTORCH:-}" ]]; then
101+
PYTORCH_BUILD_MAX_JOBS=6
102+
fi
103+
100104
# Copy requirements-lintrunner.txt from root to here
101105
cp ../../requirements-lintrunner.txt ./
102106

@@ -109,6 +113,7 @@ docker build \
109113
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
110114
--build-arg "MINICONDA_VERSION=${MINICONDA_VERSION}" \
111115
--build-arg "TORCH_VERSION=${TORCH_VERSION}" \
116+
--build-arg "PYTORCH_BUILD_MAX_JOBS=${PYTORCH_BUILD_MAX_JOBS:-}" \
112117
--build-arg "BUCK2_VERSION=${BUCK2_VERSION}" \
113118
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
114119
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
release/2.11
1+
release/2.12

.ci/docker/common/install_cache.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ init_sccache() {
7676
# This is the remote cache bucket
7777
export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
7878
export SCCACHE_S3_KEY_PREFIX=executorch
79+
export SCCACHE_REGION=us-east-1
80+
export AWS_REGION=us-east-1
81+
export AWS_DEFAULT_REGION=us-east-1
7982
export SCCACHE_IDLE_TIMEOUT=0
8083
export SCCACHE_ERROR_LOG=/tmp/sccache_error.log
8184
export RUST_LOG=sccache::server=error

.ci/docker/common/install_cuda.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010

1111
set -ex
1212

13-
# CUDA version must be specified (e.g., 12.8)
13+
# CUDA version must be specified (e.g., 13.0)
1414
CUDA_VERSION="${CUDA_VERSION:?CUDA_VERSION must be set}"
1515

16-
# Convert version format (e.g., 12.8 -> 12-8 for package names)
16+
# Convert version format (e.g., 13.0 -> 13-0 for package names)
1717
CUDA_VERSION_DASH=$(echo "${CUDA_VERSION}" | tr '.' '-')
1818

1919
# Add NVIDIA package repository

.ci/docker/common/install_cuda_windows_cross_compile.sh

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ declare -A CUDA_DRIVER_MAP=(
1717
["12.6"]="12.6.3:561.17"
1818
["12.8"]="12.8.1:572.61"
1919
["12.9"]="12.9.1:576.57"
20+
["13.0"]="13.0.2:"
2021
)
2122

2223
install_mingw() {
@@ -76,19 +77,26 @@ install_windows_cuda() {
7677
CUDA_VERSION=$(echo "${CUDA_INFO}" | cut -d: -f1)
7778
CUDA_DRIVER_VERSION=$(echo "${CUDA_INFO}" | cut -d: -f2)
7879

79-
echo "Using CUDA ${CUDA_VERSION} with driver ${CUDA_DRIVER_VERSION}"
80+
if [ -n "${CUDA_DRIVER_VERSION}" ]; then
81+
echo "Using CUDA ${CUDA_VERSION} with driver ${CUDA_DRIVER_VERSION}"
82+
CUDA_INSTALLER="cuda_${CUDA_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe"
83+
else
84+
echo "Using CUDA ${CUDA_VERSION}"
85+
CUDA_INSTALLER="cuda_${CUDA_VERSION}_windows.exe"
86+
fi
8087

8188
echo "Installing Windows CUDA toolkit ${CUDA_VERSION}..."
8289

8390
mkdir -p "${INSTALL_DIR}"
8491
cd "${INSTALL_DIR}"
8592

86-
CUDA_INSTALLER="cuda_${CUDA_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe"
8793
CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${CUDA_INSTALLER}"
8894

8995
# Check if already downloaded and extracted
9096
if [ -d "${INSTALL_DIR}/extracted/cuda_cudart" ]; then
9197
echo "Windows CUDA toolkit already installed, skipping download..."
98+
chmod -R a+rX "${INSTALL_DIR}"
99+
chmod -R a+rwX "${INSTALL_DIR}/extracted/cuda_cudart/cudart/lib"
92100
return 0
93101
fi
94102

@@ -98,8 +106,11 @@ install_windows_cuda() {
98106
echo "Extracting CUDA toolkit..."
99107
7z x "${CUDA_INSTALLER}" -o"extracted" -y
100108

101-
# Fix permissions so ci-user can access the files
109+
# Fix permissions so ci-user can access the files. PyTorch Inductor also
110+
# needs to write a MinGW import library beside cudart.lib during Windows
111+
# cross-compilation.
102112
chmod -R a+rX "${INSTALL_DIR}"
113+
chmod -R a+rwX "${INSTALL_DIR}/extracted/cuda_cudart/cudart/lib"
103114

104115
# Clean up installer to save space
105116
rm -f "${CUDA_INSTALLER}"

.ci/docker/common/install_pytorch.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,20 @@ install_pytorch_and_domains() {
2727
chown -R ci-user .
2828

2929
export _GLIBCXX_USE_CXX11_ABI=1
30+
if [[ "$(uname -m)" == "aarch64" ]]; then
31+
export BUILD_IGNORE_SVE_UNAVAILABLE=1
32+
fi
33+
if [[ -n "${PYTORCH_BUILD_MAX_JOBS:-}" ]]; then
34+
export MAX_JOBS="${PYTORCH_BUILD_MAX_JOBS}"
35+
fi
3036
# Then build and install PyTorch
3137
conda_run python setup.py bdist_wheel
3238
pip_install "$(echo dist/*.whl)"
3339

3440
# Grab the pinned audio and vision commits from PyTorch
3541
TORCHAUDIO_VERSION=release/2.11
3642
export TORCHAUDIO_VERSION
37-
TORCHVISION_VERSION=release/0.26
43+
TORCHVISION_VERSION=release/0.27
3844
export TORCHVISION_VERSION
3945

4046
install_domains

.ci/docker/ubuntu/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,12 @@ RUN bash ./install_cache.sh && rm install_cache.sh utils.sh
6262
ENV SCCACHE_BUCKET ossci-compiler-cache-circleci-v2
6363
ENV SCCACHE_S3_KEY_PREFIX executorch
6464
ENV SCCACHE_REGION us-east-1
65+
ENV AWS_REGION us-east-1
66+
ENV AWS_DEFAULT_REGION us-east-1
6567

6668
ARG TORCH_VERSION
6769
ARG SKIP_PYTORCH
70+
ARG PYTORCH_BUILD_MAX_JOBS
6871
COPY ./common/install_pytorch.sh install_pytorch.sh
6972
COPY ./common/utils.sh utils.sh
7073
RUN if [ -z "${SKIP_PYTORCH}" ]; then bash ./install_pytorch.sh; fi && rm install_pytorch.sh utils.sh

.ci/scripts/export_model_artifact.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ fi
518518

519519
DEVICE_ARG=""
520520
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
521-
DEVICE_ARG="--device cuda"
521+
DEVICE_ARG="--device cuda:0"
522522
elif [ "$DEVICE" = "metal" ]; then
523523
DEVICE_ARG="--device mps"
524524
fi

.ci/scripts/test-cuda-build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
set -exu
99

10-
CUDA_VERSION=${1:-"12.6"}
10+
CUDA_VERSION=${1:-"13.0"}
1111

1212
echo "=== Testing ExecuTorch CUDA ${CUDA_VERSION} Build ==="
1313

.ci/scripts/test_model_e2e_windows.ps1

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,18 +159,31 @@ try {
159159
}
160160
Write-Host "CUDA version check passed: $actualCudaVersion"
161161
}
162+
$cmakeCudaArgs = @()
163+
if (-not [string]::IsNullOrWhiteSpace($env:CUDA_HOME)) {
164+
$cudaNvcc = Join-Path -Path $env:CUDA_HOME -ChildPath "bin\nvcc.exe"
165+
if (-not (Test-Path -Path $cudaNvcc -PathType Leaf)) {
166+
throw "CUDA compiler not found at '$cudaNvcc'"
167+
}
168+
$env:CUDACXX = $cudaNvcc
169+
$cmakeCudaArgs = @(
170+
"-T", "cuda=$env:CUDA_HOME",
171+
"-DCMAKE_CUDA_COMPILER=$cudaNvcc",
172+
"-DCUDAToolkit_ROOT=$env:CUDA_HOME"
173+
)
174+
}
162175
Write-Host "::endgroup::"
163176

164177
Write-Host "::group::Build ExecuTorch (CUDA)"
165178
$numCores = [Math]::Max([Environment]::ProcessorCount - 1, 1)
166-
cmake --preset llm-release-cuda
179+
cmake --preset llm-release-cuda @cmakeCudaArgs
167180
cmake --build cmake-out --target install --config Release -j $numCores
168181
Write-Host "::endgroup::"
169182

170183
Write-Host "::group::Build $runnerTarget"
171184
Push-Location (Join-Path -Path $executorchRoot -ChildPath "examples\models\$runnerPath")
172185
try {
173-
cmake --preset $runnerPreset
186+
cmake --preset $runnerPreset @cmakeCudaArgs
174187
cmake --build (Join-Path -Path $executorchRoot -ChildPath "cmake-out\examples\models\$runnerPath") --target $runnerTarget --config Release -j $numCores
175188
}
176189
finally {

0 commit comments

Comments
 (0)