Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions .github/workflows/linux_cuda_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
build_config: Release
architecture: x64
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
docker_image_repo: onnxruntimecuda12manylinuxbuild
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --parallel --nvcc_threads 4 --flash_nvcc_threads 4 --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
docker_image_repo: onnxruntimecuda13manylinuxbuild
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --parallel --nvcc_threads 4 --flash_nvcc_threads 4 --cuda_version=13.0 --cuda_home=/usr/local/cuda-13.0 --cudnn_home=/usr/local/cuda-13.0 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
run_tests: false # <<< Do not run tests in this job
upload_build_output: true # <<< Upload the build/Release directory
Expand Down Expand Up @@ -57,8 +57,8 @@ jobs:
id: build_docker_image_step
with:
dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda13manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
push: true
azure-container-registry-name: onnxruntimebuildcache
env:
Expand Down Expand Up @@ -91,6 +91,15 @@ jobs:
echo "Warning: perms.txt not found in artifact."
fi

# Verify the GPU is accessible inside Docker before running the full test suite.
# If the NVIDIA Container Toolkit fails to expose /dev/nvidia* devices,
# tests will fail with "CUDA failure 100" and waste 10+ minutes.
- name: Verify GPU access in Docker
run: |
docker run --rm --gpus all \
"${{ steps.build_docker_image_step.outputs.full-image-name }}" \
nvidia-smi

# --- Run Tests using the downloaded build ---
# The run-build-script-in-docker action mounts ${{ runner.temp }} to /onnxruntime_src/build
# So build.py --build_dir build/Release inside the container correctly finds the artifacts.
Expand All @@ -102,5 +111,5 @@ jobs:
build_config: Release
mode: 'test' # Set mode to test
execution_providers: 'cuda'
extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=13.0 --cuda_home=/usr/local/cuda-13.0 --cudnn_home=/usr/local/cuda-13.0 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
28 changes: 21 additions & 7 deletions .github/workflows/linux_cuda_plugin_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ jobs:
build_config: Release
architecture: x64
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
docker_image_repo: onnxruntimecuda12manylinuxbuild
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
docker_image_repo: onnxruntimecuda13manylinuxbuild
extra_build_flags: >-
--use_binskim_compliant_compile_flags
--build_wheel
--parallel
--nvcc_threads 4
--flash_nvcc_threads 4
--cuda_version=12.8
--cuda_home=/usr/local/cuda-12.8
--cudnn_home=/usr/local/cuda-12.8
--cuda_version=13.0
--cuda_home=/usr/local/cuda-13.0
--cudnn_home=/usr/local/cuda-13.0
--enable_cuda_profiling
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
--cmake_extra_defines onnxruntime_QUICK_BUILD=ON
Expand Down Expand Up @@ -67,8 +67,8 @@ jobs:
id: build_docker_image_step
with:
dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda13manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
push: true
azure-container-registry-name: onnxruntimebuildcache
env:
Expand Down Expand Up @@ -100,6 +100,15 @@ jobs:
echo "Warning: perms.txt not found in artifact."
fi

# Verify the GPU is accessible inside Docker before running the full test suite.
# If the NVIDIA Container Toolkit fails to expose /dev/nvidia* devices,
# tests will fail with "CUDA failure 100" and waste 10+ minutes.
- name: Verify GPU access in Docker
run: |
docker run --rm --gpus all \
"${{ steps.build_docker_image_step.outputs.full-image-name }}" \
nvidia-smi

# --- Install the ORT wheel and run CUDA plugin EP tests ---
- name: Run CUDA Plugin EP Python Tests
run: |
Expand All @@ -111,6 +120,11 @@ jobs:
bash -c "
set -ex
export PATH=/opt/python/cp312-cp312/bin:\$PATH
# Ensure libcudart.so.13 is findable regardless of host-runner NVIDIA Container Toolkit configuration.
# The CUDA runtime library lives in the container image at /usr/local/cuda-13.0/lib64, but the
# LD_LIBRARY_PATH may not include this path when the runner's NVIDIA toolkit only mounts driver
# libraries at /usr/local/nvidia/lib64.
# The previous value is appended only when it is non-empty: a trailing ':' would create an empty
# search-path entry, which the dynamic loader interprets as the current working directory.
export LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}

# Install the ORT wheel
python -m pip install /build/Release/Release/dist/onnxruntime*.whl
Expand Down
77 changes: 77 additions & 0 deletions .github/workflows/nightly_webgpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Nightly WebGPU workflow: builds ONNX Runtime with --use_webgpu on a self-hosted Windows GPU runner,
# runs the tests with ORT_WEBGPU_EP_SHADER_DUMP_FILE pointing at a log file, and then hands that log
# to the repo-local webgpu-validate-shader-key action.
name: Nightly ONNX Runtime WebGPU Builds

# Triggers: the daily schedule below, plus manual runs through workflow_dispatch.
on:
schedule:
- cron: '0 9 * * *' # Daily at 09:00 UTC
workflow_dispatch:

# Runs that share the same workflow name and ref form one concurrency group; starting a new run
# cancels the one that is still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
webgpu_shader_key_validation:
# Self-hosted runner selected by pool label; the JobId label embeds the run id, run number and attempt.
runs-on: [
"self-hosted",
"1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
"JobId=webgpu_shader_validation-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
]
timeout-minutes: 90
# Job-wide environment variables, visible to every step below.
# NOTE(review): presumably read by the build/test scripts - confirm where each one is consumed.
env:
ALLOW_RELEASED_ONNX_OPSET_ONLY: "0"
ONNXRUNTIME_TEST_GPU_DEVICE_ID: "0"
steps:
# Full history (fetch-depth: 0); submodules are not checked out.
- name: Checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
submodules: none

- name: Setup Python
uses: actions/setup-python@v6
with:
python-version: "3.12"
architecture: x64

# Repo-local composite action, invoked for the x64 architecture.
- name: Locate vcvarsall and Setup Env
uses: ./.github/actions/locate-vcvarsall-and-setup-env
with:
architecture: x64

- name: Install python modules
run: python -m pip install -r tools\ci_build\github\windows\python\requirements.txt
shell: cmd
working-directory: ${{ github.workspace }}

- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: "24"

# Single build.py invocation that updates, builds and tests a RelWithDebInfo shared-library build
# with WebGPU enabled. The build directory is the workspace root, and the shader dump environment
# variable is set to the same log path that the two following steps read.
- name: Build and Test
shell: pwsh
run: |
$env:ORT_WEBGPU_EP_SHADER_DUMP_FILE = "${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\shader_dump.log"

python.exe ${{ github.workspace }}\tools\ci_build\build.py `
--config RelWithDebInfo `
--build_dir ${{ github.workspace }} `
--use_binskim_compliant_compile_flags `
--cmake_generator "Visual Studio 17 2022" `
--build_shared_lib `
--use_webgpu `
--wgsl_template static `
--cmake_extra_defines onnxruntime_BUILD_DAWN_SHARED_LIBRARY=ON `
--update `
--build --parallel `
--test

# Lists the shader dump log.
# NOTE(review): presumably meant to fail the job early when the log was not produced - confirm.
- name: Check log file
shell: cmd
run: |
dir ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\shader_dump.log

# Passes the shader dump log to the repo-local validation action.
- name: Validate shader keys
uses: ./.github/actions/webgpu-validate-shader-key
with:
log_file_path: ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\shader_dump.log
8 changes: 8 additions & 0 deletions .github/workflows/windows_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ jobs:
runs-on: [
"self-hosted",
"1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
"1ES.ImageOverride=onnxruntime-Win-CPU-VS2022-Latest-NVMe-x64-test",
"JobId=windows-cuda-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
]
steps:
Expand Down Expand Up @@ -222,6 +223,13 @@ jobs:
with:
whl-directory: ${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo\dist

# Verify the GPU is accessible before running the full test suite.
# If the NVIDIA driver is not available, tests will fail with
# "CUDA failure 100" and waste significant time.
- name: Verify GPU access
shell: pwsh
run: nvidia-smi

- name: Run Tests
working-directory: ${{ runner.temp }}
run: |
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/windows_cuda_plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ jobs:
runs-on: [
"self-hosted",
"1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
"1ES.ImageOverride=onnxruntime-Win-CPU-VS2022-Latest-NVMe-x64-test",
"JobId=windows-cuda-plugin-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
]
steps:
Expand Down Expand Up @@ -187,6 +188,13 @@ jobs:
with:
whl-directory: ${{ runner.temp }}\build\Release\Release\dist

# Verify the GPU is accessible before running the full test suite.
# If the NVIDIA driver is not available, tests will fail with
# "CUDA failure 100" and waste significant time.
- name: Verify GPU access
shell: pwsh
run: nvidia-smi

- name: Run CUDA Plugin EP Python Tests
working-directory: ${{ github.workspace }}\onnxruntime\test\python\transformers
shell: pwsh
Expand Down
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ googletest;https://github.com/google/googletest/archive/refs/tags/v1.17.0.zip;f6
#xnnpack 2025.06.22
googlexnnpack;https://github.com/google/XNNPACK/archive/3cf85e705098622d59056dcb8f5f963ea7bb0a00.zip;6f6bbba627241f89463ca845febaf063982b34fe
json;https://github.com/nlohmann/json/archive/refs/tags/v3.11.3.zip;5e88795165cc8590138d1f47ce94ee567b85b4d6
microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14
microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.2.1.zip;1094e3bb7a8af763dcb136ccd676e6e75e614eec
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
Expand Down
35 changes: 17 additions & 18 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -405,24 +405,16 @@ if (CPUINFO_SUPPORTED)
endif()
endif()

if(onnxruntime_USE_CUDA)
onnxruntime_fetchcontent_declare(
GSL
URL ${DEP_URL_microsoft_gsl}
URL_HASH SHA1=${DEP_SHA1_microsoft_gsl}
PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/gsl/1064.patch
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS 4.0 NAMES Microsoft.GSL
)
else()
onnxruntime_fetchcontent_declare(
GSL
URL ${DEP_URL_microsoft_gsl}
URL_HASH SHA1=${DEP_SHA1_microsoft_gsl}
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS 4.0 NAMES Microsoft.GSL
)
endif()
# Microsoft GSL, fetched from the URL/hash pinned in deps.txt.
#
# The patch is a stringify fix for GSL_SUPPRESS on MSVC (C4875). Remove it when GSL ships a release
# containing microsoft/GSL#1213 (commit 543d0dd).
#
# The patch is applied for every build configuration, so the command is only set when a patch
# executable was found (the same Patch_FOUND guard the cxxopts declaration in this file uses).
# Without the guard, ${Patch_EXECUTABLE} would not name a real program and the patch step would fail.
#
# NOTE(review): FIND_PACKAGE_ARGS still accepts GSL 4.0 although deps.txt pins v4.2.1; presumably a
# pre-installed GSL located through find_package is used as-is, without this patch - confirm whether
# the minimum version should be raised.
if(Patch_FOUND)
  set(ONNXRUNTIME_GSL_PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/gsl/1213.patch)
else()
  set(ONNXRUNTIME_GSL_PATCH_COMMAND "")
endif()

onnxruntime_fetchcontent_declare(
  GSL
  URL ${DEP_URL_microsoft_gsl}
  URL_HASH SHA1=${DEP_SHA1_microsoft_gsl}
  PATCH_COMMAND ${ONNXRUNTIME_GSL_PATCH_COMMAND}
  EXCLUDE_FROM_ALL
  FIND_PACKAGE_ARGS 4.0 NAMES Microsoft.GSL
)
set(GSL_TARGET "Microsoft.GSL::GSL")
set(GSL_INCLUDE_DIR "$<TARGET_PROPERTY:${GSL_TARGET},INTERFACE_INCLUDE_DIRECTORIES>")
onnxruntime_fetchcontent_makeavailable(GSL)
Expand Down Expand Up @@ -624,10 +616,17 @@ endif()
if(onnxruntime_ENABLE_TRAINING OR (onnxruntime_ENABLE_TRAINING_APIS AND onnxruntime_BUILD_UNIT_TESTS))
# Once code under orttraining/orttraining/models dir is removed "onnxruntime_ENABLE_TRAINING" should be removed from
# this conditional
# cxxopts is patched with patches/cxxopts/gcc-15-compat.patch when a patch executable is available.
# The command defaults to empty, in which case the downloaded sources are used unmodified.
set(ONNXRUNTIME_CXXOPTS_PATCH_COMMAND "")
if(Patch_FOUND)
  set(ONNXRUNTIME_CXXOPTS_PATCH_COMMAND
      ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 <
      ${PROJECT_SOURCE_DIR}/patches/cxxopts/gcc-15-compat.patch)
endif()

# Declare the dependency; an installed cxxopts package is accepted through FIND_PACKAGE_ARGS.
onnxruntime_fetchcontent_declare(
  cxxopts
  URL ${DEP_URL_cxxopts}
  URL_HASH SHA1=${DEP_SHA1_cxxopts}
  PATCH_COMMAND ${ONNXRUNTIME_CXXOPTS_PATCH_COMMAND}
  EXCLUDE_FROM_ALL
  FIND_PACKAGE_ARGS NAMES cxxopts
)
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ onnxruntime_add_static_library(onnxruntime_mlas
${MLAS_SRC_DIR}/qlutgemm.cpp
${MLAS_SRC_DIR}/sqnbitgemm_q8_block.h
${MLAS_SRC_DIR}/flashattn.cpp
${MLAS_SRC_DIR}/flashattn_qkv.cpp
${MLAS_SRC_DIR}/qkv_quant.cpp
${MLAS_SRC_DIR}/cast.cpp
${MLAS_SRC_DIR}/layernorm.cpp
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ if (onnxruntime_USE_CUDA AND NOT WIN32)
)
include(cutlass)
target_include_directories(onnxruntime_pybind11_state PRIVATE ${cutlass_SOURCE_DIR}/include)
target_link_libraries(onnxruntime_pybind11_state PRIVATE CUDA::cudart)
endif()
if (onnxruntime_USE_CUDA AND WIN32)
target_compile_definitions(onnxruntime_pybind11_state PRIVATE ORT_NO_CUDA_IN_PYBIND)
Expand Down
13 changes: 13 additions & 0 deletions cmake/patches/cxxopts/gcc-15-compat.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/include/cxxopts.hpp b/include/cxxopts.hpp
index 991ba3fc..a2e71faf 100644
--- a/include/cxxopts.hpp
+++ b/include/cxxopts.hpp
@@ -25,6 +25,7 @@
#ifndef CXXOPTS_HPP_INCLUDED
#define CXXOPTS_HPP_INCLUDED

+#include <cstdint>
#include <cstring>
#include <cctype>
#include <exception>

26 changes: 0 additions & 26 deletions cmake/patches/gsl/1064.patch

This file was deleted.

13 changes: 13 additions & 0 deletions cmake/patches/gsl/1213.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/include/gsl/assert b/include/gsl/assert
index 58e0426..b3f7c8a 100644
--- a/include/gsl/assert
+++ b/include/gsl/assert
@@ -50,7 +50,7 @@
#define GSL_SUPPRESS(x) [[gsl::suppress(#x)]]
#else
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__NVCC__)
-#define GSL_SUPPRESS(x) [[gsl::suppress(x)]]
+#define GSL_SUPPRESS(x) [[gsl::suppress(#x)]]
#else
#define GSL_SUPPRESS(x)
#endif // _MSC_VER
5 changes: 4 additions & 1 deletion cmake/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@
"name": "mimalloc",
"platform": "windows"
},
"ms-gsl",
{
"name": "ms-gsl",
"version>=": "4.2.1"
},
"nlohmann-json",
"onnx",
{
Expand Down
Loading
Loading