Skip to content

Commit e1caafb

Browse files
committed
ci(cu131+windows): build CU131 wheels with GGML dynamic backends for windows
- Replace the old CPU/AVX release tag matrix with a single CU131 backend wheel layout.
- Enable `GGML_BACKEND_DL` and `GGML_CPU_ALL_VARIANTS` so Windows wheels ship runtime-loadable GGML backend DLLs and CPU variant backends.
- Use the Windows LLVM toolchain and disable non-wheel targets such as examples, tests, tools, server, embedded UI, and curl.
- Remove the `.basic` style local version suffix and publish wheels as `+cu131`.
- Update the CUDA toolkit and target architectures to CUDA 13.1 and simplify CMake argument handling.

Signed-off-by: JamePeng <jame_peng@sina.com>
1 parent a8f928c commit e1caafb

2 files changed

Lines changed: 188 additions & 134 deletions

File tree

.github/workflows/build-wheels-cu130-win.yml

Lines changed: 0 additions & 134 deletions
This file was deleted.
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
# Manually dispatched workflow that builds CUDA-enabled Windows wheels, one job per matrix entry.
name: Build Wheels (CU131) for Windows
2+
3+
# workflow_dispatch is the only trigger listed.
on:
4+
workflow_dispatch:
5+
6+
# The job token gets write access to repository contents; the final step publishes a release with it.
permissions:
7+
contents: write
8+
9+
jobs:
10+
build_wheels:
11+
name: Build Wheel ${{ matrix.os }} py${{ matrix.pyver }} cu131
12+
runs-on: ${{ matrix.os }}
13+
14+
strategy:
15+
# fail-fast is disabled, so a failure in one matrix job does not cancel the remaining ones.
fail-fast: false
16+
matrix:
17+
os: ["windows-2022"]
18+
# Python versions are quoted so "3.10" is not read as the float 3.1.
pyver: ["3.10", "3.11", "3.12", "3.13", "3.14"]
19+
cuda: ["13.1.1"]
20+
# Semicolon-separated architecture list; the build step forwards it to CMAKE_CUDA_ARCHITECTURES.
cudaarch: ["75-real;80-real;86-real;87-real;89-real;90-real;100-real;120-real"]
21+
22+
# Steps run under PowerShell unless a step sets its own shell.
defaults:
23+
run:
24+
shell: pwsh
25+
26+
# CUDAVER and CUDAARCHVER expose the matrix values to the build script; MAX_JOBS is read by the build step.
env:
27+
CUDAVER: ${{ matrix.cuda }}
28+
CUDAARCHVER: ${{ matrix.cudaarch }}
29+
MAX_JOBS: 8
30+
31+
steps:
32+
# Uses the microsoft/setup-msbuild action, requesting the x64 MSBuild architecture.
- name: Add MSBuild to PATH
33+
uses: microsoft/setup-msbuild@v3
34+
with:
35+
msbuild-architecture: x64
36+
37+
# Checks out the repository together with all nested submodules.
# NOTE(review): the build step looks for a toolchain file under vendor\llama.cpp, presumably one of these submodules — confirm.
- name: Checkout
38+
uses: actions/checkout@v6
39+
with:
40+
submodules: recursive
41+
42+
# Installs the CUDA toolkit version selected by the matrix, with the action's GitHub cache turned off.
# NOTE(review): the build step reads CUDA_PATH, presumably exported by this action — confirm.
- name: Install CUDA ${{ matrix.cuda }}
43+
uses: Jimver/cuda-toolkit@v0.2.35
44+
id: cuda-toolkit
45+
with:
46+
cuda: ${{ matrix.cuda }}
47+
use-github-cache: false
48+
49+
# Sets up uv with the matrix Python version, activates its environment, and enables the action's cache.
- name: Install uv and Python ${{ matrix.pyver }}
50+
uses: astral-sh/setup-uv@v7
51+
with:
52+
python-version: ${{ matrix.pyver }}
53+
activate-environment: true
54+
enable-cache: true
55+
56+
# Turns on git long-path support system-wide, then installs or upgrades the Python packaging tools
# (build, setuptools, wheel, packaging) through uv.
- name: Install dependencies
57+
run: |
58+
git config --system core.longpaths true
59+
uv pip install --upgrade build setuptools wheel packaging
60+
61+
# Overrides the default pwsh shell with cmd to call vcvarsall.bat for x64, then appends the resulting
# PATH, INCLUDE, LIB and LIBPATH values to GITHUB_ENV so later steps see them.
# NOTE(review): the path is hard-coded to the Visual Studio 2022 Enterprise edition — confirm it matches the runner image.
- name: Setup MSVC environment for nvcc
62+
shell: cmd
63+
run: |
64+
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
65+
echo PATH=%PATH%>>%GITHUB_ENV%
66+
echo INCLUDE=%INCLUDE%>>%GITHUB_ENV%
67+
echo LIB=%LIB%>>%GITHUB_ENV%
68+
echo LIBPATH=%LIBPATH%>>%GITHUB_ENV%
69+
70+
# Builds one CUDA wheel with dynamically loadable GGML backends, renames it so
# the version carries a +cu<major><minor> local suffix, and exports
# CUDA_VERSION / TAG_VERSION to GITHUB_ENV for the release step.
- name: Build wheel
  run: |
    # "13.1.1" -> "13.1" -> "131": drop the patch component, then the dot.
    $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.', '')

    $env:CUDA_HOME = $env:CUDA_PATH
    $env:CUDA_TOOLKIT_ROOT_DIR = $env:CUDA_PATH
    $env:VERBOSE = '1'

    # CMAKE_BUILD_PARALLEL_LEVEL is an environment variable read by
    # `cmake --build`, not a cache entry; passing it as -D has no effect,
    # so it is exported here instead.
    $env:CMAKE_BUILD_PARALLEL_LEVEL = $env:MAX_JOBS

    # Force CMake to use Ninja + LLVM/Clang instead of the default
    # Visual Studio generator. MSVC skips several GGML CPU all-variant
    # backends, such as ivybridge, piledriver, cooperlake, zen4, and
    # sapphirerapids.
    $env:CMAKE_GENERATOR = 'Ninja Multi-Config'

    # The toolchain file is looked up in the vendored llama.cpp tree first,
    # then in the repository's own cmake directory.
    $toolchainCandidates = @(
      (Join-Path $env:GITHUB_WORKSPACE "vendor\llama.cpp\cmake\x64-windows-llvm.cmake"),
      (Join-Path $env:GITHUB_WORKSPACE "cmake\x64-windows-llvm.cmake")
    )

    $toolchainFile = $toolchainCandidates |
      Where-Object { Test-Path $_ } |
      Select-Object -First 1

    if (!$toolchainFile) {
      Write-Error "Toolchain file not found. Checked: $($toolchainCandidates -join ', ')"
      exit 1
    }

    # Forward slashes keep the path intact when CMAKE_ARGS is split into
    # individual arguments.
    $toolchainFile = $toolchainFile.Replace('\', '/')
    Write-Output "Using toolchain file: $toolchainFile"

    # Build one CUDA wheel with dynamic GGML backends:
    # - GGML_BACKEND_DL enables runtime-loadable backend DLLs.
    # - GGML_CPU_ALL_VARIANTS builds CPU variant DLLs such as ggml-cpu-x64,
    #   ggml-cpu-haswell, ggml-cpu-alderlake, etc.
    # - GGML_NATIVE=OFF avoids binding the wheel to the runner CPU.
    $cmakeArgs = @(
      # Windows toolchain / common runtime.
      # Use the candidate that was actually found above, so the fallback
      # location works too instead of always pointing at the vendored path.
      "-DCMAKE_TOOLCHAIN_FILE=$toolchainFile"
      '-DLLAMA_BUILD_BORINGSSL=ON'

      # Disable non-wheel targets
      '-DLLAMA_BUILD_EXAMPLES=OFF'
      '-DLLAMA_BUILD_TESTS=OFF'
      '-DLLAMA_BUILD_TOOLS=OFF'
      '-DLLAMA_BUILD_SERVER=OFF'
      '-DLLAMA_BUILD_UI=OFF'
      '-DLLAMA_USE_PREBUILT_UI=OFF'
      '-DLLAMA_CURL=OFF'

      # GGML dynamic backend layout
      '-DGGML_CPU=ON'
      '-DGGML_CUDA=ON'
      '-DGGML_NATIVE=OFF'
      '-DGGML_BACKEND_DL=ON'
      '-DGGML_CPU_ALL_VARIANTS=ON'
      '-DGGML_OPENMP=ON'

      # CUDA backend
      "-DCMAKE_CUDA_ARCHITECTURES=$env:CUDAARCHVER"
      '-DGGML_CUDA_FORCE_MMQ=ON'
      '-DCUDA_SEPARABLE_COMPILATION=ON'
      '-DCMAKE_CUDA_FLAGS=--diag-suppress=177'
      '-DCMAKE_CUDA_FLAGS_RELEASE=--diag-suppress=177'

      # Build behavior
      '-DENABLE_CCACHE=ON'
    )

    $env:CMAKE_ARGS = $cmakeArgs -join ' '
    Write-Output "CMAKE_ARGS=$env:CMAKE_ARGS"

    python -m build --wheel

    # Surface a build failure directly instead of relying only on the
    # missing-wheel check below.
    if ($LASTEXITCODE -ne 0) {
      Write-Error "python -m build failed with exit code $LASTEXITCODE"
      exit $LASTEXITCODE
    }

    # Check if wheel was built
    if (!(Test-Path '.\dist\*.whl')) {
      Write-Error "No wheel built in dist/ directory"
      exit 1
    }

    $wheelFile = Get-Item '.\dist\*.whl' | Select-Object -First 1

    # Wheel filename format:
    # name-version-python_tag-abi_tag-platform_tag.whl
    # The name is split on '-' only, so $platTag still ends in ".whl" and
    # the rebuilt name keeps its extension.
    $parts = $wheelFile.Name.Split('-')
    $distName = $parts[0]
    $version = $parts[1]
    $pyTag = $parts[2]
    $abiTag = $parts[3]
    $platTag = $parts[4]

    # CPU all-variants is now an internal runtime layout detail.
    $newVersion = "$version+cu$cudaVersion"
    $newName = "$distName-$newVersion-$pyTag-$abiTag-$platTag"

    # Rename wheel file
    Rename-Item -Path $wheelFile.FullName -NewName $newName
    Write-Output "Renamed wheel to: $newName"

    # Export the values used to build the release tag
    Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
    Write-Output "TAG_VERSION=$version" >> $env:GITHUB_ENV
174+
# Exports BUILD_DATE (formatted as YYYYMMDD) to GITHUB_ENV; the release step uses it in the tag name.
- name: Get current date
174+
id: get-date
175+
run: |
176+
$currentDate = Get-Date -UFormat "%Y%m%d"
177+
Write-Output "BUILD_DATE=$currentDate" >> $env:GITHUB_ENV
179+
180+
# Publishes everything in dist/ as release assets. always() lets this run after a failed step, but the
# TAG_VERSION check limits it to runs where the build step got far enough to export that variable.
- name: Create release
180+
if: always() && env.TAG_VERSION != ''
181+
uses: softprops/action-gh-release@v3
182+
with:
183+
files: dist/*
184+
# Set tag_name to v<tag>-cu<cuda_version>-win-<date>
185+
tag_name: v${{ env.TAG_VERSION }}-cu${{ env.CUDA_VERSION }}-win-${{ env.BUILD_DATE }}
186+
env:
187+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

0 commit comments

Comments
 (0)