-
Notifications
You must be signed in to change notification settings - Fork 10
182 lines (167 loc) · 7.56 KB
/
docker.yml
File metadata and controls
182 lines (167 loc) · 7.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
name: docker

# Build the parakeet-cli container images and publish them to GitHub Container
# Registry (ghcr.io/<owner>/parakeet.cpp-cli).
#
# Each variant (cpu, cuda) is a multi-arch image (linux/amd64 + linux/arm64).
# Every arch is built natively on its own runner (no QEMU): amd64 on
# ubuntu-24.04, arm64 on ubuntu-24.04-arm. The per-arch images are pushed by
# digest, then a merge job assembles one multi-arch manifest per variant.
#
# The CUDA images use the CUDA 13 base so ggml compiles the Blackwell
# architectures (sm_120 + sm_121); that is what makes the arm64 CUDA image run
# on GB10 / Grace-Blackwell (DGX Spark). CUDA 12.6 tops out at sm_90.
#
# pull_request builds the CPU variant only, as a fast Dockerfile gate. The CUDA
# build takes tens of minutes (it compiles many GPU architectures), so it runs
# only on push to the default branch, tags, and manual dispatch, all of which
# also push the image. Use workflow_dispatch to exercise CUDA before merging.

# Triggers: pushes to master and to v* tags, every pull request, and manual
# dispatch from the Actions tab.
on:
  push:
    branches:
      - master
    tags:
      - 'v*'
  pull_request:
  workflow_dispatch:
# Workflow-wide environment, visible to every job and step below.
env:
  # Container registry host the images are pushed to.
  REGISTRY: "ghcr.io"
  # The image is named after the binary it ships, which leaves room for a
  # sibling image later (e.g. parakeet.cpp-server). Expands to
  # <owner>/parakeet.cpp-cli.
  IMAGE_NAME: "${{ github.repository }}-cli"

jobs:
# -------------------------------------------------------------------------
# setup: choose the build matrix for this event. PRs get CPU only (fast
# gate); everything else gets CPU + CUDA.
# -------------------------------------------------------------------------
setup:
  # Publishes the build matrix as a single-line JSON string in the `matrix`
  # job output; the build job expands it with fromJSON().
  runs-on: ubuntu-latest
  # This job only echoes a string: it never checks out code or talks to the
  # registry, so the job token is granted no scopes at all.
  permissions: {}
  outputs:
    matrix: ${{ steps.set.outputs.matrix }}
  steps:
    - name: Select build matrix
      id: set
      env:
        # Handed over through the environment instead of being spliced into
        # the script text by expression interpolation.
        EVENT_NAME: ${{ github.event_name }}
      run: |
        # Each variable holds a comma-separated run of JSON objects, one per
        # (variant, arch) pair, ready to be dropped into an "include" array.
        CPU='{"variant":"cpu","arch":"amd64","runner":"ubuntu-24.04","build_base":"ubuntu:24.04","runtime_base":"ubuntu:24.04","cmake_args":"","cuda_archs":""},{"variant":"cpu","arch":"arm64","runner":"ubuntu-24.04-arm","build_base":"ubuntu:24.04","runtime_base":"ubuntu:24.04","cmake_args":"","cuda_archs":""}'
        # CUDA: drop the libcuda driver-lib dependency (GGML_CUDA_NO_VMM) since
        # the build container has no GPU driver. amd64 takes ggml's default
        # (broad) arch list; arm64 only targets Grace GPUs (Hopper + GB10).
        CUDA='{"variant":"cuda","arch":"amd64","runner":"ubuntu-24.04","build_base":"nvidia/cuda:13.0.1-devel-ubuntu24.04","runtime_base":"nvidia/cuda:13.0.1-runtime-ubuntu24.04","cmake_args":"-DPARAKEET_GGML_CUDA=ON -DGGML_CUDA_NO_VMM=ON","cuda_archs":""},{"variant":"cuda","arch":"arm64","runner":"ubuntu-24.04-arm","build_base":"nvidia/cuda:13.0.1-devel-ubuntu24.04","runtime_base":"nvidia/cuda:13.0.1-runtime-ubuntu24.04","cmake_args":"-DPARAKEET_GGML_CUDA=ON -DGGML_CUDA_NO_VMM=ON","cuda_archs":"90;121-real"}'
        # Pull requests get the CPU entries only; every other event also
        # builds CUDA.
        if [ "$EVENT_NAME" = "pull_request" ]; then
          include="${CPU}"
        else
          include="${CPU},${CUDA}"
        fi
        # The value must stay on one line: a plain name=value write to
        # GITHUB_OUTPUT cannot carry embedded newlines.
        echo "matrix={\"include\":[${include}]}" >> "$GITHUB_OUTPUT"
# -------------------------------------------------------------------------
# build: one job per (variant, arch). Builds natively on the matching runner
# and pushes the image by digest (untagged). PRs build only (cache-only).
# -------------------------------------------------------------------------
build:
  # One run per matrix entry produced by the setup job. Outside pull requests
  # the image is pushed untagged (by digest) and the digest is handed to the
  # merge job as an artifact; on pull requests nothing leaves the runner.
  needs: setup
  runs-on: ${{ matrix.runner }}
  permissions:
    contents: read
    packages: write
  strategy:
    # Let the remaining (variant, arch) builds finish when one of them fails.
    fail-fast: false
    matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
  steps:
    - name: Checkout (with submodules)
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    # Only authenticate when we actually push (i.e. not on pull_request).
    - name: Log in to ghcr.io
      if: github.event_name != 'pull_request'
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    # github.repository keeps the owner/repo casing, but an image reference
    # must be all lowercase, so a mixed-case owner (or fork) would make the
    # push fail. Lowercase it once and reuse the result below.
    - name: Resolve image reference
      id: image
      if: github.event_name != 'pull_request'
      shell: bash
      run: |
        echo "ref=${REGISTRY}/${IMAGE_NAME,,}" >> "$GITHUB_OUTPUT"
    - name: Build and push by digest (${{ matrix.variant }}/${{ matrix.arch }})
      id: build
      uses: docker/build-push-action@v6
      with:
        context: .
        file: ./Dockerfile
        platforms: linux/${{ matrix.arch }}
        build-args: |
          BUILD_BASE=${{ matrix.build_base }}
          RUNTIME_BASE=${{ matrix.runtime_base }}
          CMAKE_EXTRA_ARGS=${{ matrix.cmake_args }}
          CUDA_ARCHS=${{ matrix.cuda_archs }}
        # PRs: build only (cache-only, nothing pushed). Otherwise push the
        # image by digest so the merge job can stitch the arches together.
        outputs: ${{ github.event_name != 'pull_request' && format('type=image,name={0},push-by-digest=true,name-canonical=true,push=true', steps.image.outputs.ref) || 'type=cacheonly' }}
        # One cache scope per (variant, arch) so the builds do not evict each
        # other's layers.
        cache-from: type=gha,scope=${{ matrix.variant }}-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=${{ matrix.variant }}-${{ matrix.arch }}
    # The digest travels as the NAME of an empty file (minus the "sha256:"
    # prefix); the merge job rebuilds the references from the file names.
    - name: Export digest
      if: github.event_name != 'pull_request'
      env:
        DIGEST: ${{ steps.build.outputs.digest }}
      run: |
        mkdir -p /tmp/digests
        touch "/tmp/digests/${DIGEST#sha256:}"
    - name: Upload digest
      if: github.event_name != 'pull_request'
      uses: actions/upload-artifact@v4
      with:
        name: digests-${{ matrix.variant }}-${{ matrix.arch }}
        path: /tmp/digests/*
        # A missing digest file means the build step produced no digest; fail
        # here rather than letting the merge job assemble a partial manifest.
        if-no-files-found: error
        retention-days: 1
# -------------------------------------------------------------------------
# merge: combine the per-arch digests of each variant into one multi-arch
# manifest and tag it. Skipped on pull_request (nothing was pushed).
# -------------------------------------------------------------------------
merge:
  # Runs once per variant: gathers that variant's per-arch digests, creates a
  # single multi-arch manifest from them and pushes it under the computed tags.
  if: github.event_name != 'pull_request'
  needs: build
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write
  strategy:
    fail-fast: false
    matrix:
      include:
        # `suffix` is appended to every tag of the variant (see `flavor`).
        - variant: cpu
          suffix: ""
        - variant: cuda
          suffix: "-cuda"
  steps:
    # merge-multiple flattens the per-arch artifacts into one directory, so
    # /tmp/digests ends up holding one empty file per arch, named by digest.
    - name: Download digests
      uses: actions/download-artifact@v4
      with:
        path: /tmp/digests
        pattern: digests-${{ matrix.variant }}-*
        merge-multiple: true
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Log in to ghcr.io
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    # github.repository keeps the owner/repo casing, but an image reference
    # must be all lowercase; lowercase it once for the steps below.
    - name: Resolve image reference
      id: image
      shell: bash
      run: |
        echo "ref=${REGISTRY}/${IMAGE_NAME,,}" >> "$GITHUB_OUTPUT"
    - name: Compute image tags
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ steps.image.outputs.ref }}
        # cpu  -> latest, sha-xxxx, vX.Y.Z
        # cuda -> latest-cuda, sha-xxxx-cuda, vX.Y.Z-cuda
        flavor: |
          suffix=${{ matrix.suffix }},onlatest=true
        tags: |
          type=raw,value=latest,enable={{is_default_branch}}
          type=ref,event=tag
          type=sha
    - name: Create multi-arch manifest and push
      working-directory: /tmp/digests
      env:
        IMAGE_REF: ${{ steps.image.outputs.ref }}
      run: |
        # Both substitutions are intentionally unquoted: each must split into
        # separate arguments ("-t <tag>" pairs, then one <image>@sha256:<digest>
        # source per file in this directory).
        docker buildx imagetools create \
          $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
          $(printf "${IMAGE_REF}@sha256:%s " *)
    # Inspect a tag this run actually pushed. `latest` is only generated on
    # the default branch, so hard-coding it would inspect a stale manifest
    # (or fail) on tag pushes and on dispatches from other branches. The
    # `type=sha` rule guarantees the tag list is never empty.
    - name: Inspect manifest
      run: |
        docker buildx imagetools inspect \
          "$(jq -r '.tags[0]' <<< "$DOCKER_METADATA_OUTPUT_JSON")"