# parakeet.cpp/.github/workflows/docker.yml (mudler/parakeet.cpp, master)
name: docker

# Builds the parakeet-cli container images and publishes them to the GitHub
# Container Registry as ghcr.io/<owner>/parakeet.cpp-cli.
#
# Variants: cpu and cuda, each a multi-arch image (linux/amd64 + linux/arm64).
# No QEMU is involved: every arch is built natively on its own runner (amd64
# on ubuntu-24.04, arm64 on ubuntu-24.04-arm). Per-arch images are pushed by
# digest, and a merge job then assembles one multi-arch manifest per variant.
#
# CUDA images are based on CUDA 13 so that ggml compiles the Blackwell
# architectures (sm_120 + sm_121); this is what lets the arm64 CUDA image run
# on GB10 / Grace-Blackwell (DGX Spark). CUDA 12.6 tops out at sm_90.
#
# Triggers: pull_request builds only the CPU variant, as a fast Dockerfile
# gate. The CUDA build compiles many GPU architectures and takes tens of
# minutes, so it runs only on push to the default branch, on tags, and on
# manual dispatch; all of those also push the image. Use workflow_dispatch to
# exercise CUDA before merging.
on:
  push:
    branches:
      - master
    tags:
      - 'v*'
  pull_request:
  workflow_dispatch:

env:
  REGISTRY: ghcr.io
  # The image is named after the binary it ships, which leaves room for a
  # future server image next to it (e.g. parakeet.cpp-server). Resolves to
  # <owner>/parakeet.cpp-cli.
  IMAGE_NAME: ${{ github.repository }}-cli

jobs:
  # -------------------------------------------------------------------------
  # setup: choose the build matrix for this event. PRs get CPU only (fast
  # gate); everything else gets CPU + CUDA. The matrix is emitted as a JSON
  # string in the `matrix` job output and expanded by the build job.
  # -------------------------------------------------------------------------
  setup:
    runs-on: ubuntu-latest
    # This job only runs a shell script, so it needs no GITHUB_TOKEN scopes.
    permissions: {}
    outputs:
      matrix: ${{ steps.set.outputs.matrix }}
    steps:
      - name: Select build matrix
        id: set
        env:
          # Passed through the environment instead of being interpolated into
          # the script text.
          EVENT_NAME: ${{ github.event_name }}
        run: |
          # One JSON object per (variant, arch) matrix entry.
          CPU_AMD64='{"variant":"cpu","arch":"amd64","runner":"ubuntu-24.04","build_base":"ubuntu:24.04","runtime_base":"ubuntu:24.04","cmake_args":"","cuda_archs":""}'
          CPU_ARM64='{"variant":"cpu","arch":"arm64","runner":"ubuntu-24.04-arm","build_base":"ubuntu:24.04","runtime_base":"ubuntu:24.04","cmake_args":"","cuda_archs":""}'
          # CUDA: drop the libcuda driver-lib dependency (GGML_CUDA_NO_VMM) since
          # the build container has no GPU driver. amd64 takes ggml's default
          # (broad) arch list; arm64 only targets Grace GPUs (Hopper + GB10).
          CUDA_AMD64='{"variant":"cuda","arch":"amd64","runner":"ubuntu-24.04","build_base":"nvidia/cuda:13.0.1-devel-ubuntu24.04","runtime_base":"nvidia/cuda:13.0.1-runtime-ubuntu24.04","cmake_args":"-DPARAKEET_GGML_CUDA=ON -DGGML_CUDA_NO_VMM=ON","cuda_archs":""}'
          CUDA_ARM64='{"variant":"cuda","arch":"arm64","runner":"ubuntu-24.04-arm","build_base":"nvidia/cuda:13.0.1-devel-ubuntu24.04","runtime_base":"nvidia/cuda:13.0.1-runtime-ubuntu24.04","cmake_args":"-DPARAKEET_GGML_CUDA=ON -DGGML_CUDA_NO_VMM=ON","cuda_archs":"90;121-real"}'
          if [ "$EVENT_NAME" = "pull_request" ]; then
            MATRIX="{\"include\":[${CPU_AMD64},${CPU_ARM64}]}"
          else
            MATRIX="{\"include\":[${CPU_AMD64},${CPU_ARM64},${CUDA_AMD64},${CUDA_ARM64}]}"
          fi
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"

  # -------------------------------------------------------------------------
  # build: one job per (variant, arch) entry of the matrix chosen by setup.
  # Each job runs on the runner named by its entry and, except on
  # pull_request, pushes the image by digest (untagged) and uploads that
  # digest as an artifact. PRs build only (cache-only).
  # -------------------------------------------------------------------------
  build:
    needs: setup
    runs-on: ${{ matrix.runner }}
    permissions:
      contents: read
      packages: write
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
    steps:
      - name: Checkout (with submodules)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          # The checkout is handed to docker build as its context (context: .
          # below) and no later step runs git, so do not leave the job token
          # behind in the checkout's git config.
          persist-credentials: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Only authenticate when we actually push (i.e. not on pull_request).
      - name: Log in to ghcr.io
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push by digest (${{ matrix.variant }}/${{ matrix.arch }})
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/${{ matrix.arch }}
          # Base images and compile flags come from the matrix entry.
          build-args: |
            BUILD_BASE=${{ matrix.build_base }}
            RUNTIME_BASE=${{ matrix.runtime_base }}
            CMAKE_EXTRA_ARGS=${{ matrix.cmake_args }}
            CUDA_ARCHS=${{ matrix.cuda_archs }}
          # PRs: build only (cache-only, nothing pushed). Otherwise push the
          # image by digest so the merge job can stitch the arches together.
          outputs: ${{ github.event_name != 'pull_request' && format('type=image,name={0}/{1},push-by-digest=true,name-canonical=true,push=true', env.REGISTRY, env.IMAGE_NAME) || 'type=cacheonly' }}
          # One cache scope per (variant, arch).
          cache-from: type=gha,scope=${{ matrix.variant }}-${{ matrix.arch }}
          cache-to: type=gha,mode=max,scope=${{ matrix.variant }}-${{ matrix.arch }}

      # Record the pushed digest as an empty file named after its hex part
      # (the "sha256:" prefix is stripped).
      - name: Export digest
        if: github.event_name != 'pull_request'
        env:
          # Passed through the environment instead of being interpolated into
          # the script text.
          DIGEST: ${{ steps.build.outputs.digest }}
        run: |
          mkdir -p /tmp/digests
          touch "/tmp/digests/${DIGEST#sha256:}"

      - name: Upload digest
        if: github.event_name != 'pull_request'
        uses: actions/upload-artifact@v4
        with:
          name: digests-${{ matrix.variant }}-${{ matrix.arch }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1

  # -------------------------------------------------------------------------
  # merge: combine the per-arch digests of each variant into one multi-arch
  # manifest and tag it. Skipped on pull_request (nothing was pushed).
  # -------------------------------------------------------------------------
  merge:
    if: github.event_name != 'pull_request'
    needs: build
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    strategy:
      fail-fast: false
      matrix:
        include:
          - variant: cpu
            suffix: ""
          - variant: cuda
            suffix: "-cuda"
    steps:
      # Collect every digests-<variant>-<arch> artifact of this variant into
      # one directory; each file name is the hex part of an image digest.
      - name: Download digests
        uses: actions/download-artifact@v4
        with:
          path: /tmp/digests
          pattern: digests-${{ matrix.variant }}-*
          merge-multiple: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to ghcr.io
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Compute image tags
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # cpu  -> latest, sha-xxxx, vX.Y.Z
          # cuda -> latest-cuda, sha-xxxx-cuda, vX.Y.Z-cuda
          flavor: |
            suffix=${{ matrix.suffix }},onlatest=true
          tags: |
            type=raw,value=latest,enable={{is_default_branch}}
            type=ref,event=tag
            type=sha

      # Build one "-t <tag>" argument per computed tag and one
      # "<image>@sha256:<hex>" source per digest file in the working directory.
      - name: Create multi-arch manifest and push
        working-directory: /tmp/digests
        run: |
          docker buildx imagetools create \
            $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
            $(printf '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@sha256:%s ' *)

      # Inspect a tag this run actually pushed. The raw `latest` tag above is
      # gated on the default branch, so a run on another ref (for example a
      # workflow_dispatch on a feature branch) may not have pushed it; take
      # the first tag from the metadata output instead of assuming `latest`.
      - name: Inspect manifest
        run: |
          docker buildx imagetools inspect \
            "$(jq -r '.tags[0]' <<< "$DOCKER_METADATA_OUTPUT_JSON")"