Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .envrc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ export using_direnv=true

# Check if the NVIDIA kernel module is loaded
if [ "$(lsmod | grep nvidia | wc -l)" -ne 0 ]; then
use flake #withCUDA
use flake .#withCUDA
else
use flake #withoutCUDA
use flake .#withoutCUDA
fi
5 changes: 4 additions & 1 deletion .github/workflows/image.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: Create and publish a Docker image

on:
pull_request:
push:
branches:
- 'main'
Expand Down Expand Up @@ -31,6 +32,7 @@ jobs:

# Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
- name: Log in to the Container registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
Expand All @@ -52,14 +54,15 @@ jobs:
uses: docker/build-push-action@v6
with:
context: .
push: true
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

# This step generates an artifact attestation for the image, which is an unforgeable statement about where and how it was built. It increases supply chain security for people who consume the image. For more information, see "[Using artifact attestations to establish provenance for builds](/actions/security-guides/using-artifact-attestations-to-establish-provenance-for-builds)."
- name: Generate artifact attestation
if: github.event_name != 'pull_request'
uses: actions/attest-build-provenance@v1
with:
subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
Expand Down
4 changes: 3 additions & 1 deletion .woodpecker/build-amd64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ steps:
image: bash
commands:
- attic login lounge-rocks https://cache.lounge.rocks $ATTIC_KEY --set-default
secrets: [attic_key]
environment:
ATTIC_KEY:
from_secret: attic_key

- name: build whisper_api
image: bash
Expand Down
17 changes: 7 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
# newest version:
# https://hub.docker.com/r/nvidia/cuda/tags?page=1&name=-base-ubuntu22.04&ordering=name

ARG CUDA_VERSION=12.4.1
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04

ARG PYTHON_VERSION=3.10
ARG CUDA_VERSION=12.6.2
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu24.04

WORKDIR /workspace

RUN export DEBIAN_FRONTEND=noninteractive && \
apt-get -qq update && \
apt-get -qq install --no-install-recommends \
ffmpeg \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-venv \
python3 \
python3-venv \
python3-pip && \
rm -rf /var/lib/apt/lists/* && \
pip3 install --upgrade pip setuptools
rm -rf /var/lib/apt/lists/*

COPY requirements.txt requirements.txt
RUN pip3 install --no-build-isolation -r requirements.txt
RUN pip3 install -r requirements.txt --break-system-packages

# disabled by default since GitHub Actions do not have enough space
ARG PREFETCH_MODEL=0
Expand All @@ -32,7 +29,7 @@ RUN if [ "$PREFETCH_MODEL" != 0 ]; then \
COPY . /workspace/code

RUN cd /workspace/code && \
pip3 install .
pip3 install . --break-system-packages

ENV PORT=3001 \
LISTEN=0.0.0.0 \
Expand Down
29 changes: 19 additions & 10 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,22 @@ services:
- LOAD_MODEL_ON_STARTUP=1
# - UNLOAD_MODEL_AFTER_S=300
# - DEVELOP_MODE=0
# include the following lines when using an NVIDIA GPU!
# make sure to have the NVIDIA Container Toolkit installed
# for more information visit:
# https://github.com/NVIDIA/nvidia-container-toolkit
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: [gpu]
# NVIDIA GPU access — enable exactly one of the two blocks below.
#
# Option A (preferred): CDI (Container Device Interface). Vendor-neutral
# standard where the NVIDIA Container Toolkit installs a spec at
# /etc/cdi/nvidia.yaml and the device is addressed by name. Requires
# Docker >=25 and Compose >=v2.30. This is what modern setups use.
devices:
- nvidia.com/gpu=all
#
# Option B (legacy): the old `nvidia` OCI runtime registered in
# /etc/docker/daemon.json by older versions of the NVIDIA Container
# Toolkit. Being phased out, but still works on hosts that have the
# runtime registered and no CDI spec installed.
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# capabilities: [gpu]
8 changes: 4 additions & 4 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 8 additions & 3 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

description = "A simple API for OpenAI's Whisper";

inputs = { nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; };
inputs = {
# Pinned to the commit just before torch 2.11.0 was introduced (2026-03-23).
# torch 2.11 fails to build with CUDA on sm_90 due to MSLK FP4 GEMM kernels
# (f4f4bf16_grouped) which target Blackwell (sm_100+) only.
nixpkgs.url = "github:nixos/nixpkgs/37d281c8b0315e5a03db0a3a66cead7c50361817";
};

outputs = { self, nixpkgs, ... }:
let
Expand Down Expand Up @@ -33,8 +38,8 @@
(system: nixpkgsFor.${system}.nixpkgs-fmt);

overlays.default = final: prev: {
devShell = final.callPackage nixos/devShell { inherit self; };
whisper_api = final.callPackage nixos/pkgs/whisper_api { inherit self; };
devShell = final.python3Packages.callPackage nixos/devShell { };
whisper_api = final.python3Packages.callPackage nixos/pkgs/whisper_api { inherit self; };
# Our code is not compatible with pydantic version 2 yet.
python3 = prev.python3.override {
packageOverrides = python-self: python-super: {
Expand Down
1 change: 0 additions & 1 deletion nixos/checks/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ in {
server.succeed("chown -R whisper_api:whisper_api /var/lib/whisper_api/.cache/whisper")

# wait until the server is up
server.wait_for_unit("network-online.target")
server.wait_for_unit("whisper_api")

# check if server can reach API
Expand Down
9 changes: 6 additions & 3 deletions nixos/devShell/default.nix
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
{ self, pkgs, ... }:
{ pkgs, ... }:
let
whisper_api = pkgs.whisper_api;
python-with-packages = pkgs.python3.withPackages (p: with p; [
# only needed for development
autopep8
black
httpx
isort
pylint
pip
] ++ self.packages.${pkgs.system}.whisper_api.propagatedBuildInputs);
pylint
pytest
] ++ whisper_api.propagatedBuildInputs);
in
pkgs.mkShell {
inputsFrom = [ whisper_api ];
buildInputs = with pkgs; [
# only needed for development
nixpkgs-fmt
Expand Down
37 changes: 28 additions & 9 deletions nixos/pkgs/whisper_api/default.nix
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
{ self
, lib
, python3
,
{
self,
lib,
buildPythonApplication,

# build-system
setuptools,
pythonRelaxDepsHook,

# dependencies
fastapi,
ffmpeg-python,
openai-whisper,
python-multipart,
uvicorn,

# tests
pytestCheckHook,
httpx,
}:
python3.pkgs.buildPythonApplication {
buildPythonApplication {

pname = "whisper_api";

Expand All @@ -18,24 +33,28 @@ python3.pkgs.buildPythonApplication {

pythonRelaxDeps = [ ];

nativeBuildInputs = with python3.pkgs; [
nativeBuildInputs = [
setuptools
pythonRelaxDepsHook
];

propagatedBuildInputs = with python3.pkgs; [
propagatedBuildInputs = [
fastapi
ffmpeg-python
openai-whisper
python-multipart
uvicorn
];

nativeCheckInputs = with python3.pkgs; [
unittestCheckHook
nativeCheckInputs = [
pytestCheckHook
httpx
];

disabledTestPaths = [
"test/test_api.py"
];

pythonImportsCheck = [ "whisper_api" ];

meta = with lib; {
Expand Down
10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
fastapi==0.112.0
fastapi==0.128.0
ffmpeg-python==0.2.0
openai-whisper==20240930
pydantic==2.8.2
python-multipart==0.0.9
uvicorn==0.29.0
openai-whisper==20250625
pydantic==2.12.5
python-multipart==0.0.21
uvicorn==0.40.0
Binary file not shown.
Loading
Loading