# Use quantized base model #14

	name: Build and Push CUDA Image to Github Container Registry

	on:
	workflow_dispatch:
	push:
	branches:
	- main
	paths:
	- '**.py'
	- 'Dockerfile'
	- '.github/workflows/build-cuda.yaml'
	- 'auth/**'

	env:
	REGISTRY: ghcr.io
	IMAGE_NAME: inference
	jobs:
	build_cuda_image:
	runs-on: ubuntu-latest
	permissions:
	contents: read
	packages: write
	# Configure for larger disk space and better caching
	env:
	DOCKER_BUILDKIT: 1
	DOCKER_BUILDKIT_INLINE_CACHE: 1
	steps:
	- name: Checkout
	uses: actions/checkout@v4
	with:
	fetch-depth: 0
	- uses: docker/login-action@v2
	with:
	registry: ${{ env.REGISTRY }}
	username: ${{ github.actor }}
	password: ${{ secrets.GITHUB_TOKEN }}
	- name: Extract metadata (tags, labels) for Docker
	id: meta
	uses: docker/metadata-action@v4
	with:
	images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
	- uses: docker/setup-qemu-action@v3
	- uses: docker/setup-buildx-action@v3
	- name: Build and push Docker image
	uses: docker/build-push-action@v5
	with:
	context: .
	file: ./Dockerfile
	platforms: linux/amd64
	push: true
	cache-from: type=gha
	cache-to: type=gha,mode=max
	tags: \|
	${{ env.REGISTRY }}/rubriclab/${{ env.IMAGE_NAME }}:${{ github.sha }}
	${{ env.REGISTRY }}/rubriclab/${{ env.IMAGE_NAME }}:latest
	labels: ${{ steps.meta.outputs.labels }}

# Provide feedback