Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
6a5a400
Replace `sentieon pyexec` with `sys.executable`
DonFreed Apr 9, 2026
f5b84a1
More informative errors for `bcftools` command failures
DonFreed Apr 9, 2026
316b230
Updates to support additional pangenomes
DonFreed Apr 10, 2026
6728fa4
Ensure the parent directory for the output file exists
DonFreed Apr 16, 2026
fb71623
Validate bwa index files
DonFreed Apr 17, 2026
16b8562
Update test helpers
DonFreed Apr 18, 2026
fe4c97f
More informative errors with invalid readgroups
DonFreed Apr 18, 2026
9f46c50
Replace `scipy.signal.find_peaks` and `Levenshtein.ratio` with Python…
DonFreed Apr 18, 2026
4e85638
Add back `Levenshtein.ratio` due to runtime impact
DonFreed Apr 19, 2026
ca9b800
Automate release upload to PyPI
DonFreed Apr 19, 2026
dbdb9c3
Update `poetry.lock` after rebase
DonFreed Apr 19, 2026
a1a793d
Automatically push container images to DockerHub
DonFreed Apr 20, 2026
5cc0015
Add handling of difficult segdups
DonFreed Apr 22, 2026
1adc5fe
Add STR calling to the pangenome pipeline
DonFreed Apr 23, 2026
f5f35bd
Enable SV calling without a `cnv.model` file
DonFreed Apr 28, 2026
4b3d705
Add HLA/KIR calling with t1k
DonFreed Apr 29, 2026
6713db7
Add new external tools to the Dockerfile
DonFreed Apr 30, 2026
6e21d7e
Update segdup-caller command
DonFreed May 1, 2026
93cdf1b
Update the Dockerfile to add an ExpansionHunter wrapper
DonFreed May 1, 2026
fa7cc0c
Update segdup-caller version in Dockerfile
DonFreed May 1, 2026
b2e2544
Bump version
DonFreed May 1, 2026
92167f5
Restructure bam-input pipeline in `dnascope-pangenome`
DonFreed May 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Build the sentieon-cli container image and push it to DockerHub.
#
# The image is tagged `<cli version>-<build tag>` plus `latest`. The cli
# version is read from pyproject.toml; the Sentieon software version comes
# from the `SENTIEON_VERSION` repository variable.
name: Docker
on:
  # Runs whenever a GitHub release is published ...
  release:
    types: [published]
  # ... and on demand, where the caller chooses the build-tag suffix.
  workflow_dispatch:
    inputs:
      build_tag:
        description: "Build tag suffix (appended after the cli version)"
        default: "0"
        required: true

permissions:
  contents: read

jobs:
  build-push:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v5
      # Python is only needed here to parse pyproject.toml with `tomllib`.
      - uses: actions/setup-python@v5
        with:
          python-version: "3.14"
      # Fail early, with an annotated error, when the repository variable
      # that selects the Sentieon release is missing.
      - name: Verify SENTIEON_VERSION repo variable
        env:
          SENTIEON_VERSION: ${{ vars.SENTIEON_VERSION }}
        run: |
          if [ -z "$SENTIEON_VERSION" ]; then
            echo "::error::Repository variable SENTIEON_VERSION is not set." >&2
            exit 1
          fi
          echo "Building against Sentieon $SENTIEON_VERSION"
      # Exposes `version` (from pyproject.toml) and `build_date` (UTC,
      # ISO-8601) as step outputs for the build step below.
      - name: Resolve build metadata
        id: meta
        run: |
          version=$(python -c 'import tomllib; print(tomllib.load(open("pyproject.toml","rb"))["project"]["version"])')
          echo "version=$version" >> "$GITHUB_OUTPUT"
          echo "build_date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$GITHUB_OUTPUT"
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - uses: docker/build-push-action@v6
        with:
          context: .
          platforms: linux/amd64
          push: true
          # `inputs.build_tag` falls back to '0' when no input is supplied
          # (for example on release-triggered runs).
          tags: |
            sentieon/sentieon-cli:${{ steps.meta.outputs.version }}-${{ inputs.build_tag || '0' }}
            sentieon/sentieon-cli:latest
          build-args: |
            SENTIEON_VERSION=${{ vars.SENTIEON_VERSION }}
            VCS_REF=${{ github.sha }}
            BUILD_DATE=${{ steps.meta.outputs.build_date }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
26 changes: 24 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,29 @@ jobs:
sentieon-cli -h
# Build the sdist and the wheel, then attach both to the GitHub release.
# The release tag is handed to the script through the environment rather
# than interpolated into it, so the tag text is never parsed as shell code.
- name: build
  run: |
    poetry build
    gh release upload "$RELEASE_TAG" dist/*.tar.gz dist/*.whl
  env:
    GITHUB_TOKEN: ${{ github.TOKEN }}
    RELEASE_TAG: ${{ github.event.release.tag_name }}
# Hand the built distributions to the `publish-pypi` job, which downloads
# the artifact named `dist`.
- name: Upload distribution artifacts
  uses: actions/upload-artifact@v4
  with:
    name: dist
    path: dist/*
    # An empty dist/ means the build produced nothing; fail here instead of
    # emitting only a warning and failing later in the publish job.
    if-no-files-found: error

# Publish the distributions built by `build-release` to PyPI.
# No password or API token is passed to the publish action; the job only
# requests an OIDC identity token via `id-token: write`.
publish-pypi:
  needs: build-release
  runs-on: ubuntu-22.04
  environment:
    name: pypi
    url: https://pypi.org/p/sentieon_cli
  permissions:
    id-token: write
  steps:
    # Fetch the `dist` artifact uploaded by the build job into ./dist.
    - name: Download distribution artifacts
      uses: actions/download-artifact@v4
      with:
        name: dist
        path: dist
    - name: Publish to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
82 changes: 76 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ ARG VG_SHA256=d5752977237e801d971f0d044f43fde4f3005da35f0451b4f452d1c1c9b8414b
ARG KMC_VERSION=3.2.4
ARG KMC_RELEASE_TAG=v3.2.4-pipe2
ARG KMC_SHA256=d8bdf8edcd0577dba32e86e8f194b2eb04eb168c75a3e6d46721d5bef515ac96
# ExpansionHunter is downloaded as a pre-built release tarball; the SHA-256
# below is checked against the download before it is unpacked.
ARG EXPANSIONHUNTER_VERSION=5.0.0
ARG EXPANSIONHUNTER_SHA256=ebf3ec0ace6e6e3bbce12c26463da5d9f8e16374eff1ad10f0f1a9123050fa86
# T1K and segdup-caller are built from git; pin the tag and verify the
# resolved commit SHA after checkout.
# *_VERSION names the install location, *_GIT_TAG is the ref that is cloned,
# and *_COMMIT is the SHA that HEAD must equal after the clone.
ARG T1K_VERSION=1.0.9
ARG T1K_GIT_TAG=v1.0.9
ARG T1K_COMMIT=9376b555c1d8d2f8ca357c2656f49f450462dbc3
ARG SEGDUP_CALLER_VERSION=0.5.1
ARG SEGDUP_CALLER_GIT_TAG=v0.5.1
ARG SEGDUP_CALLER_COMMIT=0406ea78b7ff53d7a169b9aa945d7fcc7257ff12
# Pinned Poetry toolchain; should match the version used to generate poetry.lock.
ARG POETRY_VERSION=2.3.4
ARG POETRY_PLUGIN_EXPORT_VERSION=1.9.0
Expand All @@ -27,7 +37,7 @@ RUN test -n "$SENTIEON_VERSION"

# Install all build dependencies for the downloader stage in one pass.
RUN apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates bzip2 autoconf automake libtool make gcc perl \
curl ca-certificates bzip2 autoconf automake libtool make gcc g++ perl \
nasm git python3 python3-venv zlib1g-dev libbz2-dev liblzma-dev \
libcurl4-gnutls-dev libssl-dev libncurses5-dev libdeflate-dev \
libperl-dev libgsl0-dev && \
Expand Down Expand Up @@ -94,6 +104,31 @@ RUN mkdir -p /opt/kmc-${KMC_VERSION} && \
/usr/local/bin/kmc_tools-${KMC_VERSION} \
/usr/local/bin/kmc_dump-${KMC_VERSION}

# Install ExpansionHunter from the upstream pre-built Linux tarball.
# The download is verified against EXPANSIONHUNTER_SHA256 before unpacking;
# only the binary is kept, installed under a version-suffixed name, and the
# scratch files are removed in the same layer.
RUN set -e; \
    eh_tarball=/tmp/eh.tar.gz; \
    eh_unpack_dir=/tmp/eh; \
    eh_target="/usr/local/bin/ExpansionHunter-${EXPANSIONHUNTER_VERSION}"; \
    mkdir -p "$eh_unpack_dir"; \
    curl -fL -o "$eh_tarball" "https://github.com/Illumina/ExpansionHunter/releases/download/v${EXPANSIONHUNTER_VERSION}/ExpansionHunter-v${EXPANSIONHUNTER_VERSION}-linux_x86_64.tar.gz"; \
    echo "${EXPANSIONHUNTER_SHA256} $eh_tarball" | sha256sum -c -; \
    tar -C "$eh_unpack_dir" --strip-components=1 -zxf "$eh_tarball"; \
    cp "$eh_unpack_dir/bin/ExpansionHunter" "$eh_target"; \
    chmod +x "$eh_target"; \
    rm -rf "$eh_unpack_dir" "$eh_tarball"

# Compile T1K from a pinned git checkout.
# The clone is accepted only if HEAD equals T1K_COMMIT; the launcher script
# and its helper binaries are then copied into /opt/t1k-<version>/ and the
# source tree is discarded.
RUN set -e; \
    t1k_src=/tmp/t1k-src; \
    t1k_prefix="/opt/t1k-${T1K_VERSION}"; \
    git clone --branch "${T1K_GIT_TAG}" https://github.com/mourisl/T1K.git "$t1k_src"; \
    cd "$t1k_src"; \
    checked_out="$(git rev-parse HEAD)"; \
    test "$checked_out" = "${T1K_COMMIT}"; \
    make -j"$(nproc)"; \
    mkdir -p "$t1k_prefix"; \
    cp run-t1k bam-extractor fastq-extractor genotyper analyzer "$t1k_prefix/"; \
    rm -rf "$t1k_src"

# Fetch the pinned segdup-caller sources and confirm that HEAD matches
# SEGDUP_CALLER_COMMIT. Nothing is installed here: the `pip install` runs
# later, in the python-builder stage, so the venv is created in a clean image.
RUN set -e; \
    segdup_src=/opt/segdup-caller-src; \
    git clone --branch "${SEGDUP_CALLER_GIT_TAG}" https://github.com/Sentieon/segdup-caller.git "$segdup_src"; \
    test "$(git -C "$segdup_src" rev-parse HEAD)" = "${SEGDUP_CALLER_COMMIT}"

# Download the Sentieon software
RUN mkdir -p /opt/sentieon/ && \
curl -fL "https://s3.amazonaws.com/sentieon-release/software/sentieon-genomics-${SENTIEON_VERSION}.tar.gz" | \
Expand Down Expand Up @@ -124,12 +159,16 @@ FROM debian:13.4-slim AS python-builder
ARG SENTIEON_VERSION

COPY --from=downloader /opt/sentieon-cli/dist /opt/sentieon-cli/dist
COPY --from=downloader /opt/segdup-caller-src /opt/segdup-caller-src

# Build the sentieon-cli venv and a separate venv for segdup-caller.
# segdup-caller pulls in whatshap/pysam/scipy/pandas which would otherwise
# bloat (and risk version-conflicting with) the sentieon-cli env.
# Remove the kaleido and pyarrow packages to reduce image size.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
python3 python3-venv python3-dev \
gcc g++ make zlib1g-dev && \
gcc g++ make zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev && \
rm -rf /var/lib/apt/lists/* && \
python3 -m venv /opt/sentieon-cli-venv && \
/opt/sentieon-cli-venv/bin/pip install --no-cache-dir \
Expand All @@ -138,7 +177,12 @@ RUN apt-get update && \
/opt/sentieon-cli/dist/*.whl && \
/opt/sentieon-cli-venv/bin/pip uninstall -y kaleido pyarrow && \
/opt/sentieon-cli-venv/bin/pip uninstall -y pip setuptools wheel && \
find /opt/sentieon-cli-venv -depth -type d -name __pycache__ -exec rm -rf {} +
python3 -m venv /opt/segdup-caller-venv && \
/opt/segdup-caller-venv/bin/pip install --no-cache-dir \
/opt/segdup-caller-src && \
/opt/segdup-caller-venv/bin/pip uninstall -y pip setuptools wheel && \
find /opt/sentieon-cli-venv /opt/segdup-caller-venv \
-depth -type d -name __pycache__ -exec rm -rf {} +

# Build the container
FROM debian:13.4-slim
Expand All @@ -147,6 +191,8 @@ ARG BEDTOOLS_VERSION=2.30.0
ARG MOSDEPTH_VERSION=0.3.9
ARG VG_VERSION=1.73.0
ARG KMC_VERSION=3.2.4
ARG EXPANSIONHUNTER_VERSION=5.0.0
ARG T1K_VERSION=1.0.9
ARG VCS_REF
ARG BUILD_DATE

Expand Down Expand Up @@ -175,10 +221,18 @@ COPY --from=downloader /usr/local/bin/vg-${VG_VERSION} /usr/local/bin/vg-${VG_VE
COPY --from=downloader /usr/local/bin/kmc-${KMC_VERSION} /usr/local/bin/kmc-${KMC_VERSION}
COPY --from=downloader /usr/local/bin/kmc_tools-${KMC_VERSION} /usr/local/bin/kmc_tools-${KMC_VERSION}
COPY --from=downloader /usr/local/bin/kmc_dump-${KMC_VERSION} /usr/local/bin/kmc_dump-${KMC_VERSION}
COPY --from=downloader /usr/local/bin/ExpansionHunter-${EXPANSIONHUNTER_VERSION} /usr/local/bin/ExpansionHunter-${EXPANSIONHUNTER_VERSION}
COPY --from=downloader /opt/t1k-${T1K_VERSION} /opt/t1k-${T1K_VERSION}
COPY --from=downloader /opt/sentieon-cli/dist /opt/sentieon-cli/dist

# Create symlinks for libisal (the .so lives on the default linker path in
# /usr/lib) and for the unversioned names of the third-party tools.
# `run-t1k` resolves its sibling binaries via abs_path($0), so symlinking
# it from /usr/local/bin keeps the binaries discoverable in /opt/t1k-*/.
# `ExpansionHunter` goes through a wrapper that strips LD_PRELOAD: the
# pre-built EH binary crashes with `std::bad_cast` while parsing its JSON
# variant catalog when the global jemalloc preload (set further down) is
# active.
RUN cd /usr/lib && \
ln -s libisal.so.2.0.30 libisal.so.2 && \
ln -s libisal.so.2 libisal.so && \
Expand All @@ -188,13 +242,18 @@ RUN cd /usr/lib && \
ln -s vg-${VG_VERSION} vg && \
ln -s kmc-${KMC_VERSION} kmc && \
ln -s kmc_tools-${KMC_VERSION} kmc_tools && \
ln -s kmc_dump-${KMC_VERSION} kmc_dump
ln -s kmc_dump-${KMC_VERSION} kmc_dump && \
ln -s /opt/t1k-${T1K_VERSION}/run-t1k run-t1k && \
printf '#!/bin/sh\nexec env -u LD_PRELOAD /usr/local/bin/ExpansionHunter-%s "$@"\n' \
"${EXPANSIONHUNTER_VERSION}" > ExpansionHunter && \
chmod +x ExpansionHunter

# Runtime-only packages for the final image, one per line in alphabetical
# order. The apt package lists are removed in the same layer.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        curl \
        libbz2-1.0 \
        libcurl3-gnutls \
        libdeflate0 \
        libgsl28 \
        libjemalloc2 \
        liblzma5 \
        libncurses6 \
        libperl5.40 \
        libssl3 \
        libstdc++6 \
        perl \
        procps \
        python3; \
    rm -rf /var/lib/apt/lists/*

Expand All @@ -205,11 +264,15 @@ ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
# A default jemalloc configuration that should work well for most use-cases, see http://jemalloc.net/jemalloc.3.html
ENV MALLOC_CONF=metadata_thp:auto,background_thread:true,dirty_decay_ms:30000,muzzy_decay_ms:30000

# Bring in both virtual environments that were built in the python-builder
# stage, so this stage installs no compilers. sentieon-cli is the active
# environment (VIRTUAL_ENV); the segdup-caller venv stays isolated and is
# reachable only through PATH.
COPY --from=python-builder /opt/sentieon-cli-venv /opt/sentieon-cli-venv
COPY --from=python-builder /opt/segdup-caller-venv /opt/segdup-caller-venv
ENV VIRTUAL_ENV=/opt/sentieon-cli-venv
# Search order: sentieon-cli venv first, then the existing PATH, and the
# segdup-caller venv last (appended, not prepended) — presumably so that
# nothing in its bin/ shadows the sentieon-cli or system executables.
ENV PATH=/opt/sentieon-cli-venv/bin:$PATH:/opt/segdup-caller-venv/bin

# Create a non-root user for running the pipelines
RUN useradd --create-home --uid 1001 --shell /bin/bash sentieon
Expand All @@ -226,4 +289,11 @@ RUN sentieon driver --help && \
mosdepth -h && \
vg version && \
kmc --help && \
ExpansionHunter --help && \
perl -c "$(command -v run-t1k)" && \
segdup-caller --version && \
python -c "import sys; from packaging.version import Version; \
from sentieon_cli.sentieon_pangenome import SEGDUP_MIN_VERSION; \
from sentieon_cli.util import check_version; \
sys.exit(0 if all(check_version(c, v) for c, v in SEGDUP_MIN_VERSION.items()) else 1)" && \
sentieon-cli -h
Loading
Loading