nf-core · atrigila · Apr 12, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026
diff --git a/modules/nf-core/quilt/quilt2/environment.yml b/modules/nf-core/quilt/quilt2/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::r-quilt=2.0.4=r44h503566f_0
+  - conda-forge::r-base=4.4.3  # R runtime; channel-qualified like the r-quilt pin above
diff --git a/modules/nf-core/quilt/quilt2/main.nf b/modules/nf-core/quilt/quilt2/main.nf
@@ -0,0 +1,127 @@
+// Runs QUILT2 (`QUILT2.R`) genotype imputation for a set of BAM/CRAM files
+// against a reference panel VCF over one chromosome, optionally restricted to
+// a region.
+//
+// Optional inputs that are empty (Groovy-false, e.g. an empty list) are left
+// off the command line; `chr`, `ngen` and the reference panel are always
+// passed. Extra QUILT2 options come from `task.ext.args`; `--seed=1` is
+// appended unless `--seed` already appears there.
+process QUILT_QUILT2 {
+    tag "${meta.id}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/r-quilt:2.0.4--r44h503566f_0'
+        : 'biocontainers/r-quilt:2.0.4--r44h503566f_0'}"
+
+    input:
+    tuple val(meta), path(bams), path(bais), path(bamlist), path(samplename), path(reference_vcf_file), path(reference_vcf_index), path(posfile), path(phasefile), path(genfile), val(chr), val(regions_start), val(regions_end), val(ngen), val(buffer), path(genetic_map)
+    tuple val(meta2), path(fasta), path(fasta_fai)
+
+    output:
+    tuple val(meta), path("*.vcf.gz")          , emit: vcf
+    tuple val(meta), path("*.vcf.gz.tbi")      , emit: tbi  , optional: true
+    tuple val(meta), path("RData", type: "dir"), emit: rdata, optional: true
+    tuple val(meta), path("plots", type: "dir"), emit: plots, optional: true
+    tuple val("${task.process}"), val('r-quilt'), eval('Rscript -e "cat(as.character(packageVersion(\'QUILT\')))"'), topic: versions, emit: versions_r_quilt
+    tuple val("${task.process}"), val('r-base'), eval('R --version | sed "1!d; s/.*version //; s/ .*//"'), topic: versions, emit: versions_r_base
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args   ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    // NOTE(review): a single input file presumably arrives as one path rather
+    // than a list, and collect on a path would not visit input files; wrap it
+    // so the extension check sees the file itself. Lists pass through as-is.
+    def bam_files = bams instanceof Path ? [bams] : bams
+    def extension = bam_files.collect { bam -> bam.extension }.unique()
+
+    // Flag prefix for the input list; the list file name is appended to it in
+    // the script below. CRAM input additionally passes the reference FASTA.
+    // NOTE(review): mixed or unrecognised extensions leave this empty, so the
+    // list file is then handed to QUILT2.R without a flag.
+    def list_command = extension == ["bam"]
+        ? "--bamlist="
+        : extension == ["cram"] ? "--reference=${fasta} --cramlist=" : ""
+
+    def genetic_map_command = genetic_map ? "--genetic_map_file=${genetic_map}" : ""
+    def posfile_command     = posfile     ? "--posfile=${posfile}"               : ""
+    def phasefile_command   = phasefile   ? "--phasefile=${phasefile}"           : ""
+    def genfile_command     = genfile     ? "--genfile=${genfile}"               : ""
+    def samplename_command  = samplename  ? "--sampleNames_file=${samplename}"   : ""
+
+    // `x || x == 0` keeps an explicit numeric 0 (e.g. buffer = 0), which plain
+    // Groovy truthiness would silently drop as "not provided".
+    def start_command       = (regions_start || regions_start == 0) ? "--regionStart=${regions_start}" : ""
+    def end_command         = (regions_end   || regions_end   == 0) ? "--regionEnd=${regions_end}"     : ""
+    def buffer_command      = (buffer        || buffer        == 0) ? "--buffer=${buffer}"             : ""
+
+    // Literal substring test, like the stub's flag checks, so that a --seed
+    // given on a later line of a multi-line ext.args string is still detected.
+    def quilt_args = args.contains("--seed") ? args : args + " --seed=1"
+
+    """
+    if [ -n "${bamlist}" ] ;
+    then
+        BAM_LIST="${bamlist}"
+    else
+        printf "%s\\n" ${bams} | tr -d '[],' > all_files.txt
+        BAM_LIST="all_files.txt"
+    fi
+
+    QUILT2.R \\
+        ${list_command}\$BAM_LIST \\
+        ${genetic_map_command} \\
+        ${posfile_command} \\
+        ${phasefile_command} \\
+        ${genfile_command} \\
+        ${samplename_command} \\
+        --chr=${chr} \\
+        ${start_command} \\
+        ${end_command} \\
+        ${buffer_command} \\
+        --nGen=${ngen} \\
+        --nCores=${task.cpus} \\
+        --outputdir="." \\
+        --reference_vcf_file=${reference_vcf_file} \\
+        --output_filename=${prefix}.vcf.gz \\
+        ${quilt_args}
+    """
+
+    stub:
+    // Creates empty placeholder outputs; RData/ and plots/ are only created
+    // when the matching flag is present in ext.args.
+    def args          = task.ext.args   ?: ''
+    def prefix        = task.ext.prefix ?: "${meta.id}"
+    def make_plots    = args.contains("--make_plots=TRUE")
+    def save_ref      = args.contains("--save_prepared_reference=TRUE")
+    // Loop bounds for the placeholder plot names, read from ext.args when given.
+    def nGibbsSamples = args.contains("--nGibbsSamples=") ? args.split("--nGibbsSamples=")[1].split(" ")[0] : 7
+    def n_seek_its    = args.contains("--n_seek_its=")    ? args.split("--n_seek_its=")[1].split(" ")[0]    : 3
+
+    """
+    echo '' | gzip > ${prefix}.vcf.gz
+    touch ${prefix}.vcf.gz.tbi
+    if [ "${save_ref}" == true ]
+    then
+        mkdir -p RData
+        touch "RData/QUILT_prepared_reference.${chr}.${regions_start}.${regions_end}.RData"
+    fi
+    if [ "${make_plots}" == true ]
+    then
+        mkdir -p plots
+        for nGibbs in {0..${nGibbsSamples}}
+        do
+            touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).0.truth.png"
+            for its in {1..${n_seek_its}}
+            do
+                touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).it\$its.gibbs.png"
+            done
+        done
+    fi
+    """
+}
diff --git a/modules/nf-core/quilt/quilt2/meta.yml b/modules/nf-core/quilt/quilt2/meta.yml
@@ -0,0 +1,226 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "quilt_quilt2"
+description: QUILT2 is an R and C++ program for fast genotype imputation from
+  low-coverage sequence using a large phased reference panel in VCF/BCF format.
+keywords:
+  - imputation
+  - low-coverage
+  - genotype
+  - genomics
+  - vcf
+tools:
+  - "quilt":
+      description: "Fast read-aware genotype imputation from low-coverage sequence
+        using a large phased reference panel"
+      homepage: "https://github.com/rwdavies/QUILT"
+      documentation: "https://github.com/rwdavies/QUILT"
+      tool_dev_url: "https://github.com/rwdavies/QUILT"
+      doi: "10.1038/s41467-025-67218-1"
+      licence:
+        - "GPL v3"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - bams:
+        type: file
+        description: (Mandatory) BAM/CRAM files
+        pattern: "*.{bam,cram}"
+        ontologies: []
+    - bais:
+        type: file
+        description: (Mandatory) BAM/CRAM index files
+        pattern: "*.{bai,crai}"
+        ontologies: []
+    - bamlist:
+        type: file
+        description: (Optional) File with list of BAM/CRAM files to impute. One
+          file per line.
+        pattern: "*.{txt}"
+        ontologies: []
+    - samplename:
+        type: file
+        description: (Optional) File with list of sample names, in the same order
+          as in bamlist, to impute. One sample name per line.
+        pattern: "*.{txt}"
+        ontologies: []
+    - reference_vcf_file:
+        type: file
+        description: (Mandatory) Phased reference panel VCF/BCF file for
+          imputation.
+        pattern: "*.{vcf,vcf.gz,bcf}"
+        ontologies: []
+    - reference_vcf_index:
+        type: file
+        description: (Mandatory) Index for the reference panel VCF/BCF file.
+        pattern: "*.{tbi,csi}"
+        ontologies: []
+    - posfile:
+        type: file
+        description: (Optional) File with positions of where to impute, lining up
+          one-to-one with genfile. File is tab separated with no header, one row
+          per SNP, with col 1 = chromosome, col 2 = physical position (sorted from
+          smallest to largest), col 3 = reference base, col 4 = alternate base.
+          Bases are capitalized.
+        pattern: "*.{txt}"
+        ontologies: []
+    - phasefile:
+        type: file
+        description: (Optional) File with truth phasing results. Supersedes
+          genfile if both options given. File has a header row with a name for
+          each sample, matching what is found in the bam file. Each subject is
+          then a tab separated column, with 0 = ref and 1 = alt, separated by a
+          vertical bar |, e.g. 0|0 or 0|1. Note therefore this file has one more
+          row than posfile which has no header.
+        pattern: "*.{txt}"
+        ontologies: []
+    - genfile:
+        type: file
+        description: (Optional) Path to gen file with high coverage results. Empty
+          for no genfile. If both genfile and phasefile are given, only phasefile
+          is used, as genfile (unphased genotypes) is derivative to phasefile
+          (phased genotypes). File has a header row with a name for each sample,
+          matching what is found in the bam file. Each subject is then a tab
+          separated column, with 0 = hom ref, 1 = het, 2 = hom alt and NA
+          indicating missing genotype, with rows corresponding to rows of the
+          posfile. Note therefore this file has one more row than posfile which
+          has no header [default ""]
+        pattern: "*.{txt}"
+        ontologies: []
+    - chr:
+        type: string
+        description: (Mandatory) What chromosome to run. Should match BAM headers.
+    - regions_start:
+        type: integer
+        description: (Optional) When running imputation, where to start from. The
+          1-based position x is kept if regionStart <= x <= regionEnd.
+    - regions_end:
+        type: integer
+        description: (Optional) When running imputation, where to stop.
+    - ngen:
+        type: integer
+        description: (Mandatory) Number of generations since founding or mixing. The
+          algorithm is relatively robust to this. Use nGen = 4 * Ne / K if unsure.
+    - buffer:
+        type: integer
+        description: (Optional) Buffer of region to perform imputation over.
+          Imputation is run from regionStart-buffer to regionEnd+buffer, and
+          reported for regionStart to regionEnd, including the bases of
+          regionStart and regionEnd.
+    - genetic_map:
+        type: file
+        description: (Optional) File with genetic map information, a file with 3
+          white-space delimited entries giving position (1-based), genetic rate
+          map in cM/Mbp, and genetic map in cM. If no file included, rate is based
+          on physical distance and expected rate (expRate).
+        pattern: "*.{txt,map}{,.gz}"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference genome information
+          e.g. [ id:'genome' ]
+    - fasta:
+        type: file
+        description: (Optional) File with reference genome. Only used for CRAM input.
+        pattern: "*.{fa,fasta}"
+        ontologies: []
+    - fasta_fai:
+        type: file
+        description: (Optional) File with reference genome index.
+        pattern: "*.{fai}"
+        ontologies: []
+output:
+  vcf:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.vcf.gz":
+          type: file
+          description: VCF file with both SNP annotation information and
+            per-sample genotype information.
+          pattern: "*.{vcf.gz}"
+          ontologies: []
+  tbi:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.vcf.gz.tbi":
+          type: file
+          description: TBI file of the VCF.
+          pattern: "*.{vcf.gz.tbi}"
+          ontologies: []
+  rdata:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - RData:
+          type: directory
+          description: |
+            Folder of RData objects generated during the imputation process.
+          pattern: "RData"
+  plots:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - plots:
+          type: directory
+          description: |
+            Folder of plots generated during the imputation process.
+          pattern: "plots"
+  versions_r_quilt:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - r-quilt:
+          type: string
+          description: The name of the tool
+      - Rscript -e "cat(as.character(packageVersion('QUILT')))":
+          type: eval
+          description: The expression to obtain the version of the tool
+  versions_r_base:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - r-base:
+          type: string
+          description: The name of the tool
+      - R --version | sed "1!d; s/.*version //; s/ .*//":
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - r-quilt:
+          type: string
+          description: The name of the tool
+      - Rscript -e "cat(as.character(packageVersion('QUILT')))":
+          type: eval
+          description: The expression to obtain the version of the tool
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - r-base:
+          type: string
+          description: The name of the tool
+      - R --version | sed "1!d; s/.*version //; s/ .*//":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@atrigila"
+maintainers:
+  - "@atrigila"