Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions modules/nf-core/quilt/quilt2/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the QUILT_QUILT2 module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # QUILT R package, pinned to version and build string.
  - bioconda::r-quilt=2.0.4=r44h503566f_0
  # R runtime, pinned to an explicit channel like the dependency above.
  - conda-forge::r-base=4.4.3
105 changes: 105 additions & 0 deletions modules/nf-core/quilt/quilt2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Runs QUILT2.R on a set of BAM/CRAM files against a reference panel VCF and
// emits the resulting VCF plus optional index, RData and plots directories.
process QUILT_QUILT2 {
    tag "${meta.id}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/r-quilt:2.0.4--r44h503566f_0'
        : 'biocontainers/r-quilt:2.0.4--r44h503566f_0'}"

    input:
    // Alignments, optional list/annotation files, region parameters and genetic map.
    tuple val(meta), path(bams), path(bais), path(bamlist), path(samplename), path(reference_vcf_file), path(reference_vcf_index), path(posfile), path(phasefile), path(genfile), val(chr), val(regions_start), val(regions_end), val(ngen), val(buffer), path(genetic_map)
    // Reference genome; only passed to QUILT2 on the CRAM code path below.
    tuple val(meta2), path(fasta), path(fasta_fai)

    output:
    tuple val(meta), path("*.vcf.gz")          , emit: vcf
    tuple val(meta), path("*.vcf.gz.tbi")      , emit: tbi  , optional: true
    tuple val(meta), path("RData", type: "dir"), emit: rdata, optional: true
    tuple val(meta), path("plots", type: "dir"), emit: plots, optional: true
    tuple val("${task.process}"), val('r-quilt'), eval('Rscript -e "cat(as.character(packageVersion(\'QUILT\')))"'), topic: versions, emit: versions_r_quilt
    tuple val("${task.process}"), val('r-base'), eval('R --version | sed "1!d; s/.*version //; s/ .*//"'), topic: versions, emit: versions_r_base

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Choose the list option from the input file extensions: all BAM -> --bamlist,
    // all CRAM -> --cramlist (plus the reference). Any other combination yields
    // an empty option prefix.
    def extensions   = bams.collect { path -> path.extension }
    def extension    = extensions.flatten().unique()
    def list_command = extension == ["bam"]
        ? "--bamlist="
        : extension == ["cram"] ? "--reference=${fasta} --cramlist=" : ""

    // Optional inputs only contribute an option when a value was supplied.
    def genetic_map_command = genetic_map   ? "--genetic_map_file=${genetic_map}" : ""
    def posfile_command     = posfile       ? "--posfile=${posfile}"              : ""
    def phasefile_command   = phasefile     ? "--phasefile=${phasefile}"          : ""
    def genfile_command     = genfile       ? "--genfile=${genfile}"              : ""
    def samplename_command  = samplename    ? "--sampleNames_file=${samplename}"  : ""
    def start_command       = regions_start ? "--regionStart=${regions_start}"    : ""
    def end_command         = regions_end   ? "--regionEnd=${regions_end}"        : ""
    def buffer_command      = buffer        ? "--buffer=${buffer}"                : ""

    // Default to a fixed seed unless the caller already set one. A substring
    // check is used instead of a whole-string regex match so that --seed is
    // also detected when ext.args spans several lines.
    if (!args.contains("--seed")) {
        args += " --seed=1"
    }

    """
    if [ -n "${bamlist}" ] ;
    then
        BAM_LIST="${bamlist}"
    else
        printf "%s\\n" ${bams} | tr -d '[],' > all_files.txt
        BAM_LIST="all_files.txt"
    fi

    QUILT2.R \\
        ${list_command}\$BAM_LIST \\
        ${genetic_map_command} \\
        ${posfile_command} \\
        ${phasefile_command} \\
        ${genfile_command} \\
        ${samplename_command} \\
        --chr=${chr} \\
        ${start_command} \\
        ${end_command} \\
        ${buffer_command} \\
        --nGen=${ngen} \\
        --nCores=${task.cpus} \\
        --outputdir="." \\
        --reference_vcf_file=${reference_vcf_file} \\
        --output_filename=${prefix}.vcf.gz \\
        ${args}
    """

    stub:
    def args          = task.ext.args   ?: ''
    def prefix        = task.ext.prefix ?: "${meta.id}"
    // The optional output directories are only created when the matching
    // option is present in ext.args.
    def make_plots    = args.contains("--make_plots=TRUE")
    def save_ref      = args.contains("--save_prepared_reference=TRUE")
    // Loop bounds for the placeholder plot files; fall back to 7 and 3 when
    // the options are not given in ext.args.
    def nGibbsSamples = args.contains("--nGibbsSamples=") ? args.split("--nGibbsSamples=")[1].split(" ")[0] : 7
    def n_seek_its    = args.contains("--n_seek_its=") ? args.split("--n_seek_its=")[1].split(" ")[0] : 3

    """
    echo '' | gzip > ${prefix}.vcf.gz
    touch ${prefix}.vcf.gz.tbi
    if [ "${save_ref}" == true ]
    then
        mkdir -p RData
        touch "RData/QUILT_prepared_reference.${chr}.${regions_start}.${regions_end}.RData"
    fi
    if [ "${make_plots}" == true ]
    then
        mkdir -p plots
        for nGibbs in {0..${nGibbsSamples}}
        do
            touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).0.truth.png"
            for its in {1..${n_seek_its}}
            do
                touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).it\$its.gibbs.png"
            done
        done
    fi
    """
}
224 changes: 224 additions & 0 deletions modules/nf-core/quilt/quilt2/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
# Metadata for the QUILT_QUILT2 module: tool description, the two input
# channels, the emitted outputs and the version topics.
name: "quilt_quilt2"
description: QUILT2 is an R and C++ program for fast genotype imputation from
  low-coverage sequence using a large phased reference panel in VCF/BCF format.
keywords:
  - imputation
  - low-coverage
  - genotype
  - genomics
  - vcf
tools:
  - "quilt":
      description: "Fast read-aware genotype imputation from low-coverage sequence
        using a large phased reference panel"
      homepage: "https://github.com/rwdavies/QUILT"
      documentation: "https://github.com/rwdavies/QUILT"
      tool_dev_url: "https://github.com/rwdavies/QUILT"
      doi: "10.1038/s41467-025-67218-1"
      licence:
        - "GPL v3"
      identifier: ""
input:
  # First input channel: alignments, optional list files and region parameters.
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - bams:
        type: file
        description: (Mandatory) BAM/CRAM files
        pattern: "*.{bam,cram}"
        ontologies: []
    - bais:
        type: file
        description: (Mandatory) BAM/CRAM index files
        pattern: "*.{bai,crai}"
        ontologies: []
    - bamlist:
        type: file
        description: (Optional) File with list of BAM/CRAM files to impute. One
          file per line.
        pattern: "*.{txt}"
        ontologies: []
    - samplename:
        type: file
        description: (Optional) File with list of sample names in the same order
          as in bamlist to impute. One sample name per line.
        pattern: "*.{txt}"
        ontologies: []
    - reference_vcf_file:
        type: file
        description: (Mandatory) Phased reference panel VCF/BCF file for
          imputation.
        pattern: "*.{vcf,vcf.gz,bcf}"
        ontologies: []
    - reference_vcf_index:
        type: file
        description: (Mandatory) Index for the reference panel VCF file.
        pattern: "*.{tbi,csi}"
        ontologies: []
    - posfile:
        type: file
        description: (Optional) File with positions of where to impute, lining up
          one-to-one with genfile. File is tab separated with no header, one row
          per SNP, with col 1 = chromosome, col 2 = physical position (sorted from
          smallest to largest), col 3 = reference base, col 4 = alternate base.
          Bases are capitalized.
        pattern: "*.{txt}"
        ontologies: []
    - phasefile:
        type: file
        description: (Optional) File with truth phasing results. Supersedes
          genfile if both options given. File has a header row with a name for
          each sample, matching what is found in the bam file. Each subject is
          then a tab separated column, with 0 = ref and 1 = alt, separated by a
          vertical bar |, e.g. 0|0 or 0|1. Note therefore this file has one more
          row than posfile which has no header.
        pattern: "*.{txt}"
        ontologies: []
    - genfile:
        type: file
        description: (Optional) Path to gen file with high coverage results. Empty
          for no genfile. If both genfile and phasefile are given, only phasefile
          is used, as genfile (unphased genotypes) is derivative to phasefile
          (phased genotypes). File has a header row with a name for each sample,
          matching what is found in the bam file. Each subject is then a tab
          separated column, with 0 = hom ref, 1 = het, 2 = hom alt and NA
          indicating missing genotype, with rows corresponding to rows of the
          posfile. Note therefore this file has one more row than posfile which
          has no header [default ""]
        pattern: "*.{txt}"
        ontologies: []
    - chr:
        type: string
        description: (Mandatory) What chromosome to run. Should match BAM headers.
    - regions_start:
        type: integer
        description: (Mandatory) When running imputation, where to start from. The
          1-based position x is kept if regionStart <= x <= regionEnd.
    - regions_end:
        type: integer
        description: (Mandatory) When running imputation, where to stop.
    - ngen:
        type: integer
        description: (Mandatory) Number of generations since founding or mixing.
          Note that the algorithm is relatively robust to this. Use
          nGen = 4 * Ne / K if unsure.
    - buffer:
        type: integer
        description: (Optional) Buffer of region to perform imputation over. So
          imputation is run from regionStart-buffer to regionEnd+buffer, and
          reported for regionStart to regionEnd, including the bases of
          regionStart and regionEnd.
    - genetic_map:
        type: file
        description: (Optional) File with genetic map information, a file with 3
          white-space delimited entries giving position (1-based), genetic rate
          map in cM/Mbp, and genetic map in cM. If no file included, rate is based
          on physical distance and expected rate (expRate).
        pattern: "*.{txt,map}{,.gz}"
        ontologies: []
  # Second input channel: reference genome, used for CRAM input.
  - - meta2:
        type: map
        description: |
          Groovy Map containing reference genome information
          e.g. [ id:'genome' ]
    - fasta:
        type: file
        description: (Optional) File with reference genome. Needed when the
          input files are CRAM.
        pattern: "*.{fa,fasta}"
        ontologies: []
    - fasta_fai:
        type: file
        description: (Optional) File with reference genome index.
        pattern: "*.{fai}"
        ontologies: []
output:
  vcf:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "*.vcf.gz":
          type: file
          description: VCF file with both SNP annotation information and
            per-sample genotype information.
          pattern: "*.{vcf.gz}"
          ontologies: []
  tbi:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "*.vcf.gz.tbi":
          type: file
          description: TBI file of the VCF.
          pattern: "*.{vcf.gz.tbi}"
          ontologies: []
  rdata:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - RData:
          type: directory
          description: |
            Folder of RData objects generated during the imputation process.
          pattern: "RData"
  plots:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - plots:
          type: directory
          description: |
            Folder of plots generated during the imputation process.
          pattern: "plots"
  versions_r_quilt:
    - - ${task.process}:
          type: string
          description: The name of the process
      - r-quilt:
          type: string
          description: The name of the tool
      - Rscript -e "cat(as.character(packageVersion('QUILT')))":
          type: eval
          description: The expression to obtain the version of the tool
  versions_r_base:
    - - ${task.process}:
          type: string
          description: The name of the process
      - r-base:
          type: string
          description: The name of the tool
      - R --version | sed "1!d; s/.*version //; s/ .*//":
          type: eval
          description: The expression to obtain the version of the tool
# Entries published on the shared `versions` topic, one per versions_* output.
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
      - r-quilt:
          type: string
          description: The name of the tool
      - Rscript -e "cat(as.character(packageVersion('QUILT')))":
          type: eval
          description: The expression to obtain the version of the tool
    - - ${task.process}:
          type: string
          description: The name of the process
      - r-base:
          type: string
          description: The name of the tool
      - R --version | sed "1!d; s/.*version //; s/ .*//":
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@atrigila"
maintainers:
  - "@atrigila"
Loading
Loading