Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/regenie/runl0/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the regenie/runl0 module; main.nf points its `conda`
# directive at this file.
channels:
- conda-forge
- bioconda
dependencies:
# Pinned to REGENIE 4.1.2, the same version named in the module's container tags.
- "bioconda::regenie=4.1.2"
55 changes: 55 additions & 0 deletions modules/nf-core/regenie/runl0/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Runs one REGENIE step 1 level-0 job (`--run-l0 <master>,<job_number>`) against a
// PLINK BED or PGEN fileset and emits the level-0 prediction files plus the run log.
process REGENIE_RUNL0 {
    tag "${meta.id}_${job_number}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data'
        : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}"

    input:
    // Genotype fileset; the three files are staged together so they can be addressed by one prefix.
    tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file)
    // Master file, the per-job snplist (staged only, never named on the command line) and the job index.
    tuple val(meta2), path(master), path(snplist), val(job_number)
    tuple val(meta3), path(pheno)
    // A falsy `covar` (e.g. `[]`) drops `--covarFile` from the command.
    tuple val(meta4), path(covar)
    // A falsy `bsize` (e.g. `[]`) selects the fallback block size of 1000.
    val bsize

    output:
    tuple val(meta), path("*_l0_Y*"), emit: l0_predictions
    tuple val(meta), path("*.log"), emit: log
    // Version string: first line of `regenie --version`, minus a leading `v` and a trailing `.gz`.
    tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie

    when:
    task.ext.when == null || task.ext.when

    script:
    // `args` and `prefix` keep their conventional names; the other locals are private to this block.
    def args = task.ext.args ?: ''
    // The genotype prefix handed to REGENIE is the primary file's name without its extension.
    def geno_basename = plink_genotype_file.baseName
    def prefix = task.ext.prefix ?: geno_basename
    def out_prefix = "${prefix}_job${job_number}"
    def is_pgen = plink_genotype_file.name.endsWith('.pgen')
    def geno_flag = is_pgen ? '--pgen' : '--bed'
    def covar_opt = !covar ? '' : "--covarFile ${covar}"
    def block_size = bsize ? bsize : 1000
    """
    regenie \\
        --step 1 \\
        ${geno_flag} ${geno_basename} \\
        --phenoFile ${pheno} \\
        ${covar_opt} \\
        --bsize ${block_size} \\
        --gz \\
        --threads ${task.cpus} \\
        ${args} \\
        --out ${out_prefix} \\
        --run-l0 ${master},${job_number}
    """

    stub:
    // Creates one placeholder prediction file and one placeholder log using the same naming as `script:`.
    def geno_basename = plink_genotype_file.baseName
    def prefix = task.ext.prefix ?: geno_basename
    def out_prefix = "${prefix}_job${job_number}"
    """
    touch ${out_prefix}_l0_Y1
    touch ${out_prefix}.log
    """
}
160 changes: 160 additions & 0 deletions modules/nf-core/regenie/runl0/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module identity, one-line summary and search keywords.
name: "regenie_runl0"
description: Run one REGENIE step 1 level-0 job from a split master file
keywords:
- regenie
- gwas
- association
- genomics
- parallel
# Upstream tool metadata: project links, publication DOI, licence and bio.tools identifier.
tools:
- "regenie":
description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)."
homepage: "https://rgcgithub.github.io/regenie/"
documentation: "https://rgcgithub.github.io/regenie/options/"
tool_dev_url: "https://github.com/rgcgithub/regenie"
doi: "10.1038/s41588-021-00870-7"
licence: ["MIT"]
identifier: "biotools:regenie"

# Inputs are listed in the same order as the `input:` declarations in main.nf.
input:
# Channel 1: genotype fileset. main.nf passes the basename of `plink_genotype_file`
# as the prefix and selects `--pgen` when the file name ends in `.pgen`, otherwise `--bed`.
- - meta:
type: map
description: |
Groovy Map containing genotype information
Keep only the genotype analysis identifier in this map
REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename
e.g. `[ id:'cohort' ]`
- plink_genotype_file:
type: file
description: PLINK primary genotype file in BED or PGEN format
pattern: "*.{bed,pgen}"
# NOTE(review): the pattern also admits `*.pgen`, but only a BED term is listed below — confirm whether a PGEN term should be added.
ontologies:
- edam: "http://edamontology.org/format_3003" # BED
- plink_variant_file:
type: file
description: PLINK variant metadata file in BIM or PVAR format
pattern: "*.{bim,pvar,zst}"
# NOTE(review): `zst` in the pattern presumably covers compressed `.pvar.zst` files, which the description does not mention — confirm.
ontologies: []
- plink_sample_file:
type: file
description: PLINK sample metadata file in FAM or PSAM format
pattern: "*.{fam,psam}"
ontologies: []
# Channel 2: split level-0 job description. main.nf builds `--run-l0 <master>,<job_number>`
# from it; `snplist` is staged into the task directory but never named on the command line.
- - meta2:
type: map
description: |
Groovy Map containing split level-0 job information
e.g. `[ id:'plink_simulated' ]`
- master:
type: file
description: REGENIE split level-0 master file from `regenie/splitl0`
pattern: "*.master"
ontologies:
- edam: "http://edamontology.org/format_2330" # Text
- snplist:
type: file
description: Per-job variant list staged because the master file references it; the path is not passed explicitly to REGENIE
pattern: "*_job*.snplist"
ontologies:
- edam: "http://edamontology.org/format_2330" # Text
- job_number:
type: integer
description: Level-0 job number passed as the second value to `--run-l0`
# Channel 3: phenotype file, always passed via `--phenoFile`.
- - meta3:
type: map
description: |
Groovy Map containing genotype/sample information associated with the phenotype file input
Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis
e.g. `[ id:'plink_simulated' ]`
- pheno:
type: file
description: Phenotype file passed to `--phenoFile`
pattern: "*.{phe,pheno,txt,tsv}"
ontologies:
- edam: "http://edamontology.org/format_3475" # TSV
# Channel 4: optional covariates. main.nf emits `--covarFile` only when `covar` is truthy.
- - meta4:
type: map
description: |
Groovy Map containing genotype/sample information associated with the covariate input
Use compatible covariate inputs for all stages in the same chunked step 1 analysis
e.g. `[ id:'plink_simulated' ]`
- covar:
type: file
optional: true
description: Optional covariate file passed to `--covarFile`; provide `[]` when absent
pattern: "*.{covar,cov,txt,tsv}"
ontologies:
- edam: "http://edamontology.org/format_3475" # TSV
# Bare value input (no meta map). main.nf falls back to 1000 when this value is falsy.
- bsize:
type: integer
description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000`

# Outputs mirror the `output:` declarations in main.nf; each key is the `emit:` name.
output:
# Every file in the task directory matching `*_l0_Y*`; the stub creates a single `<prefix>_job<job_number>_l0_Y1`.
l0_predictions:
- - meta:
type: map
description: |
Groovy Map containing genotype/sample information
e.g. `[ id:'plink_simulated' ]`
- "*_l0_Y*":
type: file
description: REGENIE level-0 prediction files for this job
pattern: "*_l0_Y*"
ontologies: []
# Log files matching `*.log`; main.nf passes `--out <prefix>_job<job_number>` and the stub touches `<prefix>_job<job_number>.log`.
log:
- - meta:
type: map
description: |
Groovy Map containing genotype information
e.g. `[ id:'plink_simulated' ]`
- "*.log":
type: file
description: REGENIE run level-0 log file
pattern: "*.log"
ontologies:
- edam: "http://edamontology.org/format_2330" # Text
# Version tuple (process name, tool name, evaluated version). The sed script prints only
# the first line of `regenie --version` after stripping a leading `v` and a trailing `.gz`.
versions_regenie:
- - "${task.process}":
type: string
description: The process the versions were collected from
- "regenie":
type: string
description: The tool name
- 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"':
type: eval
description: The command used to generate the version of the tool

# Topic channels: main.nf publishes the same version tuple on the `versions` topic.
topics:
versions:
- - ${task.process}:
type: string
description: The process the versions were collected from
- regenie:
type: string
description: The tool name
- 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"':
type: eval
description: The command used to generate the version of the tool

# Usage notes: `task.ext.args` is interpolated into the REGENIE command line by main.nf.
notes: |
`task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`.
The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`.
authors:
- "@lyh970817"
maintainers:
- "@lyh970817"
# Container provenance. The docker `name` and the singularity `https` URL below are the
# same two image references that the `container` directive in main.nf chooses between.
containers:
conda:
linux_amd64:
lock_file: "modules/nf-core/regenie/runl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt"
docker:
linux_amd64:
build_id: "bd-5d361f9fcb2f85cf_1"
name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf"
scanId: "sc-cc9eb5ed5eb381dd_2"
singularity:
linux_amd64:
build_id: "bd-7c121fb4ecd57890_1"
name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890"
https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data"
168 changes: 168 additions & 0 deletions modules/nf-core/regenie/runl0/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// nf-test specification for REGENIE_RUNL0: one real run and one stub run, both fed by
// the master file and snplists produced by REGENIE_SPLITL0 in the shared setup block.
nextflow_process {

    name "Test Process REGENIE_RUNL0"
    config "./nextflow.config"
    script "../main.nf"
    process "REGENIE_RUNL0"

    tag "modules"
    tag "modules_nfcore"
    tag "regenie"
    tag "regenie/splitl0"
    tag "regenie/runl0"

    // Upstream dependency: split the level-0 work so the tests below can pick up job 1.
    setup {
        run("REGENIE_SPLITL0") {
            script "../../splitl0/main.nf"
            process {
                """
                input[0] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
                ]

                input[1] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
                ]

                input[2] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true)
                ]

                input[3] = 100
                input[4] = 2
                """
            }
        }
    }

    test("homo_sapiens popgen - quantitative plink1 with covariates") {

        when {
            params {
                module_args = '--phenoColList QuantitativeTrait'
            }
            process {
                """
                input[0] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
                ]

                input[1] = REGENIE_SPLITL0.out.master
                    .combine(REGENIE_SPLITL0.out.snplists)
                    .map { master_meta, master, snplist_meta, snplists ->
                        [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ]
                    }

                input[2] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
                ]

                input[3] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true)
                ]

                input[4] = 100
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // Exactly one emission per channel, each tagged with the genotype meta id.
                { assert process.out.l0_predictions.size() == 1 },
                { assert process.out.log.size() == 1 },
                { assert process.out.l0_predictions.get(0).get(0).id == 'plink_simulated' },
                { assert process.out.log.get(0).get(0).id == 'plink_simulated' },
                {
                    // The path slot may hold one file or a list; normalise before checking names.
                    def emitted = process.out.l0_predictions.get(0).get(1)
                    def emittedFiles = emitted instanceof List ? emitted : [emitted]
                    assert emittedFiles.size() >= 1
                    assert emittedFiles.every { path(it).getFileName().toString().contains('_l0_Y') }
                },
                { assert path(process.out.log.get(0).get(1)).exists() },
                {
                    // Snapshot only sorted file names (not file contents) together with the version outputs.
                    def namedPredictions = process.out.l0_predictions.collect { emission ->
                        def members = emission[1] instanceof List ? emission[1] : [emission[1]]
                        [emission[0], members.collect { path(it).getFileName().toString() }.sort()]
                    }
                    assert snapshot(
                        namedPredictions,
                        process.out.findAll { outName, outValue -> outName.startsWith('versions') }
                    ).match()
                }
            )
        }

    }

    test("homo_sapiens popgen - plink1 - stub") {

        options "-stub"

        when {
            params {
                module_args = '--phenoColList QuantitativeTrait'
            }
            process {
                """
                input[0] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
                ]

                input[1] = REGENIE_SPLITL0.out.master
                    .combine(REGENIE_SPLITL0.out.snplists)
                    .map { master_meta, master, snplist_meta, snplists ->
                        [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ]
                    }

                input[2] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
                ]

                input[3] = [
                    [ id:'plink_simulated' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true)
                ]

                input[4] = 100
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                {
                    // Stub outputs are empty placeholders, so the snapshot records file names only.
                    def namedPredictions = process.out.l0_predictions.collect { emission ->
                        def members = emission[1] instanceof List ? emission[1] : [emission[1]]
                        [emission[0], members.collect { path(it).getFileName().toString() }.sort()]
                    }
                    def namedLogs = process.out.log.collect { logEmission ->
                        [logEmission[0], path(logEmission[1]).getFileName().toString()]
                    }
                    assert snapshot(
                        namedPredictions,
                        namedLogs,
                        process.out.findAll { outName, outValue -> outName.startsWith('versions') }
                    ).match()
                }
            )
        }

    }

}
Loading
Loading