nf-core · lyh970817 · Mar 21, 2026 · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/modules/nf-core/regenie/runl0/environment.yml b/modules/nf-core/regenie/runl0/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels: # conda channel search order: conda-forge, then bioconda
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::regenie=4.1.2" # exact version pin; the container images in main.nf are tagged regenie:4.1.2 as well
diff --git a/modules/nf-core/regenie/runl0/main.nf b/modules/nf-core/regenie/runl0/main.nf
@@ -0,0 +1,61 @@
+// REGENIE_RUNL0 runs one REGENIE step 1 level-0 job selected from a split master file
+// and emits the level-0 prediction files of that job together with the REGENIE log.
+process REGENIE_RUNL0 {
+    tag "${meta.id}_${job_number}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data'
+        : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}"
+
+    input:
+    tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file)
+    tuple val(meta2), path(master), path(snplist), val(job_number)
+    tuple val(meta3), path(pheno)
+    tuple val(meta4), path(covar)
+    val bsize
+
+    output:
+    tuple val(meta), path("*_l0_Y*"), emit: l0_predictions
+    tuple val(meta), path("*.log"), emit: log
+    tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // REGENIE is given the shared PLINK basename, not the individual staged files
+    def geno_basename = plink_genotype_file.baseName
+    def is_pgen = plink_genotype_file.name.endsWith('.pgen')
+    def geno_opt = is_pgen ? '--pgen' : '--bed'
+    // The --out prefix defaults to the genotype basename and always carries the job number
+    def prefix = task.ext.prefix ?: geno_basename
+    def out_prefix = "${prefix}_job${job_number}"
+    // Falsy inputs (e.g. `[]`) drop the covariate option and fall back to a block size of 1000
+    def covar_opt = covar ? "--covarFile ${covar}" : ''
+    def block_size = bsize ?: 1000
+    """
+    regenie \\
+        --step 1 \\
+        ${geno_opt} ${geno_basename} \\
+        --phenoFile ${pheno} \\
+        ${covar_opt} \\
+        --bsize ${block_size} \\
+        --gz \\
+        --threads ${task.cpus} \\
+        ${args} \\
+        --out ${out_prefix} \\
+        --run-l0 ${master},${job_number}
+    """
+
+    stub:
+    // Creates one placeholder level-0 file and an empty log so that both output globs match
+    def prefix = task.ext.prefix ?: plink_genotype_file.baseName
+    def out_prefix = "${prefix}_job${job_number}"
+    """
+    touch ${out_prefix}_l0_Y1
+    touch ${out_prefix}.log
+    """
+}
diff --git a/modules/nf-core/regenie/runl0/meta.yml b/modules/nf-core/regenie/runl0/meta.yml
@@ -0,0 +1,160 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "regenie_runl0" # module directory regenie/runl0 with "/" written as "_"
+description: Run one REGENIE step 1 level-0 job from a split master file
+keywords:
+  - regenie
+  - gwas
+  - association
+  - genomics
+  - parallel
+tools: # the single external tool wrapped by this module
+  - "regenie":
+      description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)."
+      homepage: "https://rgcgithub.github.io/regenie/"
+      documentation: "https://rgcgithub.github.io/regenie/options/"
+      tool_dev_url: "https://github.com/rgcgithub/regenie"
+      doi: "10.1038/s41588-021-00870-7"
+      licence: ["MIT"]
+      identifier: "biotools:regenie"
+
+input: # entries follow the order of the input block in main.nf
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing genotype information
+          Keep only the genotype analysis identifier in this map
+          REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename
+          e.g. `[ id:'cohort' ]`
+    - plink_genotype_file:
+        type: file
+        description: PLINK primary genotype file in BED or PGEN format
+        pattern: "*.{bed,pgen}"
+        # No EDAM term: format_3003 is the genome-interval BED format, not PLINK binary BED/PGEN
+        ontologies: []
+    - plink_variant_file:
+        type: file
+        description: PLINK variant metadata file in BIM or PVAR format
+        pattern: "*.{bim,pvar,zst}"
+        ontologies: []
+    - plink_sample_file:
+        type: file
+        description: PLINK sample metadata file in FAM or PSAM format
+        pattern: "*.{fam,psam}"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing split level-0 job information
+          e.g. `[ id:'plink_simulated' ]`
+    - master:
+        type: file
+        description: REGENIE split level-0 master file from `regenie/splitl0`
+        pattern: "*.master"
+        ontologies:
+          - edam: "http://edamontology.org/format_2330" # Text
+    - snplist:
+        type: file
+        description: Per-job variant list staged because the master file references it; the path is not passed explicitly to REGENIE
+        pattern: "*_job*.snplist"
+        ontologies:
+          - edam: "http://edamontology.org/format_2330" # Text
+    - job_number:
+        type: integer
+        description: Level-0 job number passed as the second value to `--run-l0`
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing genotype/sample information associated with the phenotype file input
+          Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis
+          e.g. `[ id:'plink_simulated' ]`
+    - pheno:
+        type: file
+        description: Phenotype file passed to `--phenoFile`
+        pattern: "*.{phe,pheno,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475" # TSV
+  - - meta4:
+        type: map
+        description: |
+          Groovy Map containing genotype/sample information associated with the covariate input
+          Use compatible covariate inputs for all stages in the same chunked step 1 analysis
+          e.g. `[ id:'plink_simulated' ]`
+    - covar:
+        type: file
+        optional: true
+        description: Optional covariate file passed to `--covarFile`; provide `[]` when absent
+        pattern: "*.{covar,cov,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475" # TSV
+  - bsize:
+      type: integer
+      description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000`
+
+output: # one entry per `emit:` name declared in main.nf
+  l0_predictions:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing genotype/sample information
+            e.g. `[ id:'plink_simulated' ]`
+      - "*_l0_Y*": # same glob as the l0_predictions path() output in main.nf
+          type: file
+          description: REGENIE level-0 prediction files for this job
+          pattern: "*_l0_Y*"
+          ontologies: []
+  log:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing genotype information
+            e.g. `[ id:'plink_simulated' ]`
+      - "*.log": # same glob as the log path() output in main.nf
+          type: file
+          description: REGENIE run level-0 log file
+          pattern: "*.log"
+          ontologies:
+            - edam: "http://edamontology.org/format_2330" # Text
+  versions_regenie: # main.nf also publishes this tuple on the `versions` topic
+    - - "${task.process}":
+          type: string
+          description: The process the versions were collected from
+      - "regenie":
+          type: string
+          description: The tool name
+      - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': # must stay identical to the eval() command in main.nf
+          type: eval
+          description: The command used to generate the version of the tool
+
+topics:
+  versions: # same three-element tuple as the versions_regenie output entry
+    - - "${task.process}":
+          type: string
+          description: The process the versions were collected from
+      - "regenie":
+          type: string
+          description: The tool name
+      - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"':
+          type: eval
+          description: The command used to generate the version of the tool
+
+notes: |
+  `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`.
+  The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`.
+authors:
+  - "@lyh970817"
+maintainers:
+  - "@lyh970817"
+containers: # image coordinates per engine; keep in step with the `container` directive in main.nf
+  conda:
+    linux_amd64:
+      lock_file: "modules/nf-core/regenie/runl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt"
+  docker:
+    linux_amd64:
+      build_id: "bd-5d361f9fcb2f85cf_1"
+      name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" # identical to the non-singularity image in main.nf
+      scanId: "sc-cc9eb5ed5eb381dd_2"
+  singularity:
+    linux_amd64:
+      build_id: "bd-7c121fb4ecd57890_1"
+      name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890"
+      https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" # identical to the singularity URL in main.nf
diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test b/modules/nf-core/regenie/runl0/tests/main.nf.test
@@ -0,0 +1,168 @@
+nextflow_process {
+
+    name "Test Process REGENIE_RUNL0"
+    config "./nextflow.config" // not shown here; presumably forwards params.module_args to the process - TODO confirm
+    script "../main.nf"
+    process "REGENIE_RUNL0"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "regenie"
+    tag "regenie/splitl0" // tagged because the setup block runs REGENIE_SPLITL0
+    tag "regenie/runl0"
+
+    setup { // provides REGENIE_SPLITL0.out.master and .snplists, which the tests map into input[1]
+        run("REGENIE_SPLITL0") {
+            script "../../splitl0/main.nf" // sibling module regenie/splitl0
+            process {
+                """
+                input[0] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                ]
+
+                input[1] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
+                ]
+
+                input[2] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true)
+                ]
+
+                input[3] = 100
+                input[4] = 2
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens popgen - quantitative plink1 with covariates") { // real run of level-0 job 1 on PLINK1 inputs
+
+        when {
+            params {
+                module_args = '--phenoColList QuantitativeTrait'
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                ]
+
+                input[1] = REGENIE_SPLITL0.out.master
+                    .combine(REGENIE_SPLITL0.out.snplists)
+                    .map { master_meta, master, snplist_meta, snplists ->
+                        [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ]
+                    }
+
+                input[2] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
+                ]
+
+                input[3] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true)
+                ]
+
+                input[4] = 100
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.l0_predictions.size() == 1 },
+                { assert process.out.log.size() == 1 },
+                { assert process.out.l0_predictions[0][0].id == 'plink_simulated' },
+                { assert process.out.log[0][0].id == 'plink_simulated' },
+                {
+                    def emitted = process.out.l0_predictions[0][1] // may be one path or a list of paths
+                    def l0Files = emitted instanceof List ? emitted : [emitted]
+                    assert l0Files.size() >= 1
+                    assert l0Files.every { l0File -> path(l0File).getFileName().toString().contains('_l0_Y') }
+                },
+                { assert path(process.out.log[0][1]).exists() },
+                { // snapshot only the sorted prediction file names and the versions outputs, not file contents
+                    def l0Names = process.out.l0_predictions.collect { entry ->
+                        def files = entry[1] instanceof List ? entry[1] : [entry[1]]
+                        [entry[0], files.collect { l0File -> path(l0File).getFileName().toString() }.sort()]
+                    }
+                    assert snapshot(
+                        l0Names,
+                        process.out.findAll { name, value -> name.startsWith('versions') }
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("homo_sapiens popgen - plink1 - stub") { // same inputs as the real run, executed with -stub
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--phenoColList QuantitativeTrait'
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                ]
+
+                input[1] = REGENIE_SPLITL0.out.master
+                    .combine(REGENIE_SPLITL0.out.snplists)
+                    .map { master_meta, master, snplist_meta, snplists ->
+                        [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ]
+                    }
+
+                input[2] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
+                ]
+
+                input[3] = [
+                    [ id:'plink_simulated' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true)
+                ]
+
+                input[4] = 100
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { // snapshot file names only: sorted prediction names, the log name, and the versions outputs
+                    def l0Names = process.out.l0_predictions.collect { entry ->
+                        def files = entry[1] instanceof List ? entry[1] : [entry[1]]
+                        [entry[0], files.collect { l0File -> path(l0File).getFileName().toString() }.sort()]
+                    }
+                    def logNames = process.out.log.collect { entry ->
+                        [entry[0], path(entry[1]).getFileName().toString()]
+                    }
+                    assert snapshot(
+                        l0Names,
+                        logNames,
+                        process.out.findAll { name, value -> name.startsWith('versions') }
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+}