-
Notifications
You must be signed in to change notification settings - Fork 1k
Add poppunk/createdb module #11118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Add poppunk/createdb module #11118
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - "bioconda::poppunk=2.7.8" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| // Runs `poppunk --create-db` on the assemblies listed in r_file and emits the | ||
| // resulting database directory plus its .h5 and .dists.* files. | ||
| process POPPUNK_CREATEDB { | ||
| tag "$meta.id" | ||
| label 'process_medium' | ||
| conda "${moduleDir}/environment.yml" | ||
| container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
| 'https://depot.galaxyproject.org/singularity/poppunk:2.7.8--py310h4d0eb5b_0' : | ||
| 'biocontainers/poppunk:2.7.8--py310h4d0eb5b_0' }" | ||
| input: | ||
| // fasta is never placed on the command line; presumably it is declared so the | ||
| // files named inside r_file are staged into the task directory - TODO confirm. | ||
| tuple val(meta), path(r_file), path(fasta) | ||
| output: | ||
| // Paths follow `prefix` (the value given to --output), not meta.id, so that | ||
| // setting task.ext.prefix does not leave these outputs unmatched. | ||
| // The output keys listed in the module's meta.yml should be kept in step with these. | ||
| tuple val(meta), path("${prefix}"), emit: db | ||
| tuple val(meta), path("${prefix}/${prefix}.h5"), emit: h5 | ||
| tuple val(meta), path("${prefix}/${prefix}.dists.*"), emit: dists | ||
| // NOTE(review): the recorded test snapshot shows this eval producing "poppunk 2.7.8", | ||
| // i.e. the sed pattern (which keys on a 'v') does not strip the tool name. The command is | ||
| // left unchanged here because meta.yml and the snapshot mirror it verbatim; fix all three together. | ||
| tuple val("${task.process}"), val('poppunk'), eval("poppunk --version 2>&1 | head -1 | sed 's/^.*v//'"), topic: versions, emit: versions_poppunk | ||
| when: | ||
| task.ext.when == null || task.ext.when | ||
| script: | ||
| def args = task.ext.args ?: '' | ||
| // Deliberately assigned without `def`: the output block above must be able to resolve ${prefix}. | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| """ | ||
| poppunk \\ | ||
| --create-db \\ | ||
| --r-files ${r_file} \\ | ||
| --output ${prefix} \\ | ||
| --threads ${task.cpus} \\ | ||
| --qc-keep \\ | ||
| ${args} | ||
| """ | ||
| stub: | ||
| // Same prefix rule as the script block; `args` is not needed for the stub. | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| """ | ||
| mkdir -p ${prefix} | ||
| touch ${prefix}/${prefix}.h5 | ||
| touch ${prefix}/${prefix}.dists.pkl | ||
| touch ${prefix}/${prefix}.dists.npy | ||
| touch ${prefix}/${prefix}.refs | ||
| touch ${prefix}/qcreport.txt | ||
| """ | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,100 @@ | ||||||
| name: "poppunk_createdb" | ||||||
| description: Create a PopPUNK database of kmer sketches and pairwise distances | ||||||
| from a set of assemblies | ||||||
| keywords: | ||||||
| - genomics | ||||||
| - bacteria | ||||||
| - clustering | ||||||
| - sketching | ||||||
| - poppunk | ||||||
| tools: | ||||||
| - "poppunk": | ||||||
| description: "PopPUNK (POPulation Partitioning Using Nucleotide Kmers) - rapid | ||||||
| bacterial population analysis. Note: this module always passes --qc-keep; | ||||||
| additional options can be supplied via task.ext.args." | ||||||
| homepage: "https://poppunk.bacpop.org/index.html" | ||||||
| documentation: "https://poppunk.bacpop.org/index.html" | ||||||
| tool_dev_url: "https://github.com/bacpop/PopPUNK" | ||||||
| doi: "10.1101/gr.241455.118" | ||||||
| licence: | ||||||
| - "Apache-2.0" | ||||||
| identifier: biotools:poppunk | ||||||
| input: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'sample1' ]` | ||||||
| - r_file: | ||||||
| type: file | ||||||
| description: | | ||||||
| Tab-separated file listing sample names and paths to their assembly FASTA files. | ||||||
| Format: <sample_name>\t<path/to/assembly.fa> | ||||||
| pattern: "*.{txt,tsv}" | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_3475 | ||||||
| - fasta: | ||||||
| type: file | ||||||
| description: List of assembly FASTA files for all samples listed in the | ||||||
| r_file | ||||||
| pattern: "*.{fa,fa.gz,fasta,fasta.gz,fna,fna.gz}" | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_1929 | ||||||
| output: | ||||||
| db: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'sample1' ]` | ||||||
| - ${meta.id}: | ||||||
| type: directory | ||||||
| description: Directory containing the full PopPUNK database output files | ||||||
| h5: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'sample1' ]` | ||||||
| - ${meta.id}/${meta.id}.h5: | ||||||
| type: file | ||||||
| description: HDF5 file containing the k-mer sketches for all input | ||||||
| genomes | ||||||
| pattern: "*.h5" | ||||||
| ontologies: [] | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can an ontology be added here please? |
||||||
| dists: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'sample1' ]` | ||||||
| - ${meta.id}/${meta.id}.dists.*: | ||||||
| type: file | ||||||
| description: Pairwise distance matrix files (numpy and pickle format) | ||||||
| pattern: "*.dists.*" | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Looking at the description/snapshots, is this correct? |
||||||
| ontologies: [] | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can ontologies be added here please? |
||||||
| versions_poppunk: | ||||||
| - - ${task.process}: | ||||||
| type: string | ||||||
| description: The name of the process | ||||||
| - poppunk: | ||||||
| type: string | ||||||
| description: The name of the tool | ||||||
| - poppunk --version 2>&1 | head -1 | sed 's/^.*v//': | ||||||
| type: eval | ||||||
| description: The expression to obtain the version of the tool | ||||||
| topics: | ||||||
| versions: | ||||||
| - - ${task.process}: | ||||||
| type: string | ||||||
| description: The name of the process | ||||||
| - poppunk: | ||||||
| type: string | ||||||
| description: The name of the tool | ||||||
| - poppunk --version 2>&1 | head -1 | sed 's/^.*v//': | ||||||
| type: eval | ||||||
| description: The expression to obtain the version of the tool | ||||||
| authors: | ||||||
| - "@cwoodside1278" | ||||||
| maintainers: | ||||||
| - "@cwoodside1278" | ||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,41 @@ | ||||||
| nextflow_process { | ||||||
| name "Test Process POPPUNK_CREATEDB" | ||||||
| script "../main.nf" | ||||||
| process "POPPUNK_CREATEDB" | ||||||
| tag "modules" | ||||||
| tag "modules_nfcore" | ||||||
| tag "poppunk" | ||||||
| tag "poppunk/createdb" | ||||||
|
|
||||||
| // PopPUNK requires a minimum number of sufficiently diverse same-species genomes | ||||||
| // to compute meaningful k-mer distances. No suitable tiny test data exists in | ||||||
| // nf-core/test-datasets, so only a stub test is provided here. | ||||||
| // To test with real data, run manually with multiple same-species genome FASTAs, | ||||||
| // e.g. multiple H. influenzae strains from NCBI. | ||||||
|
|
||||||
| test("haemophilus_influenzae - fasta - createdb - stub") { | ||||||
| options "-stub" | ||||||
| when { | ||||||
| process { | ||||||
| """ | ||||||
| def fasta1 = file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true) | ||||||
| def fasta2 = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true) | ||||||
| def rfile = file(workDir.toString() + '/rfile.txt') | ||||||
| rfile.text = ["sample1", "genome.fna.gz"].join("\t") + System.lineSeparator() + | ||||||
| ["sample2", "haemophilus_influenzae.fna.gz"].join("\t") + System.lineSeparator() | ||||||
| input[0] = [ | ||||||
| [ id:'test' ], | ||||||
| rfile, | ||||||
| [fasta1, fasta2] | ||||||
| ] | ||||||
| """ | ||||||
| } | ||||||
| } | ||||||
| then { | ||||||
| assert process.success | ||||||
| assertAll( | ||||||
| { assert snapshot(process.out).match() } | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
See https://nf-co.re/docs/tutorials/tests_and_test_data/components/03_testing_modules/ |
||||||
| ) | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| { | ||
| "haemophilus_influenzae - fasta - createdb - stub": { | ||
| "content": [ | ||
| { | ||
| "0": [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| [ | ||
| "qcreport.txt:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.dists.npy:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.dists.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.refs:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ] | ||
| ], | ||
| "1": [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ], | ||
| "2": [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| [ | ||
| "test.dists.npy:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.dists.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ] | ||
| ], | ||
| "3": [ | ||
| [ | ||
| "POPPUNK_CREATEDB", | ||
| "poppunk", | ||
| "poppunk 2.7.8" | ||
| ] | ||
| ], | ||
| "db": [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| [ | ||
| "qcreport.txt:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.dists.npy:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.dists.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.refs:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ] | ||
| ], | ||
| "dists": [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| [ | ||
| "test.dists.npy:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.dists.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ] | ||
| ], | ||
| "h5": [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ], | ||
| "versions_poppunk": [ | ||
| [ | ||
| "POPPUNK_CREATEDB", | ||
| "poppunk", | ||
| "poppunk 2.7.8" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should be Footnotes |
||
| ] | ||
| ] | ||
| } | ||
| ], | ||
| "timestamp": "2026-04-02T19:53:41.29162914", | ||
| "meta": { | ||
| "nf-test": "0.9.4", | ||
| "nextflow": "25.10.4" | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at the snapshots, is this correct?