Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions modules/nf-core/simpleaf/multiplexquant/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
channels:
- bioconda
- conda-forge

dependencies:
- bioconda::alevin-fry=0.15.0
- bioconda::piscem=0.20.0
- bioconda::simpleaf=0.25.0
101 changes: 101 additions & 0 deletions modules/nf-core/simpleaf/multiplexquant/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Quantify one sample-multiplexed single-cell library with `simpleaf multiplex-quant`.
//
// The command line is assembled from two helpers defined below the process:
//   - mappingArgs()   -> `--chemistry`, `--reads1`, `--reads2`
//   - referenceArgs() -> optional `--index`, `--probe-set`, `--sample-bc-list`,
//                        `--cell-bc-list`, `--t2g-map`
// Every output lives under `${prefix}` (task.ext.prefix, falling back to meta.id).
// The emitted `meta` is the merge `meta2 + meta3 + meta`, so keys of the primary `meta`
// win on conflict. The merge is applied in both `script:` and `stub:` so that a
// `-stub-run` emits the same meta map as a real run.
process SIMPLEAF_MULTIPLEXQUANT {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/aa/aaba033a0179fd6ccc20c677f9df1fac5d8eac2dbd1bed73c4fa9f7adb65d963/data':
        'community.wave.seqera.io/library/simpleaf:0.25.0--b9f96d8b71a01864' }"

    input:
    //
    // Input reads are expected as: [ meta, chemistry_preset, [ pair1_read1, pair1_read2, pair2_read1, pair2_read2 ] ]
    // Reads are split into R1/R2 pairs and joined with commas before being passed to simpleaf.
    //
    tuple val(meta), val(chemistry), path(reads) // chemistry preset and reads
    tuple val(meta2), path(index, stageAs: 'index/*'), path(t2g_map) // optional pre-built piscem probe index and t2g map
    tuple val(meta3), path(probe_set), path(sample_bc_list), path(cell_bc_list) // optional probe set / sample-BC TSV / cell-BC whitelist overrides
    val resolution // UMI resolution (cr-like, cr-like-em, parsimony, ...)

    output:
    tuple val(meta), path("${prefix}/af_map")                       , emit: map
    tuple val(meta), path("${prefix}/af_quant")                     , emit: quant
    tuple val(meta), path("${prefix}/af_quant/alevin/quants.h5ad")  , emit: h5ad, optional: true
    tuple val(meta), path("${prefix}/probe_t2g.tsv")                , emit: t2g, optional: true
    tuple val(meta), path("${prefix}/probe_index/index")            , emit: probe_index, optional: true
    // Tool versions are published on the `versions` topic, one tuple per tool.
    tuple val("${task.process}"), val('alevin-fry'), eval("alevin-fry --version | sed 's/alevin-fry //'"), topic: versions, emit: versions_alevin_fry
    tuple val("${task.process}"), val('piscem'), eval("piscem --version | sed 's/piscem //'"), topic: versions, emit: versions_piscem
    tuple val("${task.process}"), val('simpleaf'), eval("ALEVIN_FRY_HOME=. simpleaf --version | sed 's/simpleaf //'"), topic: versions, emit: versions_simpleaf

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is assigned without `def` because the `output:` paths above reference it.
    prefix = task.ext.prefix ?: "${meta.id}"

    def mapping_args = mappingArgs(chemistry, reads)
    def reference_args = referenceArgs(index, probe_set, sample_bc_list, cell_bc_list, t2g_map)

    // Fold the auxiliary meta maps into the emitted meta; keys of `meta` take precedence.
    meta = meta2 + meta3 + meta

    """
    export ALEVIN_FRY_HOME=.
    simpleaf set-paths

    # run simpleaf multiplex-quant
    simpleaf multiplex-quant \\
    ${mapping_args} \\
    ${reference_args} \\
    --resolution ${resolution} \\
    --output ${prefix} \\
    --threads ${task.cpus} \\
    --anndata-out \\
    ${args}
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"

    // Same merge as in `script:` so stub runs emit the same meta map as real runs.
    meta = meta2 + meta3 + meta

    """
    export ALEVIN_FRY_HOME=.

    mkdir -p ${prefix}/af_map
    mkdir -p ${prefix}/af_quant/alevin

    touch ${prefix}/af_map/map.rad
    touch ${prefix}/af_map/map_info.json
    touch ${prefix}/af_quant/quant.json
    touch ${prefix}/af_quant/generate_permit_list.json
    touch ${prefix}/af_quant/alevin/quants_mat.mtx
    touch ${prefix}/af_quant/alevin/quants_mat_rows.txt
    touch ${prefix}/af_quant/alevin/quants_mat_cols.txt
    touch ${prefix}/af_quant/alevin/quants.h5ad
    touch ${prefix}/probe_t2g.tsv
    """
}

// Build the mandatory mapping arguments for `simpleaf multiplex-quant`.
//
// `simpleaf multiplex-quant` requires both reads and a chemistry preset (or, with extra
// ext.args, a --geometry override + --cell-bc-list). Only the mainstream case is enforced
// here; non-default geometries can still be set via ext.args.
//
// chemistry : chemistry preset name, interpolated verbatim after `--chemistry`
// reads     : read files ordered as consecutive R1/R2 pairs
//             ([ pair1_read1, pair1_read2, pair2_read1, pair2_read2, ... ])
//
// Returns the `--chemistry` / `--reads1` / `--reads2` flags as one backslash-continued
// string; the R1 files and the R2 files are each joined with commas.
// Calls `error` when reads or chemistry are missing, or when the reads cannot be split
// into complete R1/R2 pairs.
def mappingArgs(chemistry, reads) {
    if (!reads) error "Missing read files; could not proceed."
    if (!chemistry) error "Missing chemistry; could not proceed."

    // A lone staged file may arrive as a bare path instead of a list; wrap it so the
    // pairing check below reports the problem instead of `collate` failing on a path.
    def read_list = reads instanceof java.nio.file.Path ? [reads] : reads

    // An odd count would make transpose() silently drop the unpaired file and leave the
    // reverse list null, so reject it up front with an explicit message.
    if (read_list.size() % 2 != 0) {
        error "Expected an even number of read files (R1/R2 pairs) but got ${read_list.size()}; could not proceed."
    }

    // [r1a, r2a, r1b, r2b] -> [[r1a, r2a], [r1b, r2b]] -> [[r1a, r1b], [r2a, r2b]]
    def (forward, reverse) = read_list.collate(2).transpose()
    return [
        "--chemistry ${chemistry}",
        "--reads1 ${forward.join(',')}",
        "--reads2 ${reverse.join(',')}",
    ].join(' \\\n')
}

// Assemble the optional reference-override flags for `simpleaf multiplex-quant`.
//
// Each argument that is truthy (i.e. not `[]`, null or empty) contributes one
// `--flag value` entry, in the fixed order: --index, --probe-set, --sample-bc-list,
// --cell-bc-list, --t2g-map. Any combination of overrides is allowed. With none of them
// set the result is an empty string and simpleaf auto-downloads a probe set + sample BC
// TSV based on the chemistry preset and (if also provided in ext.args) `--organism`.
//
// Returns the entries joined with a shell line continuation.
def referenceArgs(index, probe_set, sample_bc_list, cell_bc_list, t2g_map) {
    // Map literals keep insertion order, which fixes the order of the emitted flags.
    def overrides = [
        '--index'         : index,
        '--probe-set'     : probe_set,
        '--sample-bc-list': sample_bc_list,
        '--cell-bc-list'  : cell_bc_list,
        '--t2g-map'       : t2g_map,
    ]
    return overrides
        .findAll { flag, value -> value }
        .collect { flag, value -> "${flag} ${value}" }
        .join(' \\\n ')
}
202 changes: 202 additions & 0 deletions modules/nf-core/simpleaf/multiplexquant/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
name: simpleaf_multiplexquant
description: |
Quantify a sample-multiplexed single-cell library (e.g. 10x Chromium Fixed RNA
Profiling / Flex) end-to-end with simpleaf — auto-resolves probe set and sample
barcode rotation map from the chemistry preset, builds a piscem probe index if one
is not supplied, maps reads with `piscem map-sc`, performs hierarchical cell-barcode
+ sample-barcode correction, collates, and quantifies. Barcodes in the resulting
count matrix are prefixed with the demultiplexed sample name.
keywords:
- quantification
- gene expression
- multiplexed
- flex
- SimpleAF
tools:
- simpleaf:
description: |
SimpleAF is a program to simplify and customize the running and configuration of single-cell processing with alevin-fry.
homepage: https://github.com/COMBINE-lab/simpleaf
documentation: https://simpleaf.readthedocs.io/en/latest/flex-quant-command.html
tool_dev_url: https://github.com/COMBINE-lab/simpleaf
licence:
- "BSD-3-Clause"
identifier: ""
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- chemistry:
type: string
description: |
Chemistry preset name (required). Typical values: `10x-flexv1-gex-3p`,
`10x-flexv2-gex-3p`. The preset drives auto-resolution of the probe set,
sample-BC TSV, and cell-BC whitelist (each overridable via the inputs
below). Custom geometry / orientation can be passed through ext.args
(`--geometry '...'`, `--expected-ori fw`) without changing the preset.
- reads:
type: file
description: |
Flat list of input FastQ files for paired-end data, ordered as consecutive R1/R2 pairs.
Example: [ R1_1.fastq.gz, R2_1.fastq.gz, R1_2.fastq.gz, R2_2.fastq.gz ]
ontologies: []
- - meta2:
type: map
description: |
Groovy Map describing the (optional) pre-built piscem probe index input.
e.g. [ tool:'piscem' ]
- index:
type: directory
description: |
Folder containing a pre-built piscem probe index (the directory holding `simpleaf_index.json`
or its parent). When set, simpleaf will skip auto-building the probe index. Pass `[]`
to let simpleaf auto-build from `probe_set` (or the chemistry default).
- t2g_map:
type: file
description: |
Transcript-to-gene map. Use this when running against a transcriptome reference
instead of a probe set. Pass `[]` for probe-set-based runs.
ontologies: []
- - meta3:
type: map
description: |
Groovy Map describing (optional) probe-set / barcode-list overrides.
e.g. [ probe_set:'custom_10xFlex_v1' ]
- probe_set:
type: file
description: |
Probe set CSV or FASTA. Overrides the chemistry-preset default probe set. Pass `[]`
to use whatever the chemistry preset (and `--organism` in ext.args) auto-resolves.
ontologies: []
- sample_bc_list:
type: file
description: |
Three-column TSV listing the sample barcodes used by the experiment:
`observed_seq<TAB>canonical_seq<TAB>sample_name`. Required when overriding the
chemistry default. Pass `[]` to use the chemistry-preset default.
ontologies: []
- cell_bc_list:
type: file
description: |
Cell barcode whitelist (one barcode per line). Overrides the chemistry-preset
default. Pass `[]` to use the chemistry-preset default.
ontologies: []
- resolution:
type: string
description: |
UMI resolution mode (https://alevin-fry.readthedocs.io/en/latest/quant.html).
Possible values: 'cr-like', 'cr-like-em', 'parsimony', 'parsimony-em',
'parsimony-gene', 'parsimony-gene-em'.
output:
map:
- - meta:
type: map
description: Groovy Map containing sample information
- ${prefix}/af_map:
type: directory
description: |
piscem `map-sc` output directory. Contains `map.rad`, `map_info.json` and
`unmapped_bc_count.bin`.
quant:
- - meta:
type: map
description: Groovy Map containing sample information
- ${prefix}/af_quant:
type: directory
description: |
alevin-fry quantification output directory. Contains `quant.json`,
`generate_permit_list.json`, `alevin/quants_mat.mtx`, and the row/column label files
`alevin/quants_mat_rows.txt` and `alevin/quants_mat_cols.txt`.
Barcodes in `quants_mat_rows.txt` are prefixed with the demultiplexed sample name.
h5ad:
- - meta:
type: map
description: Groovy Map containing sample information
- ${prefix}/af_quant/alevin/quants.h5ad:
type: file
description: |
AnnData representation of the count matrix, emitted because `--anndata-out`
is set by default in this module.
ontologies: []
t2g:
- - meta:
type: map
description: Groovy Map containing sample information
- ${prefix}/probe_t2g.tsv:
type: file
description: |
Gene-level transcript-to-gene map resolved by multiplex-quant from the probe set.
ontologies: []
probe_index:
- - meta:
type: map
description: Groovy Map containing sample information
- ${prefix}/probe_index/index:
type: directory
description: |
Auto-built piscem probe index, emitted only when no pre-built `index` was provided
on input. Useful for caching the index between runs.
versions_alevin_fry:
- - ${task.process}:
type: string
description: The name of the process
- alevin-fry:
type: string
description: The name of the tool
- alevin-fry --version | sed 's/alevin-fry //':
type: eval
description: The expression to obtain the version of the tool
versions_piscem:
- - ${task.process}:
type: string
description: The name of the process
- piscem:
type: string
description: The name of the tool
- piscem --version | sed 's/piscem //':
type: eval
description: The expression to obtain the version of the tool
versions_simpleaf:
- - ${task.process}:
type: string
description: The name of the process
- simpleaf:
type: string
description: The name of the tool
- ALEVIN_FRY_HOME=. simpleaf --version | sed 's/simpleaf //':
type: eval
description: The expression to obtain the version of the tool
topics:
versions:
- - ${task.process}:
type: string
description: The name of the process
- alevin-fry:
type: string
description: The name of the tool
- alevin-fry --version | sed 's/alevin-fry //':
type: eval
description: The expression to obtain the version of the tool
- - ${task.process}:
type: string
description: The name of the process
- piscem:
type: string
description: The name of the tool
- piscem --version | sed 's/piscem //':
type: eval
description: The expression to obtain the version of the tool
- - ${task.process}:
type: string
description: The name of the process
- simpleaf:
type: string
description: The name of the tool
- ALEVIN_FRY_HOME=. simpleaf --version | sed 's/simpleaf //':
type: eval
description: The expression to obtain the version of the tool
authors:
- "@an-altosian"
maintainers:
- "@an-altosian"
79 changes: 79 additions & 0 deletions modules/nf-core/simpleaf/multiplexquant/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// nf-test specification for the SIMPLEAF_MULTIPLEXQUANT process defined in ../main.nf.
// Two cases are covered, both using the same subsampled Flex read pair and the
// `10x-flexv1-gex-3p` chemistry preset with every optional input left empty:
//   1. a real run that checks the expected output files exist, and
//   2. a `-stub-run` that snapshots the complete process output.
nextflow_process {

name "Test Process SIMPLEAF_MULTIPLEXQUANT"
script "../main.nf"
process "SIMPLEAF_MULTIPLEXQUANT"
config "./nextflow.config"

tag "modules"
tag "modules_nfcore"
tag "simpleaf"
tag "simpleaf/multiplexquant"

// Real run: no index, t2g map, probe set or barcode lists are supplied
// (input[1] and input[2] carry only empty placeholders).
test("test_simpleaf_multiplexquant - flex - auto") {
when {
process {
"""
meta = [id:'test_flex', single_end:false]
files = [
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R1_001.subsampled.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R2_001.subsampled.fastq.gz', checkIfExists: true)
]
input[0] = Channel.of([ meta, '10x-flexv1-gex-3p', files ])
input[1] = Channel.of([ [:], [], [] ])
input[2] = Channel.of([ [:], [], [], [] ])
input[3] = Channel.of('cr-like')
"""
}
}

then {
// Output files are only checked for existence; the version tuples are the only
// content that is snapshotted.
// NOTE(review): presumably the data files are not byte-stable between runs - confirm.
assertAll(
{ assert process.success },
// files expected inside the `map` output directory
{ assert file("${process.out.map.get(0).get(1)}/map.rad").exists() },
{ assert file("${process.out.map.get(0).get(1)}/map_info.json").exists() },
{ assert file("${process.out.map.get(0).get(1)}/unmapped_bc_count.bin").exists() },
// files expected inside the `quant` output directory
{ assert file("${process.out.quant.get(0).get(1)}/collate.json").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/generate_permit_list.json").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/quant.json").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/featureDump.txt").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/sample_info.json").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/sample_permit_map.bin").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/simpleaf_multiplex_quant_info.json").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/map.collated.rad").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/alevin/quants_mat.mtx").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/alevin/quants_mat_rows.txt").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/alevin/quants_mat_cols.txt").exists() },
{ assert file("${process.out.quant.get(0).get(1)}/alevin/quants.h5ad").exists() },
// snapshot every output channel whose name starts with "versions"
{ assert snapshot(process.out.findAll { key, val -> key.startsWith("versions")}).match() }
)
}
}

// Stub run: same inputs as above, executed with `-stub-run`; the whole process output
// is compared against the stored snapshot.
test("test_simpleaf_multiplexquant - flex - auto - stub") {
options "-stub-run"

when {
process {
"""
meta = [id:'test_flex', single_end:false]
files = [
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R1_001.subsampled.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R2_001.subsampled.fastq.gz', checkIfExists: true)
]
input[0] = Channel.of([ meta, '10x-flexv1-gex-3p', files ])
input[1] = Channel.of([ [:], [], [] ])
input[2] = Channel.of([ [:], [], [], [] ])
input[3] = Channel.of('cr-like')
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
}
Loading
Loading