Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions modules/nf-core/scanpy/pca/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,26 @@ process SCANPY_PCA {
val key_added

output:
tuple val(meta), path("*.h5ad") , emit: h5ad
tuple val(meta), path("*.h5ad") , optional: true, emit: h5ad
tuple val(meta), path("X_*.pkl"), emit: obsm
path "versions.yml" , emit: versions
path "versions.yml" , emit: versions, topic: versions
tuple val(meta), path("*.zarr") , optional: true, emit: zarr

when:
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: "${meta.id}_pca"
if ("${prefix}.h5ad" == "${h5ad}") {
output_file = h5ad.name.endsWith(".zarr") ? "${prefix}.zarr" : "${prefix}.h5ad"
if (output_file == h5ad.name) {
error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
}
template('pca.py')

stub:
prefix = task.ext.prefix ?: "${meta.id}_pca"
if ("${prefix}.h5ad" == "${h5ad}") {
output_file = h5ad.name.endsWith(".zarr") ? "${prefix}.zarr" : "${prefix}.h5ad"
if (output_file == h5ad.name) {
error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
}
"""
Expand All @@ -37,13 +40,17 @@ process SCANPY_PCA {
export MPLCONFIGDIR=./tmp/mpl
export NUMBA_CACHE_DIR=./tmp/numba

touch ${prefix}.h5ad
if [[ "${output_file}" == *.zarr ]]; then
mkdir -p "${output_file}"
else
touch "${output_file}"
fi
touch X_${prefix}.pkl

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python3 -c 'import platform; print(platform.python_version())')
scanpy: \$(python3 -c 'import scanpy; print(scanpy.__version__)')
scanpy: \$(python3 -c 'import importlib.metadata; print(importlib.metadata.version("scanpy"))')
END_VERSIONS
"""
}
26 changes: 24 additions & 2 deletions modules/nf-core/scanpy/pca/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ input:
e.g. [ id:'test' ]
- h5ad:
type: file
description: AnnData object in h5ad format
pattern: "*.{h5ad}"
description: AnnData object in h5ad or zarr format
pattern: "*.{h5ad,zarr}"
ontologies:
- edam: "http://edamontology.org/format_3590" # HDF5 format
- edam: "http://edamontology.org/format_3915" # Zarr format
- key_added:
type: string
description: |
Expand Down Expand Up @@ -66,6 +67,27 @@ output:
pattern: "versions.yml"
ontologies:
- edam: http://edamontology.org/format_3750 # YAML
zarr:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.zarr":
type: directory
description: AnnData object with PCA coordinates added
pattern: "*.zarr"
ontologies:
- edam: "http://edamontology.org/format_3915" # Zarr format

topics:
versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
ontologies:
- edam: http://edamontology.org/format_3750 # YAML

authors:
- "@nictru"
Expand Down
10 changes: 8 additions & 2 deletions modules/nf-core/scanpy/pca/templates/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import platform

import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
Expand All @@ -18,7 +19,9 @@
threadpool_limits(int("${task.cpus}"))
sc.settings.n_jobs = int("${task.cpus}")

adata = sc.read_h5ad("${h5ad}")
input_file = "${h5ad}"
output_file = "${output_file}"
adata = ad.read_zarr(input_file) if input_file.endswith(".zarr") else sc.read_h5ad(input_file)
prefix = "${prefix}"
key_added = "${key_added}"

Expand All @@ -29,7 +32,10 @@
# This ensures hashes are stable
adata.obsm[key_added] = np.round(adata.obsm[key_added], 8)

adata.write_h5ad(f"{prefix}.h5ad")
if output_file.endswith(".zarr"):
adata.write_zarr(output_file)
else:
adata.write_h5ad(output_file)
df = pd.DataFrame(adata.obsm[key_added], index=adata.obs_names)
df.to_pickle(f"X_{prefix}.pkl")

Expand Down
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "anndata",
"encoding-version": "0.1.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
1 change: 1 addition & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/.zmetadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"metadata": {".zgroup": {"zarr_format": 2}, ".zattrs": {"encoding-type": "anndata", "encoding-version": "0.1.0"}, "layers/.zattrs": {"encoding-type": "dict", "encoding-version": "0.1.0"}, "layers/.zgroup": {"zarr_format": 2, "consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "obs/.zattrs": {"column-order": [], "_index": "_index", "encoding-type": "dataframe", "encoding-version": "0.2.0"}, "obs/.zgroup": {"zarr_format": 2, "consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "obsm/.zattrs": {"encoding-type": "dict", "encoding-version": "0.1.0"}, "obsm/.zgroup": {"zarr_format": 2, "consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "obsp/.zattrs": {"encoding-type": "dict", "encoding-version": "0.1.0"}, "obsp/.zgroup": {"zarr_format": 2, "consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "raw/.zattrs": {"encoding-type": "null", "encoding-version": "0.1.0"}, "raw/.zarray": {"shape": [], "chunks": [], "dtype": "|b1", "fill_value": false, "order": "C", "filters": null, "dimension_separator": ".", "compressor": {"id": "blosc", "cname": "lz4", "clevel": 5, "shuffle": 1, "blocksize": 0}, "zarr_format": 2}, "uns/.zattrs": {"encoding-type": "dict", "encoding-version": "0.1.0"}, "uns/.zgroup": {"zarr_format": 2, "consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "var/.zattrs": {"column-order": [], "_index": "_index", "encoding-type": "dataframe", "encoding-version": "0.2.0"}, "var/.zgroup": {"zarr_format": 2, "consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "varm/.zattrs": {"encoding-type": "dict", "encoding-version": "0.1.0"}, "varm/.zgroup": {"zarr_format": 2, "consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "varp/.zattrs": {"encoding-type": "dict", "encoding-version": "0.1.0"}, "varp/.zgroup": {"zarr_format": 2, 
"consolidated_metadata": {"metadata": {}, "must_understand": false, "kind": "inline"}}, "X/.zattrs": {"encoding-type": "array", "encoding-version": "0.2.0"}, "X/.zarray": {"shape": [6, 5], "chunks": [6, 5], "dtype": "<f4", "fill_value": 0.0, "order": "C", "filters": null, "dimension_separator": ".", "compressor": {"id": "blosc", "cname": "lz4", "clevel": 5, "shuffle": 1, "blocksize": 0}, "zarr_format": 2}, "obs/_index/.zattrs": {"encoding-type": "string-array", "encoding-version": "0.2.0"}, "obs/_index/.zarray": {"shape": [6], "chunks": [6], "dtype": "|O", "fill_value": "", "order": "C", "filters": [{"id": "vlen-utf8"}], "dimension_separator": ".", "compressor": {"id": "blosc", "cname": "lz4", "clevel": 5, "shuffle": 1, "blocksize": 0}, "zarr_format": 2}, "var/_index/.zattrs": {"encoding-type": "string-array", "encoding-version": "0.2.0"}, "var/_index/.zarray": {"shape": [5], "chunks": [5], "dtype": "|O", "fill_value": "", "order": "C", "filters": [{"id": "vlen-utf8"}], "dimension_separator": ".", "compressor": {"id": "blosc", "cname": "lz4", "clevel": 5, "shuffle": 1, "blocksize": 0}, "zarr_format": 2}}, "zarr_consolidated_format": 1}
23 changes: 23 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/X/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"shape": [
6,
5
],
"chunks": [
6,
5
],
"dtype": "<f4",
"fill_value": 0.0,
"order": "C",
"filters": null,
"dimension_separator": ".",
"compressor": {
"id": "blosc",
"cname": "lz4",
"clevel": 5,
"shuffle": 1,
"blocksize": 0
},
"zarr_format": 2
}
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/X/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "array",
"encoding-version": "0.2.0"
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "dict",
"encoding-version": "0.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
6 changes: 6 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/obs/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"column-order": [],
"_index": "_index",
"encoding-type": "dataframe",
"encoding-version": "0.2.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/obs/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
25 changes: 25 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/obs/_index/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"shape": [
6
],
"chunks": [
6
],
"dtype": "|O",
"fill_value": "",
"order": "C",
"filters": [
{
"id": "vlen-utf8"
}
],
"dimension_separator": ".",
"compressor": {
"id": "blosc",
"cname": "lz4",
"clevel": 5,
"shuffle": 1,
"blocksize": 0
},
"zarr_format": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "string-array",
"encoding-version": "0.2.0"
}
Binary file not shown.
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/obsm/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "dict",
"encoding-version": "0.1.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/obsm/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/obsp/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "dict",
"encoding-version": "0.1.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/obsp/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
17 changes: 17 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/raw/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"shape": [],
"chunks": [],
"dtype": "|b1",
"fill_value": false,
"order": "C",
"filters": null,
"dimension_separator": ".",
"compressor": {
"id": "blosc",
"cname": "lz4",
"clevel": 5,
"shuffle": 1,
"blocksize": 0
},
"zarr_format": 2
}
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/raw/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "null",
"encoding-version": "0.1.0"
}
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/uns/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "dict",
"encoding-version": "0.1.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/uns/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
6 changes: 6 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/var/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"column-order": [],
"_index": "_index",
"encoding-type": "dataframe",
"encoding-version": "0.2.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/var/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
25 changes: 25 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/var/_index/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"shape": [
5
],
"chunks": [
5
],
"dtype": "|O",
"fill_value": "",
"order": "C",
"filters": [
{
"id": "vlen-utf8"
}
],
"dimension_separator": ".",
"compressor": {
"id": "blosc",
"cname": "lz4",
"clevel": 5,
"shuffle": 1,
"blocksize": 0
},
"zarr_format": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "string-array",
"encoding-version": "0.2.0"
}
Binary file not shown.
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/varm/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "dict",
"encoding-version": "0.1.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/varm/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
4 changes: 4 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/varp/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"encoding-type": "dict",
"encoding-version": "0.1.0"
}
3 changes: 3 additions & 0 deletions modules/nf-core/scanpy/pca/tests/data/test.zarr/varp/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
60 changes: 59 additions & 1 deletion modules/nf-core/scanpy/pca/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,60 @@ nextflow_process {
tag "scanpy"
tag "scanpy/pca"

// Stub-mode check: feeding a ".zarr" input should produce a ".zarr" entry on the
// process's `zarr` output channel, named after the default prefix.
test("Should emit zarr output for zarr input - stub") {

// Passes the -stub flag to the run; presumably this makes Nextflow execute the
// process's `stub:` section instead of the real script — confirm against nf-test docs.
options '-stub'

// input[0] is the [meta, path] tuple pointing at the test.zarr fixture under tests/data;
// input[1] is the second process input (the obsm key name, "X_pca").
when {
process {
"""
input[0] = Channel.of([
[ id: 'test_zarr' ],
file("${projectDir}/modules/nf-core/scanpy/pca/tests/data/test.zarr", checkIfExists: true)
]
)
input[1] = "X_pca"
"""
}
}

then {
assertAll(
// The process must complete successfully.
{ assert process.success },
// The `zarr` channel must have emitted something (Groovy truthiness: non-null, non-empty).
{ assert process.out.zarr },
// Element [0][1] is the path part of the first emitted [meta, path] tuple;
// its file name should be "<meta.id>_pca" plus the ".zarr" suffix.
{ assert file(process.out.zarr[0][1]).name == "test_zarr_pca.zarr" }
)
}

}

// Non-stub run on the zarr fixture: checks that a zarr store is emitted, that it
// contains an array under obsm/X_pca, and that the pickle output has the expected name.
test("Should run with zarr input") {

// input[0] is the [meta, path] tuple pointing at the test.zarr fixture under tests/data;
// input[1] is the second process input (the obsm key name, "X_pca").
when {
process {
"""
input[0] = Channel.of([
[ id: 'test_zarr' ],
file("${projectDir}/modules/nf-core/scanpy/pca/tests/data/test.zarr", checkIfExists: true)
]
)
input[1] = "X_pca"
"""
}
}

then {
assertAll(
// The process must complete successfully.
{ assert process.success },
// The `zarr` channel must have emitted something (Groovy truthiness: non-null, non-empty).
{ assert process.out.zarr },
// Element [0][1] is the path part of the first emitted [meta, path] tuple;
// its file name should be "<meta.id>_pca" plus the ".zarr" suffix.
{ assert file(process.out.zarr[0][1]).name == "test_zarr_pca.zarr" },
// The output store must contain array metadata at obsm/<input[1]>.
// NOTE(review): ".zarray" assumes the store is written in the Zarr v2 layout, as the
// input fixture is — confirm for the anndata/zarr versions in the module's environment.
{ assert file(process.out.zarr[0][1] + "/obsm/X_pca/.zarray").exists() },
// The `obsm` channel carries the pickle, expected to be named "X_<meta.id>_pca.pkl".
{ assert file(process.out.obsm[0][1]).name == "X_test_zarr_pca.pkl" }
)
}

}

test("Should run without failures") {

when {
Expand All @@ -27,7 +81,11 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
{ assert process.out.h5ad },
{ assert file(process.out.h5ad[0][1]).name == "test_pca.h5ad" },
{ assert process.out.obsm },
{ assert file(process.out.obsm[0][1]).name == "X_test_pca.pkl" },
{ assert process.out.versions },
{ assert "X_pca" in anndata(process.out.h5ad[0][1]).obsm }
)
}
Expand Down
Loading