nf-core · khersameesh24 · Sep 10, 2025 · Sep 6, 2025 · Sep 6, 2025 · Sep 6, 2025
diff --git a/.nf-core.yml b/.nf-core.yml
@@ -11,8 +11,8 @@ lint:
 nf_core_version: 3.3.2
 repository_type: pipeline
 template:
-  author: Florian Heyl
-  description: A pipeline for spatialomics Xenium data.
+  author: Sameesh Kher, Florian Heyl
+  description: A pipeline for spatialomics Xenium In Situ data.
   force: false
   is_nfcore: true
   name: spatialxe

diff --git a/README.md b/README.md
@@ -24,9 +24,6 @@
 
 ![nf-core/spatialxe-metromap](docs/images/spatialxe-metromap.png)
 
-<!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core
-     workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   -->
-
 ## Usage
 
 On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialxe/results).
@@ -44,8 +41,6 @@ test_sample,/path/to/xenium-bundle,/path/to/morphology.ome.tif
 
 Now, you can run the pipeline using:
 
-<!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
-
 ## Run image-based segmentation mode <br>
 
 `CELLPOSE -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`
@@ -55,7 +50,7 @@ nextflow run nf-core/spatialxe \
    -profile <docker/singularity/.../institute> \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
-   --mode image
+   --mode image
 ```
 
 ## Run coordinate-based segmentation mode <br>

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -13,3 +13,26 @@ report_section_order:
 export_plots: true
 
 disable_version_detection: true
+
+run_module:
+  - xenium
+
+module_order:
+  - xenium
+
+log_filesize_limit: 5000000000 # 5GB
+
+sp:
+  cell_feature_matrix:
+    fn: cell_feature_matrix.h5
+  cells:
+    fn: cells.parquet
+  experiment:
+    fn: experiment.xenium
+    num_lines: 50
+  metrics:
+    contents: num_cells_detected
+    fn: metrics_summary.csv
+    num_lines: 5
+  transcripts:
+    fn: transcripts.parquet
diff --git a/conf/modules.config b/conf/modules.config
@@ -81,18 +81,13 @@ process {
             path: { "${params.outdir}/segger/create_dataset" },
             mode: params.publish_dir_mode,
         ]
-        tile_width = "120"
-        tile_height = "120"
     }
 
     withName: SEGGER_TRAIN {
         publishDir = [
             path: { "${params.outdir}/segger/train" },
             mode: params.publish_dir_mode,
         ]
-        batch_size = 4                             // larger batch size can speed up training, but requires more memory
-        devices = 4                                // Use multiple GPUs by increasing the devices parameter to further accelerate training
-        max_epochs = 200                           // increasing #epochs can improve model performance with more learning cycles, but extends training time
         ext.args = { "--init_emb 8 --hidden_channels 32 --num_tx_tokens 500 --out_channels 8 --heads 2 --num_mid_layers 2 --strategy auto --precision 16-mixed" }
     }
 
@@ -101,8 +96,6 @@ process {
             path: { "${params.outdir}/segger/predict" },
             mode: params.publish_dir_mode,
         ]
-        batch_size = 1                            // larger batch size can speed up training, but requires more memory
-        cc_analysis = "false"                     // to control connected component analysis
     }
 
     withName: PARQUET_TO_CSV {

diff --git a/modules.json b/modules.json
@@ -7,7 +7,7 @@
                 "nf-core": {
                     "cellpose": {
                         "branch": "master",
-                        "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
                         "installed_by": ["modules"],
                         "patch": "modules/nf-core/cellpose/cellpose.diff"
                     },
@@ -18,13 +18,13 @@
                     },
                     "untar": {
                         "branch": "master",
-                        "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
                         "installed_by": ["modules"],
                         "patch": "modules/nf-core/untar/untar.diff"
                     },
                     "unzip": {
                         "branch": "master",
-                        "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
                         "installed_by": ["modules"]
                     },
                     "xeniumranger/import-segmentation": {
@@ -61,7 +61,7 @@
                     },
                     "utils_nfschema_plugin": {
                         "branch": "master",
-                        "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e",
+                        "git_sha": "dcd088f483cede0c3df4034d405126f05a764cc7",
                         "installed_by": ["subworkflows"]
                     }
                 }

diff --git a/modules/local/segger/create_dataset/main.nf b/modules/local/segger/create_dataset/main.nf
@@ -33,9 +33,9 @@ process SEGGER_CREATE_DATASET {
         --base_dir ${base_dir} \\
         --data_dir ${prefix} \\
         --sample_type ${params.format} \\
+        --tile_width ${params.tile_width} \\
+        --tile_height ${params.tile_height} \\
         --n_workers ${task.cpus} \\
-        --tile_width ${task.tile_width} \\
-        --tile_height ${task.tile_height} \\
         ${args}
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/segger/predict/main.nf b/modules/local/segger/predict/main.nf
@@ -11,9 +11,9 @@ process SEGGER_PREDICT {
 
 
     output:
-    tuple val(meta), path("${meta.id}_benchmarks_dir")                                  , emit: benchmarks
-    tuple val(meta), path("${meta.id}_benchmarks_dir/*/segger_transcripts.parquet")     , emit: transcripts
-    path("versions.yml")                                                                , emit: versions
+    tuple val(meta), path("${meta.id}_benchmarks_dir")                             , emit: benchmarks
+    tuple val(meta), path("${meta.id}_benchmarks_dir/*/segger_transcripts.parquet"), emit: transcripts
+    path("versions.yml")                                                           , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -33,10 +33,10 @@ process SEGGER_PREDICT {
         --segger_data_dir ${segger_dataset} \\
         --transcripts_file ${transcripts} \\
         --benchmarks_dir ${prefix}_benchmarks_dir \\
-        --num_workers ${task.cpus} \\
-        --batch_size ${task.batch_size} \\
-        --use_cc ${task.cc_analysis} \\
+        --batch_size ${params.batch_size_predict} \\
+        --use_cc ${params.cc_analysis} \\
         --knn_method ${params.segger_knn_method} \\
+        --num_workers ${task.cpus} \\
         ${args}
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/segger/train/main.nf b/modules/local/segger/train/main.nf
@@ -8,8 +8,8 @@ process SEGGER_TRAIN {
     tuple val(meta), path(dataset_dir)
 
     output:
-    tuple val(meta), path("${meta.id}_trained_models")   , emit: trained_models
-    path("versions.yml")                                 , emit: versions
+    tuple val(meta), path("${meta.id}_trained_models"), emit: trained_models
+    path("versions.yml")                              , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -28,10 +28,10 @@ process SEGGER_TRAIN {
         --dataset_dir ${dataset_dir} \\
         --models_dir ${prefix}_trained_models \\
         --sample_tag ${prefix} \\
+        --batch_size ${params.batch_size_train} \\
+        --max_epochs ${params.max_epochs} \\
+        --devices ${params.devices} \\
         --num_workers ${task.cpus} \\
-        --batch_size ${task.batch_size} \\
-        --max_epochs ${task.max_epochs} \\
-        --devices ${task.devices} \\
         --accelerator ${params.segger_accelerator} \\
         ${args}
 

diff --git a/modules/local/spatialdata/merge/main.nf b/modules/local/spatialdata/merge/main.nf
@@ -9,8 +9,8 @@ process SPATIALDATA_MERGE {
     path(add_bundle, stageAs: "*")
 
     output:
-    tuple val(meta), path("spatialdata_spatialxe"), emit: spatialxe_bundle
-    path("versions.yml")                          , emit: versions
+    tuple val(meta), path("spatialdata_merged"), emit: merged_bundle
+    path("versions.yml")                         , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -27,8 +27,8 @@ process SPATIALDATA_MERGE {
 
     stub:
     """
-    mkdir -p "spatialdata_spatialxe/"
-    touch spatialdata_spatialxe/fake_file.txt
+    mkdir -p "spatialdata_merged/"
+    touch spatialdata_merged/fake_file.txt
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/spatialdata/merge/templates/merge.py b/modules/local/spatialdata/merge/templates/merge.py
@@ -12,7 +12,7 @@ def main():
 
     reference_bundle = "${ref_bundle}"
     add_bundle = "${add_bundle}"
-    output_folder = "./spatialdata_spatialxe"
+    output_folder = "./spatialdata_merged"
 
     # Ensure the output folder exists
     if os.path.exists(output_folder):

diff --git a/modules/local/spatialdata/meta/main.nf b/modules/local/spatialdata/meta/main.nf
@@ -9,8 +9,8 @@ process SPATIALDATA_META {
     path(xenium_bundle, stageAs: "*")
 
     output:
-    tuple val(meta), path("spatialdata_spatialxe_final"), emit: spatialxe_bundle
-    path("versions.yml")                                , emit: versions
+    tuple val(meta), path("spatialdata_meta"), emit: metadata
+    path("versions.yml")                     , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -28,8 +28,8 @@ process SPATIALDATA_META {
     stub:
 
     """
-    mkdir -p "spatialdata_spatialxe_final/"
-    touch "spatialdata_spatialxe_final/fake_file.txt"
+    mkdir -p "spatialdata_meta/"
+    touch "spatialdata_meta/fake_file.txt"
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/spatialdata/meta/templates/meta.py b/modules/local/spatialdata/meta/templates/meta.py
@@ -12,7 +12,7 @@ def main():
     spatialdata_bundle = "${spatialdata_bundle}"
     xenium_bundle = "${xenium_bundle}"
     metadata = "${meta}"
-    output = "spatialdata_spatialxe_final"
+    output = "spatialdata_meta"
 
     sdata = sd.read_zarr(f"{spatialdata_bundle}")
 

diff --git a/...l/spatialconverter/parquet_to_csv/main.nf → ...y/spatialconverter/parquet_to_csv/main.nf b/...l/spatialconverter/parquet_to_csv/main.nf → ...y/spatialconverter/parquet_to_csv/main.nf
@@ -2,7 +2,7 @@ process PARQUET_TO_CSV {
     tag "$meta.id"
     label 'process_low'
 
-    container "ghcr.io/scverse/spatialdata:spatialdata0.3.0_spatialdata-io0.1.7_spatialdata-plot0.2.9"
+    container "heylf/spatialdata:0.2.6"
 
     input:
     tuple val(meta), path(transcripts)

diff --git a/.../spatialconverter/parquet_to_csv/meta.yml → .../spatialconverter/parquet_to_csv/meta.yml b/.../spatialconverter/parquet_to_csv/meta.yml → .../spatialconverter/parquet_to_csv/meta.yml
diff --git a/...arquet_to_csv/templates/parquet_to_csv.py → ...arquet_to_csv/templates/parquet_to_csv.py b/...arquet_to_csv/templates/parquet_to_csv.py → ...arquet_to_csv/templates/parquet_to_csv.py
diff --git a/modules/nf-core/cellpose/cellpose.diff b/modules/nf-core/cellpose/cellpose.diff
diff --git a/modules/nf-core/cellpose/main.nf b/modules/nf-core/cellpose/main.nf
diff --git a/modules/nf-core/cellpose/meta.yml b/modules/nf-core/cellpose/meta.yml