nf-core · pinin4fjords · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/modules/nf-core/custom/bed12codonpositions/environment.yml b/modules/nf-core/custom/bed12codonpositions/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::pandas=2.3.0
+  - conda-forge::python=3.12.11
+  - conda-forge::pyyaml=6.0.2
diff --git a/modules/nf-core/custom/bed12codonpositions/main.nf b/modules/nf-core/custom/bed12codonpositions/main.nf
@@ -0,0 +1,49 @@
+process CUSTOM_BED12CODONPOSITIONS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1019bd22c111267bcb670fdb128829776f0ca6adfa7b0e2d126f91577d08e3/data' :
+        'community.wave.seqera.io/library/python_pandas_pyyaml:75514f9f977be607' }"
+
+    input:
+    tuple val(meta), path(bed12) // bed12 is the file read by the template script
+
+    output:
+    tuple val(meta), path("${prefix}.bed"), emit: bed // written by the template, or touched by the stub
+    path "versions.yml"                  , emit: versions, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to meta.id
+    args   = task.ext.args ?: '' // option string handed to the template's argparse parser
+    template 'bed12codonpositions.py'
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}" // same default as in the script section
+    """
+    touch ${prefix}.bed
+
+    python - <<END
+import platform
+import pandas
+import yaml
+
+with open("versions.yml", "w") as fh:
+    yaml.safe_dump(
+        {
+            "${task.process}": {
+                "python": platform.python_version(),
+                "pandas": pandas.__version__,
+            }
+        },
+        fh,
+        default_flow_style=False,
+        sort_keys=False,
+    )
+END
+    """
+}
diff --git a/modules/nf-core/custom/bed12codonpositions/meta.yml b/modules/nf-core/custom/bed12codonpositions/meta.yml
@@ -0,0 +1,72 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "custom_bed12codonpositions"
+description: |
+  Expand a BED12 into a BED6 of in-frame mRNA positions, projected back
+  to genomic coordinates. Default behaviour emits one row per codon (the
+  5' nucleotide); --step / --width / --frame control the stride, span
+  and offset on the spliced mRNA. Useful for codon-level work on
+  spliced features (e.g. ribo-seq P-site counts per codon, frame /
+  periodicity QC, novel-ORF tiling).
+keywords:
+  - bed12
+  - bed6
+  - codon
+  - splicing
+  - coordinates
+tools:
+  - "bed12codonpositions":
+      description: |
+        Python helper that expands a BED12 into per-codon BED6
+        positions along the spliced feature, with configurable frame,
+        step and span width via `ext.args`.
+      tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/bed12codonpositions/main.nf"
+      licence: ["MIT"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample/feature-set information
+          e.g. `[ id:'catalogue' ]`
+    - bed12:
+        type: file
+        description: |
+          BED12 file with one record per multi-block feature. blockStarts
+          are offsets from column 2 (start); blockSizes are in nt; blocks
+          must be in ascending genomic-coordinate order.
+        pattern: "*.{bed,bed12}"
+        ontologies:
+          - edam: http://edamontology.org/format_3586 # bed12
+output:
+  bed:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map matching the input meta.
+      - ${prefix}.bed:
+          type: file
+          description: |
+            BED6 file with one row per in-frame mRNA position projected
+            back to genomic coordinates. Columns are chrom, start, end,
+            name (from BED12 column 4), score (preserved from BED12
+            column 5), strand. Sorted in mRNA-traversal order, which
+            means descending genomic order on '-' strand records.
+          pattern: "*.bed"
+          ontologies:
+            - edam: http://edamontology.org/format_3003 # BED
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+topics:
+  versions:
+    - versions.yml:
+        type: string
+        description: The name of the process
+authors:
+  - "@pinin4fjords"
+maintainers:
+  - "@pinin4fjords"
diff --git a/modules/nf-core/custom/bed12codonpositions/templates/bed12codonpositions.py b/modules/nf-core/custom/bed12codonpositions/templates/bed12codonpositions.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+"""Expand a BED12 into a BED6 of in-frame mRNA positions.
+
+Walks each record's blocks in mRNA order (5'→3'), emits every --step-th
+mRNA position starting at --frame, and projects them back to genomic
+coordinates. Rows are written in mRNA-traversal order, so '-' strand
+records come out in descending genomic order.
+"""
+
+import argparse
+import platform
+import sys
+
+import pandas as pd
+import yaml
+
+BED12_COLUMNS = [  # column names assigned to the headerless input, in file order
+    "chrom",
+    "start",
+    "end",
+    "name",
+    "score",
+    "strand",
+    "thickStart",
+    "thickEnd",
+    "itemRgb",
+    "blockCount",
+    "blockSizes",
+    "blockStarts",
+]
+
+
+def parse_block_field(value):  # "10,20," -> [10, 20]; value is stringified first, empty items are dropped
+    return [int(x) for x in str(value).rstrip(",").split(",") if x != ""]
+
+
+def mrna_to_genomic_runs(blocks, strand, mrna_start, mrna_end):
+    """Project a half-open mRNA span [mrna_start, mrna_end) onto genomic
+    coordinates, returning a list of (g_start, g_end) BED-style runs
+    (one per overlapped block, in mRNA-traversal order)."""
+    if strand == "+":
+        ordered = list(blocks)
+    elif strand == "-":
+        ordered = list(reversed(blocks))  # '-' strand: the last block given is visited first
+    else:
+        return []  # any other strand value projects to nothing
+
+    runs = []
+    cum = 0  # mRNA length covered by the blocks visited so far
+    for blk_start, blk_end in ordered:
+        blk_len = blk_end - blk_start
+        blk_lo = cum  # this block occupies mRNA offsets [blk_lo, blk_hi)
+        blk_hi = cum + blk_len
+        cum = blk_hi
+
+        lo = max(mrna_start, blk_lo)  # clip the requested span to this block
+        hi = min(mrna_end, blk_hi)
+        if lo >= hi:
+            continue  # no overlap with this block
+        off_lo = lo - blk_lo  # offsets of the clipped span within the block
+        off_hi = hi - blk_lo
+        if strand == "+":
+            g_lo = blk_start + off_lo
+            g_hi = blk_start + off_hi
+        else:
+            g_hi = blk_end - off_lo  # '-' strand: offsets are measured back from the block end
+            g_lo = blk_end - off_hi
+        runs.append((g_lo, g_hi))
+
+    return runs
+
+
+def emit_rows(row, frame, step, width, keep_duplicates):  # -> list of BED6 tuples for one BED12 record
+    block_sizes = parse_block_field(row["blockSizes"])
+    block_starts = parse_block_field(row["blockStarts"])
+    if len(block_sizes) != int(row["blockCount"]) or len(block_starts) != int(row["blockCount"]):
+        sys.stderr.write(
+            f"warning: skipping {row['name']!r}: blockCount={row['blockCount']} but "
+            f"blockSizes has {len(block_sizes)} entries and blockStarts has {len(block_starts)}\\n"
+        )
+        return []  # inconsistent record: warn on stderr and emit nothing
+
+    blocks = sorted((row["start"] + off, row["start"] + off + sz) for sz, off in zip(block_sizes, block_starts))
+    total_len = sum(be - bs for bs, be in blocks)  # summed block lengths, i.e. the spliced length
+    chrom = row["chrom"]
+    name = row["name"]
+    score = row["score"]
+    strand = row["strand"]
+
+    rows = []
+    seen = set()  # dedup keys; scoped to this record only
+    for mrna_pos in range(frame, total_len, step):
+        if mrna_pos + width > total_len:
+            break  # a span that would run past the spliced end is not emitted
+        for g_start, g_end in mrna_to_genomic_runs(blocks, strand, mrna_pos, mrna_pos + width):
+            key = (chrom, g_start, g_end, name, strand)  # score is not part of the dedup key
+            if not keep_duplicates and key in seen:
+                continue
+            seen.add(key)
+            rows.append((chrom, g_start, g_end, name, score, strand))
+    return rows
+
+
+parser = argparse.ArgumentParser(  # options arrive via the module's args string, not the process command line
+    description=__doc__,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+)
+parser.add_argument(
+    "--frame",
+    type=int,
+    default=0,
+    help="mRNA offset of the first position to emit (default: 0).",
+)
+parser.add_argument(
+    "--step",
+    type=int,
+    default=3,
+    help="Stride between successive emitted positions on the mRNA (default: 3).",
+)
+parser.add_argument(
+    "--width",
+    type=int,
+    default=1,
+    help="Width in nucleotides of each emitted span on the mRNA (default: 1). "
+    "Spans that cross a block boundary are split into one BED row per block.",
+)
+parser.add_argument(
+    "--keep-duplicates",
+    action="store_true",
+    help="Keep duplicate (chrom, start, end, name, strand) rows arising from "
+    "the same record (e.g. when --width >= --step).",
+)  # next line: the args text is split on whitespace only, so quoted values are not honoured
+parsed_args = parser.parse_args("${args}".split() if "${args}".strip() else [])
+
+if parsed_args.step <= 0:  # reject values the emit loop cannot use
+    raise SystemExit("--step must be positive")
+if parsed_args.width <= 0:
+    raise SystemExit("--width must be positive")
+if parsed_args.frame < 0:
+    raise SystemExit("--frame must be non-negative")
+
+bed = pd.read_csv(
+    "${bed12}",
+    sep="\\t",
+    comment="#",
+    header=None,
+    names=BED12_COLUMNS,
+    dtype={"chrom": str, "name": str, "strand": str},
+)
+bed = bed[~bed["chrom"].astype(str).str.startswith(("track", "browser"))]
+
+out_rows = []  # BED6 tuples from every record, in input order
+for _, rec in bed.iterrows():
+    out_rows.extend(
+        emit_rows(
+            rec,
+            parsed_args.frame,
+            parsed_args.step,
+            parsed_args.width,
+            parsed_args.keep_duplicates,
+        )
+    )
+
+out = pd.DataFrame(out_rows, columns=["chrom", "start", "end", "name", "score", "strand"])
+out.to_csv("${prefix}.bed", sep="\\t", header=False, index=False)  # headerless, tab-separated BED6
+
+with open("versions.yml", "w") as fh:  # process name -> python and pandas versions
+    yaml.safe_dump(
+        {
+            "${task.process}": {
+                "python": platform.python_version(),
+                "pandas": pd.__version__,
+            }
+        },
+        fh,
+        default_flow_style=False,
+        sort_keys=False,
+    )
diff --git a/modules/nf-core/custom/bed12codonpositions/tests/frame.config b/modules/nf-core/custom/bed12codonpositions/tests/frame.config
@@ -0,0 +1,5 @@
+process {
+    withName: CUSTOM_BED12CODONPOSITIONS {
+        ext.args = '--frame 1' // first emitted mRNA offset is 1 instead of the default 0
+    }
+}
diff --git a/modules/nf-core/custom/bed12codonpositions/tests/keep_duplicates.config b/modules/nf-core/custom/bed12codonpositions/tests/keep_duplicates.config
@@ -0,0 +1,5 @@
+process {
+    withName: CUSTOM_BED12CODONPOSITIONS {
+        ext.args = '--width 2 --step 1 --keep-duplicates' // 2-nt spans at every mRNA offset; repeated rows are kept
+    }
+}