Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
with:
channel-priority: strict
activate-environment: snakemake
environment-file: .test/environment_v7.yaml
environment-file: .test/environment_v9.yaml
- name: Create environments
shell: bash -el {0}
run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml --conda-create-envs-only --use-conda -c1 --conda-frontend conda
Expand All @@ -29,7 +29,7 @@ jobs:
with:
channel-priority: strict
activate-environment: snakemake
environment-file: .test/environment_v7.yaml
environment-file: .test/environment_v9.yaml
- name: Dry run
shell: bash -el {0}
run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml --dry-run
4 changes: 2 additions & 2 deletions .github/workflows/test_apptainer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ jobs:
- name: Pack logs
if: success() || failure()
shell: bash -el {0}
run: tar czf logs.tar.gz .test/output .snakemake/log
- name: Upload output file
run: tar czf logs.tar.gz logs .snakemake/log
- name: Upload logs file
if: success() || failure()
uses: actions/upload-artifact@v4
with:
Expand Down
39 changes: 0 additions & 39 deletions .github/workflows/test_v7.yml

This file was deleted.

4 changes: 2 additions & 2 deletions .github/workflows/test_v9.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ jobs:
- name: Pack logs
if: success() || failure()
shell: bash -el {0}
run: tar czf logs.tar.gz .test/output .snakemake/log
- name: Upload output file
run: tar czf logs.tar.gz logs .snakemake/log
- name: Upload logs file
if: success() || failure()
uses: actions/upload-artifact@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

# Analysis files
/data/
/output/
/results/
/logs/

# MAC
.DS_Store
Expand Down
7 changes: 5 additions & 2 deletions .test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@ from snakemake.utils import min_version
import subprocess


min_version("7.19")
min_version("9.12")

# Workflow version
__version__ = "test"

pathvars:
dataset = config["OUTPUT_NAME"]

# Rules
include: "../workflow/core.smk"
include: "../workflow/rules/demix.smk"
Expand All @@ -19,4 +22,4 @@ include: "../workflow/rules/report.smk"

rule all:
input:
OUTDIR/f"{OUTPUT_NAME}.report.html"
"<results>/<dataset>/report.html"
10 changes: 0 additions & 10 deletions .test/environment_v7.yaml

This file was deleted.

2 changes: 0 additions & 2 deletions .test/targets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ SAMPLES:
fasta: ".test/data/fasta/sample3.fasta"
METADATA:
".test/data/metadata.csv"
OUTPUT_DIRECTORY:
".test/output"
CONTEXT_FASTA:
".test/data/context.fasta"
OUTPUT_NAME:
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
[![Zenodo DOI](https://zenodo.org/badge/627797162.svg)](https://doi.org/10.5281/zenodo.15771867)

![Install workflow](https://github.com/PathoGenOmics-Lab/VIPERA/actions/workflows/install.yml/badge.svg)
![Test workflow with Snakemake v7](https://github.com/PathoGenOmics-Lab/VIPERA/actions/workflows/test_v7.yml/badge.svg)
![Test workflow with Snakemake v9](https://github.com/PathoGenOmics-Lab/VIPERA/actions/workflows/test_v9.yml/badge.svg)
![Test workflow with Snakemake v9 and Apptainer](https://github.com/PathoGenOmics-Lab/VIPERA/actions/workflows/test_apptainer.yml/badge.svg)

Expand All @@ -24,7 +23,7 @@ configuring [the inputs and outputs](config/README.md#inputs-and-outputs) and
[the context dataset](config/README.md#automated-construction-of-a-context-dataset):

```shell
snakemake --use-conda --cores 4 # command for Snakemake v7
snakemake --sdm conda --cores 4
```

We provide a simple script that downloads the [data](https://doi.org/10.20350/digitalCSIC/15648) from [our study](https://doi.org/10.1093/ve/veae018)
Expand All @@ -40,8 +39,9 @@ It supports dependency management through either conda or Apptainer/Singularity,
[run modes documentation](config/README.md#run-modes).

We use continuous integration (CI) to automatically verify that all dependencies install correctly
with Snakemake v7.32.4 (see GitHub Action `Install`), and to test that VIPERA runs
successfully with Snakemake v7.32.4 and v9.15.0 using conda (Actions `Test Sm v(7|9)`).
(see GitHub Action `Install`), and to test that VIPERA runs
successfully using conda (Action `Test Sm v9`).
Both checks run with Snakemake v9.15.0.
We also test a containerized workflow with Snakemake v9.15.0 and Apptainer using a
[remote image](https://hub.docker.com/r/ahmig/vipera) (Action `Test Sm v9 Apptainer`).
This image is automatically updated in every version (Action `Deploy`).
Expand Down
1 change: 0 additions & 1 deletion build_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ def find_file_with_extension(directory: Path, prefix: str, extensions: List[str]
else:
sys.exit(f"ERROR: metadata file '{args.metadata_csv}' does not exist")
targets["OUTPUT_NAME"] = args.output_name
targets["OUTPUT_DIRECTORY"] = "output"
targets["CONTEXT_FASTA"] = None
targets["MAPPING_REFERENCES_FASTA"] = None

Expand Down
10 changes: 3 additions & 7 deletions config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ The path to these input files is set in two configuration files in YAML format:
[config.yaml](/config/config.yaml) (for general workflow settings) and
[targets.yaml](/config/targets.yaml) (for specific dataset-related settings).
The latter must be modified by the user to point the `SAMPLES` and `METADATA`
parameters to your data. The `OUTPUT_DIRECTORY` parameter should point to your
desired results directory.
parameters to your data.

The script [`build_targets.py`](/build_targets.py) simplifies the process of creating
the targets configuration file. To run this script, you need to have PyYAML installed. It
Expand All @@ -62,8 +61,6 @@ SAMPLES:
...
METADATA:
"path/to/metadata.csv"
OUTPUT_DIRECTORY:
"output"
CONTEXT_FASTA:
null
MAPPING_REFERENCES_FASTA:
Expand Down Expand Up @@ -258,11 +255,10 @@ should be modified to fit your needs. Read more about Snakemake profiles
[here](https://snakemake.readthedocs.io/en/stable/executing/cli.html#executing-profiles).
To use the profile, install the
[Snakemake executor plugin for SLURM](https://snakemake.github.io/snakemake-plugin-catalog/plugins/executor/slurm.html)
and run one of the following commands:
and run the following command:

```shell
snakemake --slurm --profile profile/slurm # Snakemake v7
snakemake --profile profile/slurm # Snakemake v8+
snakemake --profile profile/slurm
```

Additionally, we offer the option of running the workflow within a containerized
Expand Down
2 changes: 0 additions & 2 deletions config/targets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ METADATA:
"data/metadata.csv"
OUTPUT_NAME:
"case_study"
OUTPUT_DIRECTORY:
"output"
CONTEXT_FASTA:
null
MAPPING_REFERENCES_FASTA:
Expand Down
5 changes: 4 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ containerized: "docker://ahmig/vipera:v" + __version__
configfile: "config/config.yaml"
configfile: "config/targets.yaml"

pathvars:
dataset = config["OUTPUT_NAME"]

include: "core.smk"
include: "rules/context.smk"
include: "rules/demix.smk"
Expand All @@ -22,4 +25,4 @@ include: "rules/report.smk"

rule all:
input:
OUTDIR/f"{OUTPUT_NAME}.report.html"
"<results>/<dataset>/report.html"
13 changes: 0 additions & 13 deletions workflow/core.smk
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
BASE_PATH = Path(workflow.basedir).parent.resolve()

include: "rules/common.smk"

# Outputs
OUTPUT_NAME = config["OUTPUT_NAME"]
OUTDIR = Path(config["OUTPUT_DIRECTORY"])

# Logging
LOGDIR = OUTDIR / "logs"

# Report
REPORT_DIR_PLOTS = Path(OUTDIR/"report/plots")
REPORT_DIR_TABLES = Path(OUTDIR/"report/tables")

include: "rules/fetch.smk"
include: "rules/fasta.smk"
include: "rules/asr.smk"
Expand Down
21 changes: 10 additions & 11 deletions workflow/rules/asr.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,21 @@ rule reconstruct_ancestral_sequence:
params:
seed = 7291,
seqtype = "DNA",
name = OUTPUT_NAME,
outgroup = config["ALIGNMENT_REFERENCE"],
model = config["TREE_MODEL"]
input:
fasta = OUTDIR/"nextalign"/f"{OUTPUT_NAME}.aligned.masked.fasta"
fasta = "<results>/<dataset>/aligned.masked.fasta"
output:
folder = directory(OUTDIR/"tree"),
state_file = OUTDIR/"tree"/f"{OUTPUT_NAME}.state"
folder = directory("<results>/<dataset>/tree"),
state_file = "<results>/<dataset>/tree/asr.state"
log:
LOGDIR / "reconstruct_ancestral_sequence" / "log.txt"
"<logs>/<dataset>/reconstruct_ancestral_sequence/log.txt"
shell:
"mkdir -p {output.folder} && "
"iqtree2 -seed {params.seed} "
"-asr "
"-o {params.outgroup} -T AUTO --threads-max {threads} -s {input.fasta} "
"--seqtype {params.seqtype} -m {params.model} --prefix {output.folder}/{params.name} >{log} 2>&1"
"-asr "
"-o {params.outgroup} -T AUTO --threads-max {threads} -s {input.fasta} "
"--seqtype {params.seqtype} -m {params.model} --prefix {output.folder}/asr >{log} 2>&1"


rule ancestor_fasta:
Expand All @@ -30,10 +29,10 @@ rule ancestor_fasta:
indeterminate_char = "N",
name = "case_ancestor",
input:
state_file = OUTDIR/"tree"/f"{OUTPUT_NAME}.state"
state_file = "<results>/<dataset>/tree/asr.state"
output:
fasta = report(OUTDIR/f"{OUTPUT_NAME}.ancestor.fasta")
fasta = report("<results>/<dataset>/ancestor.fasta")
log:
LOGDIR / "ancestor_fasta" / "log.txt"
"<logs>/<dataset>/ancestor_fasta/log.txt"
script:
"../scripts/ancestor_fasta.py"
6 changes: 3 additions & 3 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def get_repo_version(base_dir: str, default: str, warn=False) -> str:
def select_context_fasta():
"""Set context to be fetched automatically if CONTEXT_FASTA=null"""
if "CONTEXT_FASTA" not in config.keys() or config["CONTEXT_FASTA"] is None:
return OUTDIR/"context"/"sequences.fasta"
return "<results>/<dataset>/context/sequences.fasta"
if not Path(config["GISAID"]["CREDENTIALS"]).is_file():
raise FileNotFoundError(f"Tried to download a context dataset, but no GISAID credentials were found at '{config['GISAID']['CREDENTIALS']}' (see README.md).")
elif Path(config["CONTEXT_FASTA"]).is_file():
Expand All @@ -61,7 +61,7 @@ def select_context_fasta():
def select_mapping_references_fasta():
"""Set mapping references to be fetched automatically if MAPPING_REFERENCES_FASTA=null"""
if "MAPPING_REFERENCES_FASTA" not in config.keys() or config["MAPPING_REFERENCES_FASTA"] is None:
return OUTDIR/"mapping_references.fasta"
return "<results>/<dataset>/mapping_references.fasta"
elif Path(config["MAPPING_REFERENCES_FASTA"]).is_file():
return config["MAPPING_REFERENCES_FASTA"]
else:
Expand All @@ -74,7 +74,7 @@ def is_url(string: str) -> bool:

def select_problematic_vcf():
if is_url(config["PROBLEMATIC_VCF"]):
return OUTDIR/"problematic_sites.vcf"
return "<results>/<dataset>/problematic_sites.vcf"
elif Path(config["PROBLEMATIC_VCF"]).is_file():
return config["PROBLEMATIC_VCF"]
else:
Expand Down
Loading
Loading