-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfive_tools.py
More file actions
115 lines (94 loc) · 3.4 KB
/
five_tools.py
File metadata and controls
115 lines (94 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
Variant Annotation Software Tools Automated Pipeline
Author: Martina Debnath
GitHub: https://github.com/marti-dotcom
License: MIT
"""
__author__ = "Martina Debnath"
__version__ = "1.0.0"
__license__ = "MIT"
import subprocess
import os
import sys
def run_command(command):
print(f"Running: {' '.join(command)}")
try:
result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(result.stdout.decode())
except subprocess.CalledProcessError as e:
print(f"An error has occurred: {e.stderr.decode()}")
sys.exit(1)
def run_annovar(input_vcf, output_prefix):
annovar_path = "/home/martina/annovar"
humandb_path = "/home/martina/annovar/humandb"
annovar_command = [
"perl", f"{annovar_path}/table_annovar.pl", input_vcf, humandb_path,
"-buildver", "hg38", "-out", output_prefix, "-remove", "-protocol", "refGene,gnomad211_exome",
"-operation", "g,f", "-nastring", ".", "-vcfinput"
]
run_command(annovar_command)
def run_snpeff(input_vcf):
snpeff_path = "/home/martina/snpEff"
genome_version = "GRCh38.86"
snpeff_command = [
"java", "-Xmx4g", "-jar", f"{snpeff_path}/snpEff.jar", genome_version, input_vcf
]
run_command(snpeff_command)
def run_vep(input_vcf, output_vcf):
vep_command = [
"vep",
"-i", input_vcf,
"-o", output_vcf,
"--vcf",
"--cache",
"--offline",
"--species", "homo_sapiens",
"--assembly", "GRCh38",
"--force_overwrite"
]
run_command(vep_command)
def run_bcftools_csq(input_vcf, output_vcf):
fasta = "/home/martina/clinvar_project/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
gff3 = "/home/martina/Homo_sapiens.GRCh38.113.gff3"
fai = fasta + ".fai"
for path in [input_vcf, fasta, fai, gff3]:
if not os.path.exists(path):
print(f"[ERROR] Required file not found: {path}")
sys.exit(1)
bcftools_command = [
"bcftools", "csq",
"-f", fasta,
"-g", gff3,
"-o", output_vcf,
input_vcf
]
run_command(bcftools_command)
def run_fathmm(input_vcf):
converter_script = "/home/martina/fathmm-MKL/vcf_to_fathmm_input.py"
input_txt = "/home/martina/fathmm-MKL/omim_fathmm_input.txt"
output_txt = "/home/martina/fathmm-MKL/output_predictions.txt"
fathmm_script = "/home/martina/fathmm-MKL/fathmm-MKL.py"
fathmm_db = "/home/martina/fathmm-MKL/fathmm-MKL_Current.tab.gz"
# Convert VCF to FATHMM input format
run_command(["python", converter_script, input_vcf, input_txt])
# Run FATHMM-MKL
run_command(["python", fathmm_script, input_txt, output_txt, fathmm_db])
def main(input_vcf):
print("Running ANNOVAR annotation...")
run_annovar(input_vcf, "output_annovar")
print("Running SnpEff annotation...")
run_snpeff(input_vcf)
print("Running VEP annotation...")
run_vep(input_vcf, "output_vep.vcf")
print("Running BCFtools csq annotation...")
run_bcftools_csq(input_vcf, "output_bcftools_csq.vcf")
print("Running FATHMM-MKL annotation...")
run_fathmm(input_vcf)
#output for FATHMM-MKL found in : output_predictions.txt
print("Annotation pipeline completed. YAY!")
if __name__ == "__main__":
if len(sys.argv) != 2:
print(f"Usage: python {sys.argv[0]} input.vcf")
sys.exit(1)
input_vcf = sys.argv[1]
main(input_vcf)