Skip to content

Commit 0404aef

Browse files
author
Nolan Woods
committed
Handle slicing SeqRecords
1 parent 73473ea commit 0404aef

File tree

6 files changed

+110
-4
lines changed

6 files changed

+110
-4
lines changed

.idea/BioPython-Convert.iml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/misc.xml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

biopython_convert/JMESPathGen.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@
2929
# and https://github.com/jmespath/jmespath.py/issues/159
3030

3131

32+
class Options(jmespath.Options):
33+
def __init__(self, dict_cls=None, custom_functions=None, custom_slice_types=None):
34+
super().__init__(dict_cls, custom_functions)
35+
self.custom_slice_types = custom_slice_types
36+
37+
3238
def compile(expression):
3339
return Parser().parse(expression)
3440

@@ -178,6 +184,8 @@ def visit_index(self, node, value, **kwargs):
178184
return super().visit_index(node, value)
179185

180186
def visit_slice(self, node, value, **kwargs):
187+
if self._options.custom_slice_types is not None and isinstance(value, self._options.custom_slice_types):
188+
return value[slice(*node['children'])]
181189
return itertools.islice(value, *node['children'])
182190

183191
def visit_multi_select_list(self, node, value, **kwargs):

biopython_convert/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
stat_annotations = ['molecule_type', 'topology', 'data_file_division', 'date', 'accessions', 'sequence_version', 'gi',
2828
'keywords', 'source', 'organism']
2929

30+
JMESPathGenOptions = JMESPathGen.Options(custom_functions=JMESPathGen.ExtendedFunctions(), custom_slice_types=(SeqIO.SeqRecord,))
31+
3032
usage = """\
3133
Use: biopython.convert [-s] [-v] [-i] [-q JMESPath] input_file input_type output_file output_type
3234
\t-s Split records into seperate files
@@ -192,7 +194,7 @@ def gentype(x):
192194

193195
# Wrap input in JMESPath selector if provided
194196
if jpath:
195-
input_records = JMESPathGen.search(jpath, gentype(input_records))
197+
input_records = JMESPathGen.search(jpath, gentype(input_records), JMESPathGenOptions)
196198

197199
# Apply xform to both entire return value
198200
input_records = xform(input_records)

test-data/outputs/jpath_slice

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
LOCUS NC_008563 2800 bp DNA UNK 01-JAN-1980
2+
DEFINITION Escherichia coli APEC O1, complete genome.
3+
ACCESSION NC_008563
4+
VERSION NC_008563.1
5+
KEYWORDS .
6+
SOURCE .
7+
ORGANISM .
8+
.
9+
FEATURES Location/Qualifiers
10+
gene 117..2579
11+
/locus_tag="APECO1_RS00010"
12+
/old_locus_tag="APECO1_1976"
13+
CDS 117..2579
14+
/locus_tag="APECO1_RS00010"
15+
/old_locus_tag="APECO1_1976"
16+
/inference="COORDINATES: similar to AA
17+
sequence:RefSeq:WP_005124053.1"
18+
/note="Derived by automated computational analysis using
19+
gene prediction method: Protein Homology."
20+
/codon_start=1
21+
/transl_table=11
22+
/product="bifunctional aspartokinase I/homoserine
23+
dehydrogenase I"
24+
/protein_id="WP_001264707.1"
25+
/translation="MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITN
26+
HLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHV
27+
LHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLES
28+
TVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADC
29+
CEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCL
30+
IKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMS
31+
RARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAII
32+
SVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQM
33+
LFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLN
34+
LENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVT
35+
PNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGI
36+
LSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGREL
37+
ELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDG
38+
VCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLR
39+
TLSWKLGV"
40+
ORIGIN
41+
1 accatcacca ttaccacagg taacggtgcg ggctgacgcg tacaggaaac acagaaaaaa
42+
61 gcccgcacct gacagtgcgg gctttttttt cgaccaaagg taacgaggta acaaccatgc
43+
121 gagtgttgaa gttcggcggt acatcagtgg caaatgcaga acgttttctg cgggttgccg
44+
181 atattctgga aagcaatgcc aggcaggggc aggtggcgac cgtcctctct gcccccgcca
45+
241 aaattaccaa ccatctggta gcgatgattg aaaaaaccat tagcggccaa gatgctttac
46+
301 ccaatatcag cgatgccgaa cgtatttttg ccgaacttct gacgggactc gccgccgccc
47+
361 agccgggatt tccgctggca caattgaaaa ctttcgtcga ccaggaattt gcccaaataa
48+
421 aacatgtcct gcatggcatt agtttgttgg ggcagtgccc ggatagcatc aacgctgcgc
49+
481 tgatttgccg tggcgagaaa atgtcgatcg ccattatggc cggcgtgtta gaagcgcgtg
50+
541 gtcacaacgt taccgttatc gatccggtcg aaaaactgct tgcagtgggg cattacctcg
51+
601 aatctaccgt tgatattgct gagtccaccc gccgtattgc ggcaagccgc attccggctg
52+
661 accacatggt gctgatggct ggtttcactg ccggtaatga aaaaggcgag ctggtggttc
53+
721 tgggacgcaa cggttccgac tactccgctg cggtgctggc ggcctgttta cgcgccgatt
54+
781 gttgcgagat ctggacggat gttgacggtg tttatacctg cgatccgcgt caggtgcccg
55+
841 atgcgaggtt gttgaagtcg atgtcctatc aggaagcgat ggagctttct tacttcggcg
56+
901 ctaaagttct tcacccccgc accatcaccc ccatcgccca gtttcagatc ccttgcctga
57+
961 ttaaaaatac cggaaatcct caagctccag gtacgctcat tggtgccagc cgtgatgaag
58+
1021 acgaattacc ggtcaagggc atttccaatc tgaataacat ggcaatgttc agcgtttccg
59+
1081 gcccggggat gaaagggatg gttggcatgg cggcgcgcgt ctttgcagcg atgtcacgcg
60+
1141 cccgtatttc cgtggtgctg attacgcaat catcttccga atacagtatc agtttctgcg
61+
1201 ttccgcaaag cgactgtgtg cgagctgaac gggcaatgca ggaagagttc tacctggaac
62+
1261 tgaaagaagg cttactggag ccgttggcgg tgacggaacg gctggccatt atctcggtgg
63+
1321 taggtgatgg tatgcgcacc ttacgtggga tctcggcgaa attctttgcc gcgctggccc
64+
1381 gcgccaatat caacattgtc gccattgctc agggatcttc tgaacgctca atctctgtcg
65+
1441 tggtcaataa cgatgatgcg accactggcg tgcgcgttac tcatcagatg ctgttcaata
66+
1501 ccgatcaggt tatcgaagtg tttgtgattg gcgtcggtgg cgttggcggt gcgctgctgg
67+
1561 agcaactgaa gcgtcagcaa agctggttga agaataaaca tatcgactta cgtgtctgcg
68+
1621 gtgttgctaa ctcgaaggca ctgctcacca atgtacatgg ccttaatctg gaaaactggc
69+
1681 aggaagaact ggcgcaagcc aaagagccgt ttaatctcgg gcgcttaatt cgcctcgtga
70+
1741 aagaatatca tctgctgaac ccggtcattg ttgactgtac ttccagccag gcagtggcgg
71+
1801 atcaatatgc cgacttcctg cgcgaaggtt tccacgttgt tacgccgaac aaaaaggcca
72+
1861 acacctcgtc gatggattac taccatcagt tgcgttatgc ggcggaaaaa tcgcggcgta
73+
1921 aattcctcta tgacaccaac gttggggctg gattaccggt tatcgagaac ctgcaaaatc
74+
1981 tgctcaatgc tggtgatgaa ttgatgaagt tctccggcat tctttcaggt tcgctttctt
75+
2041 atatcttcgg caagttagac gaaggcatga gtttctccga ggcgaccaca ctggcgcggg
76+
2101 aaatgggtta taccgaaccg gacccgcgag atgatctttc tggtatggat gtggcgcgta
77+
2161 agctattgat tctcgctcgt gaaacgggac gtgaactgga gctggcggat attgaaattg
78+
2221 aacctgtgct gcccgcagag tttaacgccg agggtgatgt cgccgctttt atggcgaatc
79+
2281 tgtcacagct cgacgatctc tttgccgcgc gtgtggcgaa ggcccgtgat gaaggaaaag
80+
2341 ttttgcgcta tgttggcaat attgatgaag atggcgtctg ccgcgtgaag attgccgaag
81+
2401 tggatggtaa tgatccgctg ttcaaagtga aaaatggcga aaacgccctg gccttctata
82+
2461 gccactatta tcagccgctg ccgttggtac tgcgcggata tggtgcgggc aatgacgtta
83+
2521 cagctgccgg tgtctttgct gatctgctac gtaccctctc atggaagtta ggagtctgac
84+
2581 atggttaaag tttatgcccc ggcttccagt gccaatatga gcgtcgggtt tgatgtgctc
85+
2641 ggggcggcgg tgacacctgt tgatggtgca ttgctcggag atgtagtcac ggttgaggcg
86+
2701 gcagagacat tcagtctcaa caacctcgga cgctttgccg ataagctgcc gtcagagcca
87+
2761 cgggaaaata tcgtttatca gtgctgggag cgtttttgcc
88+
//

tests/test_convert.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,12 @@ def test_creation2(self):
172172
seq: extract(seq, @),
173173
description: desc})
174174
""")
175-
self.compare_files(Path.joinpath(self.output_path, 'ffn'), output_path)
175+
self.compare_files(Path.joinpath(self.output_path, 'ffn'), output_path)
176+
177+
def test_jpath_slice(self):
178+
"""
179+
Test slicing a SeqRecord
180+
"""
181+
output_path = Path(self.workdir.name, 'jpath_slice')
182+
convert(self.input_path, self.input_type, output_path, 'genbank', jpath='[[0][200:3000]]')
183+
self.compare_files(Path.joinpath(self.output_path, 'jpath_slice'), output_path)

0 commit comments

Comments
 (0)