Skip to content

Commit 56b7f59

Browse files
committed
updated from Atlas
1 parent 9db73c7 commit 56b7f59

File tree

4 files changed

+612
-0
lines changed

4 files changed

+612
-0
lines changed

attributes/hasattr.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import timeit
2+
3+
# Source snippets timed by test(). Each one binds ``feature`` to
# ``gizmo.gadget`` when the attribute exists and to None otherwise; they
# differ only in the lookup strategy used.

# Probe first with hasattr(), then read the attribute.
test_hasattr = """
if hasattr(gizmo, 'gadget'):
    feature = gizmo.gadget
else:
    feature = None
"""

# Single getattr() call with a default value.
test_getattr = """
feature = getattr(gizmo, 'gadget', None)
"""

# getattr() without a default, handling AttributeError.
test_tryget = """
try:
    feature = getattr(gizmo, 'gadget')
except AttributeError:
    feature = None
"""
20+
21+
22+
class Gizmo:
    """Minimal object whose only instance attribute is ``gadget``."""

    def __init__(self):
        # Stored on the instance (not the class), so ``del obj.gadget``
        # really makes the attribute disappear.
        self.gadget = True
25+
26+
# Shared instance that the timed snippets look up by the name ``gizmo``.
gizmo = Gizmo()

# Suffixes of the module-level ``test_<key>`` snippet names, in report order.
test_keys = ('hasattr', 'getattr', 'tryget')
29+
30+
def test():
    """Time every snippet with and without ``gizmo.gadget`` present.

    For each key in ``test_keys`` the matching module-level ``test_<key>``
    source string is run through ``timeit.repeat`` (5 repeats, best time
    kept), first with the attribute set and then with it deleted. One table
    row is printed per snippet: key, best time with the attribute, best
    time without it.
    """
    # The snippets reach the shared instance through the script's namespace,
    # so this only works when the module is run as ``__main__``.
    setup = 'from __main__ import gizmo'
    for test_key in test_keys:
        # Named ``snippet`` rather than ``test`` so the local does not
        # shadow this function.
        snippet = globals()['test_' + test_key]
        t_yes = min(timeit.repeat(snippet, repeat=5, setup=setup))
        del gizmo.gadget
        try:
            t_not = min(timeit.repeat(snippet, repeat=5, setup=setup))
        finally:
            # Restore the attribute even if timing fails, so the shared
            # instance is never left without ``gadget``.
            gizmo.gadget = True
        print('|{:7} | {:.3} | {:.3}'.format(test_key, t_yes, t_not))


if __name__ == '__main__':
    test()
43+

support/isis2json/isis2json.py

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
#!/usr/bin/env python
2+
# -*- encoding: utf-8 -*-
3+
4+
# isis2json.py: convert ISIS and ISO-2709 files to JSON
5+
#
6+
# Copyright (C) 2010 BIREME/PAHO/WHO
7+
#
8+
# This program is free software: you can redistribute it and/or modify
9+
# it under the terms of the GNU Lesser General Public License as published
10+
# by the Free Software Foundation, either version 2.1 of the License, or
11+
# (at your option) any later version.
12+
13+
# This program is distributed in the hope that it will be useful,
14+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
# GNU Lesser General Public License for more details.
17+
18+
# You should have received a copy of the GNU Lesser General Public License
19+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
20+
21+
############################
22+
# BEGIN ISIS2JSON
23+
# this script works with Python or Jython (versions >=2.5 and <3)
24+
25+
import sys
26+
import argparse
27+
from uuid import uuid4
28+
import os
29+
30+
try:
31+
import json
32+
except ImportError:
33+
if os.name == 'java': # running Jython
34+
from com.xhaus.jyson import JysonCodec as json
35+
else:
36+
import simplejson as json
37+
38+
# When true, .mst records whose status is not ACTIVE are skipped.
SKIP_INACTIVE = True
# Default for -q/--qty (documented to the user as "ALL").
DEFAULT_QTY = 2 ** 31
# Keys under which the MFN and the active flag are stored in a record dict.
ISIS_MFN_KEY = 'mfn'
ISIS_ACTIVE_KEY = 'active'
# Marker placed before each subfield key when a field is rendered as a string.
SUBFIELD_DELIMITER = '^'
# Encoding used to decode field values read from .iso input.
INPUT_ENCODING = 'cp1252'
44+
45+
46+
def iter_iso_records(iso_file_name, isis_json_type):
    """Yield one dict per record read from an ISO-2709 file.

    Each yielded dict maps a field tag (as a string without leading
    zeroes) to the list of that field's occurrences. The shape of an
    occurrence depends on ``isis_json_type``:

    * 1 -- the decoded field content as a string;
    * 2 -- the result of ``subfield.expand(content)``;
    * 3 -- ``dict(subfield.expand(content))``.

    Raises NotImplementedError for any other ``isis_json_type`` (only
    detected when a field is processed).

    The file is closed when iteration ends for any reason: exhaustion, an
    exception, or the generator being closed before it is exhausted.
    """
    from iso2709 import IsoFile
    from subfield import expand

    iso = IsoFile(iso_file_name)
    try:
        for record in iso:
            fields = {}
            for field in record.directory:
                field_key = str(int(field.tag))  # remove leading zeroes
                field_occurrences = fields.setdefault(field_key, [])
                # Undecodable bytes are replaced rather than raising.
                content = field.value.decode(INPUT_ENCODING, 'replace')
                if isis_json_type == 1:
                    field_occurrences.append(content)
                elif isis_json_type == 2:
                    field_occurrences.append(expand(content))
                elif isis_json_type == 3:
                    field_occurrences.append(dict(expand(content)))
                else:
                    raise NotImplementedError('ISIS-JSON type %s conversion '
                        'not yet implemented for .iso input' % isis_json_type)

            yield fields
    finally:
        # Runs on normal completion, on errors, and on generator close.
        iso.close()
69+
70+
71+
def iter_mst_records(master_file_name, isis_json_type):
    """Yield one dict per record read from an ISIS .mst master file.

    Requires the ``bruma.master`` package; when it cannot be imported the
    function exits via SystemExit with an explanatory message (written to
    stderr, non-zero exit status).

    Each yielded dict holds the record MFN under ``ISIS_MFN_KEY`` and maps
    every field id (as a string) to the list of that field's occurrences.
    When ``SKIP_INACTIVE`` is true, records whose status is not ACTIVE are
    skipped; otherwise every record is yielded with a boolean under
    ``ISIS_ACTIVE_KEY``.

    The shape of an occurrence depends on ``isis_json_type``:

    * 1 -- a single string: the content of the ``*`` subfield first,
      followed by ``SUBFIELD_DELIMITER + key + content`` for each other
      subfield;
    * 3 -- a dict mapping ``'_'`` to the content of the ``*`` subfield and
      every other subfield key to the list of its contents.

    Raises NotImplementedError for any other ``isis_json_type`` (only
    detected when a field is processed).

    The master file is closed when iteration ends for any reason:
    exhaustion, an exception, or the generator being closed early.
    """
    try:
        from bruma.master import MasterFactory, Record
    except ImportError:
        # SystemExit with a message reports on stderr and exits with a
        # failure status instead of a silent status 0.
        raise SystemExit('IMPORT ERROR: Jython 2.5 and Bruma.jar '
                         'are required to read .mst files')
    mst = MasterFactory.getInstance(master_file_name).open()
    try:
        for record in mst:
            fields = {}
            if SKIP_INACTIVE:
                if record.getStatus() != Record.Status.ACTIVE:
                    continue
            else:  # save status only when non-active records are kept
                fields[ISIS_ACTIVE_KEY] = (record.getStatus() ==
                                           Record.Status.ACTIVE)
            fields[ISIS_MFN_KEY] = record.getMfn()
            for field in record.getFields():
                field_key = str(field.getId())
                field_occurrences = fields.setdefault(field_key, [])
                if isis_json_type == 3:
                    content = {}
                    for subfield in field.getSubfields():
                        subfield_key = subfield.getId()
                        if subfield_key == '*':
                            # The '*' subfield is stored under '_'.
                            content['_'] = subfield.getContent()
                        else:
                            subfield_occurrences = content.setdefault(
                                subfield_key, [])
                            subfield_occurrences.append(subfield.getContent())
                    field_occurrences.append(content)
                elif isis_json_type == 1:
                    content = []
                    for subfield in field.getSubfields():
                        subfield_key = subfield.getId()
                        if subfield_key == '*':
                            # The '*' content always goes first.
                            content.insert(0, subfield.getContent())
                        else:
                            content.append(SUBFIELD_DELIMITER + subfield_key +
                                           subfield.getContent())
                    field_occurrences.append(''.join(content))
                else:
                    raise NotImplementedError('ISIS-JSON type %s conversion '
                        'not yet implemented for .mst input' % isis_json_type)
            yield fields
    finally:
        # Runs on normal completion, on errors, and on generator close.
        mst.close()
116+
117+
118+
def write_json(input_gen, file_name, output, qty, skip, id_tag,
               gen_uuid, mongo, mfn, isis_json_type, prefix, constant):
    """Serialize records from ``input_gen`` to ``output`` as JSON.

    Parameters:
      input_gen      -- iterable of record dicts
      file_name      -- not used here; kept for interface compatibility
      output         -- object with a ``write`` method
      qty            -- maximum number of records to write
      skip           -- number of leading records to skip
      id_tag         -- if true, tag whose single occurrence becomes "_id"
      gen_uuid       -- if true (and no id_tag), "_id" is a random UUID
      mongo          -- if true, write one JSON object per line with no
                        enclosing array; otherwise write a JSON array
      mfn            -- if true (and no id_tag/gen_uuid), "_id" is the
                        value stored under ISIS_MFN_KEY
      isis_json_type -- 1, 2 or 3; selects how the id is read from the
                        occurrence of ``id_tag``
      prefix         -- if non-empty, prepended to every all-digit key
      constant       -- if non-empty, a "KEY:VALUE" string added to every
                        record; only the first ':' separates key and value

    Raises KeyError when ``id_tag`` is missing from a record, TypeError
    when it occurs more than once or its value repeats an earlier record,
    NotImplementedError when ``isis_json_type`` is not 1, 2 or 3 while
    ``id_tag`` is in use, and ValueError when ``constant`` has no ':'.

    In array mode the opening '[' is always written, so empty input (or
    ``qty == 0``) still yields the valid document ``[]``.
    """
    start = skip
    end = start + qty
    if id_tag:
        id_tag = str(id_tag)
        ids = set()
    else:
        id_tag = ''
    if constant:
        # Parsed once, before any output; maxsplit=1 lets the value itself
        # contain ':' (for example a URL).
        constant_key, constant_value = constant.split(':', 1)
    if not mongo:
        output.write('[')
    for i, record in enumerate(input_gen):
        if i >= end:
            break
        if i < start:
            continue
        if not mongo and i > start:
            output.write(',')
        if id_tag:
            occurrences = record.get(id_tag, None)
            if occurrences is None:
                raise KeyError(_record_message(
                    'id tag #%s not found in record %s', (id_tag, i), record))
            if len(occurrences) > 1:
                raise TypeError(_record_message(
                    'multiple id tags #%s found in record %s',
                    (id_tag, i), record))
            # ok, we have one and only one id field
            if isis_json_type == 1:
                record_id = occurrences[0]
            elif isis_json_type == 2:
                record_id = occurrences[0][0][1]
            elif isis_json_type == 3:
                record_id = occurrences[0]['_']
            else:
                raise NotImplementedError(
                    'ISIS-JSON type %s not supported for id tag extraction'
                    % isis_json_type)
            if record_id in ids:
                raise TypeError(_record_message(
                    'duplicate id %s in tag #%s, record %s',
                    (record_id, id_tag, i), record))
            record['_id'] = record_id
            ids.add(record_id)
        elif gen_uuid:
            record['_id'] = unicode(uuid4())
        elif mfn:
            record['_id'] = record[ISIS_MFN_KEY]
        if prefix:
            # Iterate over a snapshot of the keys because the dict is
            # modified while the tags are renamed.
            for tag in tuple(record):
                if str(tag).isdigit():
                    record[prefix + tag] = record.pop(tag)
        if constant:
            record[constant_key] = constant_value
        output.write(json.dumps(record).encode('utf-8'))
        output.write('\n')
    if not mongo:
        output.write(']\n')


def _record_message(template, values, record):
    """Format an error message, appending the record's MFN when present."""
    msg = template % values
    if ISIS_MFN_KEY in record:
        msg += ' (mfn=%s)' % record[ISIS_MFN_KEY]
    return msg
180+
181+
182+
def main():
    """Parse the command line and convert the input file to JSON.

    Input whose name ends in ``.mst`` (case-insensitive) is read with
    ``iter_mst_records``; anything else is read with ``iter_iso_records``.
    The records are written with ``write_json`` to the ``-o`` file (stdout
    by default), wrapped in ``{ "docs" : ... }`` when ``-c`` is given.

    ``-n/--mfn`` with non-.mst input is reported through ``parser.error``,
    which prints the usage message to stderr and exits with a non-zero
    status. The output stream is closed even if the conversion fails.
    """
    # create the parser
    parser = argparse.ArgumentParser(
        description='Convert an ISIS .mst or .iso file to a JSON array')

    # add the arguments
    parser.add_argument(
        'file_name', metavar='INPUT.(mst|iso)',
        help='.mst or .iso file to read')
    parser.add_argument(
        '-o', '--out', type=argparse.FileType('w'), default=sys.stdout,
        metavar='OUTPUT.json',
        help='the file where the JSON output should be written'
             ' (default: write to stdout)')
    parser.add_argument(
        '-c', '--couch', action='store_true',
        help='output array within a "docs" item in a JSON document'
             ' for bulk insert to CouchDB via POST to db/_bulk_docs')
    parser.add_argument(
        '-m', '--mongo', action='store_true',
        help='output individual records as separate JSON dictionaries,'
             ' one per line for bulk insert to MongoDB via mongoimport utility')
    parser.add_argument(
        '-t', '--type', type=int, metavar='ISIS_JSON_TYPE', default=1,
        help='ISIS-JSON type, sets field structure: 1=string, 2=alist, 3=dict (default=1)')
    parser.add_argument(
        '-q', '--qty', type=int, default=DEFAULT_QTY,
        help='maximum quantity of records to read (default=ALL)')
    parser.add_argument(
        '-s', '--skip', type=int, default=0,
        help='records to skip from start of .mst (default=0)')
    parser.add_argument(
        '-i', '--id', type=int, metavar='TAG_NUMBER', default=0,
        help='generate an "_id" from the given unique TAG field number'
             ' for each record')
    parser.add_argument(
        '-u', '--uuid', action='store_true',
        help='generate an "_id" with a random UUID for each record')
    parser.add_argument(
        '-p', '--prefix', type=str, metavar='PREFIX', default='',
        help='concatenate prefix to every numeric field tag (ex. 99 becomes "v99")')
    parser.add_argument(
        '-n', '--mfn', action='store_true',
        help='generate an "_id" from the MFN of each record'
             ' (available only for .mst input)')
    parser.add_argument(
        '-k', '--constant', type=str, metavar='TAG:VALUE', default='',
        help='Include a constant tag:value in every record (ex. -k type:AS)')

    # TODO: implement this to export large quantities of records to CouchDB
    # parser.add_argument(
    #     '-r', '--repeat', type=int, default=1,
    #     help='repeat operation, saving multiple JSON files'
    #          ' (default=1, use -r 0 to repeat until end of input)')

    # parse the command line
    args = parser.parse_args()
    if args.file_name.lower().endswith('.mst'):
        input_gen_func = iter_mst_records
    else:
        if args.mfn:
            # parser.error() prints usage plus this message to stderr and
            # exits with a failure status.
            parser.error('-n/--mfn option only available for .mst input')
        input_gen_func = iter_iso_records
    input_gen = input_gen_func(args.file_name, args.type)
    try:
        if args.couch:
            args.out.write('{ "docs" : ')
        write_json(input_gen, args.file_name, args.out, args.qty, args.skip,
                   args.id, args.uuid, args.mongo, args.mfn, args.type,
                   args.prefix, args.constant)
        if args.couch:
            args.out.write('}\n')
    finally:
        # Close (and flush) the output even when the conversion raises.
        args.out.close()


if __name__ == '__main__':
    main()
260+
# END ISIS2JSON

0 commit comments

Comments
 (0)