Skip to content

Commit 8c09657

Browse files
committed
2 parents 03b6baf + 76a2c38 commit 8c09657

File tree

2 files changed

+69
-16
lines changed

2 files changed

+69
-16
lines changed

checkInventory.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import argparse
2+
import pandas as pd
3+
import os
4+
5+
6+
def main(argv=None):
    """Report which files listed in an inventory csv exist in a data directory.

    Reads one column of the inventory csv (the filename field), checks each
    entry against the data directory, prints a line for every missing file
    (and, in verbose mode, for every file that is present), then prints a
    summary with the found/missing totals.

    Args:
        argv: optional list of command-line arguments. When None (the
            default), argparse reads the arguments from sys.argv, so calling
            main() with no arguments behaves as a normal script entry point.
    """
    # begin: argument parsing
    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '--inventory', required=True,
                        help='csv file containing the inventory. the path, if given, can be absolute or relative to this script')

    parser.add_argument('-d', '--dataDir',
                        help='directory containing the data. if omitted, data will be read from the directory containing the inventory file')

    parser.add_argument('-f', '--field',
                        help='field in the csv containing the filenames. default: name')

    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity')

    args = parser.parse_args(argv)

    if not args.dataDir:
        # An inventory given as a bare filename has an empty directory part;
        # fall back to the current directory so the lookups below do not end
        # up pointing at the filesystem root.
        args.dataDir = os.path.dirname(args.inventory) or os.curdir

    if not args.field:
        args.field = 'name'

    if args.verbose:
        print('verbosity turned on')
        print('reading inventory from {}'.format(args.inventory))
        print('filenames read from field named {}'.format(args.field))
        print('searching for files in {}'.format(args.dataDir))
    # end: argument parsing

    # dtype=str keeps filenames such as "001" or "2020" as text instead of
    # letting pandas convert them to numbers.
    inventory = pd.read_csv(args.inventory, usecols=[args.field], dtype=str)
    # Rows whose filename cell is blank are read as NaN; skip them, since
    # there is no filename to look for.
    filenames = inventory[args.field].dropna()

    foundfiles = 0
    missingfiles = 0
    for filename in filenames:
        if os.path.isfile(os.path.join(args.dataDir, filename)):
            if args.verbose:
                print('{} is not missing'.format(filename))
            foundfiles += 1
        else:
            print('{} is missing'.format(filename))
            missingfiles += 1

    print('{} files found and {} files missing'.format(foundfiles, missingfiles))
50+
51+
52+
if __name__ == "__main__": main()

getBitstreams.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import argparse
77
import os
88
import re
9+
from six.moves import input
910

1011

1112
def main():
@@ -67,7 +68,7 @@ def main():
6768

6869
args = parser.parse_args()
6970

70-
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
71+
secretsVersion = input('To edit production server, enter the name of the secrets file: ')
7172
if secretsVersion != '':
7273
try:
7374
secrets = __import__(secretsVersion)
@@ -101,29 +102,29 @@ def main():
101102
if args.handle:
102103
handle = args.handle
103104
else:
104-
handle = raw_input('Enter handle: ')
105+
handle = input('Enter handle: ')
105106

106107
if args.verbose:
107108
print('verbosity turned on')
108109

109110
if args.handle:
110-
print('retreiving object with handle {}').format(args.handle)
111+
print('retreiving object with handle {}'.format(args.handle))
111112

112113
if args.formats:
113-
print('filtering results to the following bitstream formats: {}').format(args.formats)
114+
print('filtering results to the following bitstream formats: {}'.format(args.formats))
114115
else:
115116
print('returning bitstreams of any format')
116117

117118
if args.bundles:
118-
print('filtering results to the following bundles: {}').format(args.bundles)
119+
print('filtering results to the following bundles: {}'.format(args.bundles))
119120
else:
120121
print('returning bitstreams from any bundle')
121122

122123
if args.download:
123124
print('downloading bitstreams')
124125

125126
if args.rtimeout:
126-
print('response_timeout set to {}').format(args.rtimeout)
127+
print('response_timeout set to {}'.format(args.rtimeout))
127128

128129
# end: argument parsing
129130

@@ -134,21 +135,21 @@ def main():
134135
header = {'content-type': 'application/json', 'accept': 'application/json'}
135136
session = requests.post(args.baseURL+'/rest/login', headers=header, verify=args.verify, params=data, timeout=args.rtimeout).cookies['JSESSIONID']
136137
cookies = {'JSESSIONID': session}
137-
print 'authenticated'
138+
print('authenticated')
138139

139140
# NOTE: expanding items (of collections) and bitstreams (of items) to get the count
140141
endpoint = args.baseURL+'/rest/handle/'+handle+'?expand=items,bitstreams'
141142
dsObject = requests.get(endpoint, headers=header, cookies=cookies, verify=args.verify, timeout=args.rtimeout)
142143
dsObject.raise_for_status() # ensure we notice bad responses
143144
dsObject = dsObject.json()
144-
if args.verbose: print dsObject
145+
if args.verbose: print(dsObject)
145146
dsObjectID = dsObject['uuid']
146147
# TODO: extend
147148
if dsObject['type'] == 'collection':
148-
if args.verbose: print dsObject['type']
149+
if args.verbose: print(dsObject['type'])
149150

150151
itemCount = len(dsObject['items'])
151-
print('{} items').format(itemCount)
152+
print('{} items'.format(itemCount))
152153
for collItem in dsObject['items']:
153154
endpoint = args.baseURL + collItem['link'] + '?expand=bitstreams'
154155
item = requests.get(endpoint, headers=header, cookies=cookies, verify=args.verify, timeout=args.rtimeout)
@@ -160,14 +161,14 @@ def main():
160161
processItem(dsObject, args)
161162

162163
else:
163-
print('object is of an invalid type for this script ({}). please enter the handle of an item or a collection.').format(dsObject['type'])
164+
print('object is of an invalid type for this script ({}). please enter the handle of an item or a collection.'.format(dsObject['type']))
164165

165166
logout = requests.post(args.baseURL+'/rest/logout', headers=header, cookies=cookies, verify=args.verify, timeout=args.rtimeout)
166167

167168
elapsedTime = time.time() - startTime
168169
m, s = divmod(elapsedTime, 60)
169170
h, m = divmod(m, 60)
170-
print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}').format(h, m, s)
171+
print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}'.format(h, m, s))
171172

172173

173174
def processItem(dsObject, args):
@@ -179,7 +180,7 @@ def processItem(dsObject, args):
179180
if not os.path.exists(itemPath):
180181
os.makedirs(itemPath)
181182

182-
f = csv.writer(open(itemPath + handleID + '_bitstreams.csv', 'wb'))
183+
f = csv.writer(open(itemPath + handleID + '_bitstreams.csv', 'w'))
183184
f.writerow(['sequenceId']+['name']+['format']+['bundleName'])
184185

185186
itemID = dsObject['uuid']
@@ -193,13 +194,13 @@ def processItem(dsObject, args):
193194
# don't retreive more bitstreams than we have left
194195
if limit > bitstreamCount:
195196
limit = bitstreamCount
196-
print('bitstreamCount: {0} offset: {1} limit: {2}').format(bitstreamCount, offset, limit)
197+
print('bitstreamCount: {0} offset: {1} limit: {2}'.format(bitstreamCount, offset, limit))
197198
bitstreams = requests.get(args.baseURL+'/rest/items/' + str(itemID) + '/bitstreams?limit=' + str(limit) + '&offset='+str(offset), headers=header, cookies=cookies, verify=args.verify, timeout=args.rtimeout)
198199
bitstreams.raise_for_status() # ensure we notice bad responses
199200
bitstreams = bitstreams.json()
200201
for bitstream in bitstreams:
201-
if (args.formats and bitstream['format'] in args.formats or not args.formats
202-
and args.bundles and bitstream['bundleName'] in args.bundles or not args.bundles):
202+
if ((args.formats and bitstream['format'] in args.formats or not args.formats)
203+
and (args.bundles and bitstream['bundleName'] in args.bundles or not args.bundles)):
203204
if args.verbose: print(bitstream)
204205
sequenceId = str(bitstream['sequenceId'])
205206
fileName = bitstream['name']

0 commit comments

Comments
 (0)