Skip to content

Commit 08103b0

Browse files
committed
Use main function
1 parent e5bc705 commit 08103b0

File tree

1 file changed

+165
-159
lines changed

1 file changed

+165
-159
lines changed

getBitstreams.py

Lines changed: 165 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -1,187 +1,193 @@
1-
# NOTE: this is the secrets file, not a module
2-
import secrets
3-
41
import json
52
import requests
63
import time
74
import csv
85
import urllib3
96
import argparse
107

11-
# TODO: use main() to remove need to define defaults up here
12-
response_timeout = 1
13-
limit = 100
148

15-
# begin: argument parsing
16-
parser = argparse.ArgumentParser()
9+
def main():
10+
# NOTE: this is the secrets file, not a module
11+
import secrets
1712

18-
parser.add_argument('-v', '--verbose', action='store_true',
19-
help='increase output verbosity')
13+
# define defaults
14+
response_timeout = 1
15+
limit = 100
2016

21-
parser.add_argument('-i', '--handle',
22-
help='handle of the object to retreive. optional - if not provided, the script will ask for input')
17+
# begin: argument parsing
18+
parser = argparse.ArgumentParser()
2319

24-
# bitstream formats:
25-
# REM: set number of args
26-
# '+' == 1 or more.
27-
# '*' == 0 or more.
28-
# '?' == 0 or 1.
29-
# An int is an explicit number of arguments to accept.
30-
parser.add_argument('-f', '--formats', nargs='*',
31-
help='optional list of bitstream formats. will return all formats if not provided')
20+
parser.add_argument('-v', '--verbose', action='store_true',
21+
help='increase output verbosity')
3222

33-
parser.add_argument('-dl', '--download', action='store_true',
34-
help='download bitstreams (rather than just retreive metadata about them). default: false')
23+
parser.add_argument('-i', '--handle',
24+
help='handle of the object to retreive. optional - if not provided, the script will ask for input')
3525

36-
parser.add_argument('-rt', '--rtimeout', type=int,
37-
help='response timeout - number of seconds to wait for a response. not a timeout for a download or run of the entire script. default: ' + str(response_timeout))
26+
# bitstream formats:
27+
# REM: set number of args
28+
# '+' == 1 or more.
29+
# '*' == 0 or more.
30+
# '?' == 0 or 1.
31+
# An int is an explicit number of arguments to accept.
32+
parser.add_argument('-f', '--formats', nargs='*',
33+
help='optional list of bitstream formats. will return all formats if not provided')
3834

39-
parser.add_argument('-l', '--limit', type=int,
40-
help='limit to the number of objects to return in a given request. default = ' + str(limit))
35+
parser.add_argument('-dl', '--download', action='store_true',
36+
help='download bitstreams (rather than just retreive metadata about them). default: false')
4137

42-
args = parser.parse_args()
38+
parser.add_argument('-rt', '--rtimeout', type=int,
39+
help='response timeout - number of seconds to wait for a response. not a timeout for a download or run of the entire script. default: ' + str(response_timeout))
4340

44-
if args.rtimeout:
45-
response_timeout = args.rtimeout
41+
parser.add_argument('-l', '--limit', type=int,
42+
help='limit to the number of objects to return in a given request. default: ' + str(limit))
4643

47-
if args.limit:
48-
limit = args.limit
44+
args = parser.parse_args()
4945

50-
if args.verbose:
51-
print('verbosity turned on')
46+
if args.rtimeout:
47+
response_timeout = args.rtimeout
5248

53-
if args.handle:
54-
print('retreiving object with handle {}').format(args.handle)
49+
if args.limit:
50+
limit = args.limit
5551

56-
if args.formats:
57-
print('filtering results to the following bitstream formats: {}').format(args.formats)
58-
else:
59-
print('returning bitstreams of any format')
52+
if args.verbose:
53+
print('verbosity turned on')
6054

61-
if args.download:
62-
print('downloading bitstreams')
55+
if args.handle:
56+
print('retreiving object with handle {}').format(args.handle)
6357

64-
if args.rtimeout:
65-
print('response_timeout set to {}').format(response_timeout)
58+
if args.formats:
59+
print('filtering results to the following bitstream formats: {}').format(args.formats)
60+
else:
61+
print('returning bitstreams of any format')
6662

67-
# end: argument parsing
63+
if args.download:
64+
print('downloading bitstreams')
6865

69-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
66+
if args.rtimeout:
67+
print('response_timeout set to {}').format(response_timeout)
7068

71-
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
72-
if secretsVersion != '':
73-
try:
74-
secrets = __import__(secretsVersion)
75-
print('Accessing Production')
76-
except ImportError:
69+
# end: argument parsing
70+
71+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
72+
73+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
74+
if secretsVersion != '':
75+
try:
76+
secrets = __import__(secretsVersion)
77+
print('Accessing Production')
78+
except ImportError:
79+
print('Accessing Stage')
80+
else:
7781
print('Accessing Stage')
78-
else:
79-
print('Accessing Stage')
80-
81-
baseURL = secrets.baseURL
82-
email = secrets.email
83-
password = secrets.password
84-
filePath = secrets.filePath
85-
verify = secrets.verify
86-
87-
if args.handle:
88-
handle = args.handle
89-
else:
90-
handle = raw_input('Enter handle: ')
91-
92-
startTime = time.time()
93-
data = {'email': email, 'password': password}
94-
header = {'content-type': 'application/json', 'accept': 'application/json'}
95-
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data, timeout=response_timeout).cookies['JSESSIONID']
96-
cookies = {'JSESSIONID': session}
97-
headerFileUpload = {'accept': 'application/json'}
98-
cookiesFileUpload = cookies
99-
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json()
100-
userFullName = status['fullname']
101-
print 'authenticated'
102-
103-
# NOTE: expanding bitstreams to get the count, in case this is an item
104-
endpoint = baseURL+'/rest/handle/'+handle+'?expand=bitstreams'
105-
dsObject = requests.get(endpoint, headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json()
106-
if args.verbose: print dsObject
107-
dsObjectID = dsObject['uuid']
108-
# TODO: extend
109-
if dsObject['type'] == 'collection':
110-
if args.verbose: print dsObject['type']
111-
112-
itemList = []
113-
offset = 0
114-
items = ''
115-
while items != []:
116-
items = requests.get(baseURL+'/rest/collections/'+str(dsObjectID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
117-
while items.status_code != 200:
118-
time.sleep(5)
82+
83+
baseURL = secrets.baseURL
84+
email = secrets.email
85+
password = secrets.password
86+
filePath = secrets.filePath
87+
verify = secrets.verify
88+
89+
if args.handle:
90+
handle = args.handle
91+
else:
92+
handle = raw_input('Enter handle: ')
93+
94+
startTime = time.time()
95+
data = {'email': email, 'password': password}
96+
header = {'content-type': 'application/json', 'accept': 'application/json'}
97+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data, timeout=response_timeout).cookies['JSESSIONID']
98+
cookies = {'JSESSIONID': session}
99+
headerFileUpload = {'accept': 'application/json'}
100+
cookiesFileUpload = cookies
101+
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json()
102+
userFullName = status['fullname']
103+
print 'authenticated'
104+
105+
# NOTE: expanding bitstreams to get the count, in case this is an item
106+
endpoint = baseURL+'/rest/handle/'+handle+'?expand=bitstreams'
107+
dsObject = requests.get(endpoint, headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json()
108+
if args.verbose: print dsObject
109+
dsObjectID = dsObject['uuid']
110+
# TODO: extend
111+
if dsObject['type'] == 'collection':
112+
if args.verbose: print dsObject['type']
113+
114+
itemList = []
115+
offset = 0
116+
items = ''
117+
while items != []:
119118
items = requests.get(baseURL+'/rest/collections/'+str(dsObjectID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
120-
items = items.json()
121-
for k in range(0, len(items)):
122-
itemID = items[k]['uuid']
123-
itemID = '/rest/items/'+itemID
124-
itemHandle = items[k]['handle']
125-
itemList.append(itemID)
126-
offset = offset + 200
127-
128-
f = csv.writer(open(filePath+'handlesAndBitstreams.csv', 'wb'))
129-
f.writerow(['bitstream']+['handle'])
130-
131-
for item in itemList:
132-
bitstreams = requests.get(baseURL+itemID+'/bitstreams', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json()
133-
for bitstream in bitstreams:
134-
fileName = bitstream['name']
135-
fileName.replace('.pdf', '')
136-
f.writerow([fileName]+[itemHandle])
137-
138-
elif dsObject['type'] == 'item':
139-
if args.verbose: print(dsObject['type'])
140-
141-
itemHandle = dsObject['handle']
142-
143-
f = csv.writer(open(filePath+itemHandle.replace('/', '-')+'_bitstreams.csv', 'wb'))
144-
f.writerow(['sequenceId']+['name']+['format']+['bundleName'])
145-
146-
bitstreamCount = len(dsObject['bitstreams'])
147-
dlBitstreams = []
148-
offset = 0
149-
bitstreams = ''
150-
while bitstreams != []:
151-
# don't retrieve more bitstreams than we have left
152-
if limit > bitstreamCount:
153-
limit = bitstreamCount
154-
print('bitstreamCount: {0} offset: {1} limit: {2}').format(bitstreamCount, offset, limit)
155-
bitstreams = requests.get(baseURL+'/rest/items/' + str(dsObjectID) + '/bitstreams?limit=' + str(limit) + '&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
156-
bitstreams.raise_for_status() # ensure we notice bad responses
157-
bitstreams = bitstreams.json()
158-
for bitstream in bitstreams:
159-
if args.formats and bitstream['format'] in args.formats or not args.formats:
160-
if args.verbose: print(bitstream)
161-
sequenceId = str(bitstream['sequenceId'])
119+
while items.status_code != 200:
120+
time.sleep(5)
121+
items = requests.get(baseURL+'/rest/collections/'+str(dsObjectID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
122+
items = items.json()
123+
for k in range(0, len(items)):
124+
itemID = items[k]['uuid']
125+
itemID = '/rest/items/'+itemID
126+
itemHandle = items[k]['handle']
127+
itemList.append(itemID)
128+
offset = offset + 200
129+
130+
f = csv.writer(open(filePath+'handlesAndBitstreams.csv', 'wb'))
131+
f.writerow(['bitstream']+['handle'])
132+
133+
for item in itemList:
134+
bitstreams = requests.get(baseURL+itemID+'/bitstreams', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json()
135+
for bitstream in bitstreams:
162136
fileName = bitstream['name']
163-
fileFormat = bitstream['format']
164-
bundleName = bitstream['bundleName']
165-
f.writerow([sequenceId]+[fileName]+[fileFormat]+[bundleName])
166-
167-
if args.download:
168-
dlBitstreams.append(bitstream)
169-
offset += limit
170-
bitstreamCount -= limit
171-
172-
for dlBitstream in dlBitstreams:
173-
response = requests.get(baseURL + str(dlBitstream['retrieveLink']), headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
174-
response.raise_for_status() # ensure we notice bad responses
175-
file = open(filePath + dlBitstream['name'], 'wb')
176-
file.write(response.content)
177-
file.close()
178-
else:
179-
print('object is of an invalid type for this script ({}). please enter the handle of an item or a collection.').format(dsObject['type'])
180-
181-
182-
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
183-
184-
elapsedTime = time.time() - startTime
185-
m, s = divmod(elapsedTime, 60)
186-
h, m = divmod(m, 60)
187-
print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}').format(h, m, s)
137+
fileName.replace('.pdf', '')
138+
f.writerow([fileName]+[itemHandle])
139+
140+
elif dsObject['type'] == 'item':
141+
if args.verbose: print(dsObject['type'])
142+
143+
itemHandle = dsObject['handle']
144+
145+
f = csv.writer(open(filePath+itemHandle.replace('/', '-')+'_bitstreams.csv', 'wb'))
146+
f.writerow(['sequenceId']+['name']+['format']+['bundleName'])
147+
148+
bitstreamCount = len(dsObject['bitstreams'])
149+
dlBitstreams = []
150+
offset = 0
151+
bitstreams = ''
152+
# while bitstreams != []:
153+
while bitstreamCount > 0:
154+
# don't retrieve more bitstreams than we have left
155+
if limit > bitstreamCount:
156+
limit = bitstreamCount
157+
print('bitstreamCount: {0} offset: {1} limit: {2}').format(bitstreamCount, offset, limit)
158+
bitstreams = requests.get(baseURL+'/rest/items/' + str(dsObjectID) + '/bitstreams?limit=' + str(limit) + '&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
159+
bitstreams.raise_for_status() # ensure we notice bad responses
160+
bitstreams = bitstreams.json()
161+
for bitstream in bitstreams:
162+
if args.formats and bitstream['format'] in args.formats or not args.formats:
163+
if args.verbose: print(bitstream)
164+
sequenceId = str(bitstream['sequenceId'])
165+
fileName = bitstream['name']
166+
fileFormat = bitstream['format']
167+
bundleName = bitstream['bundleName']
168+
f.writerow([sequenceId]+[fileName]+[fileFormat]+[bundleName])
169+
170+
if args.download:
171+
dlBitstreams.append(bitstream)
172+
offset += limit
173+
bitstreamCount -= limit
174+
175+
for dlBitstream in dlBitstreams:
176+
response = requests.get(baseURL + str(dlBitstream['retrieveLink']), headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
177+
response.raise_for_status() # ensure we notice bad responses
178+
file = open(filePath + dlBitstream['name'], 'wb')
179+
file.write(response.content)
180+
file.close()
181+
else:
182+
print('object is of an invalid type for this script ({}). please enter the handle of an item or a collection.').format(dsObject['type'])
183+
184+
185+
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify, timeout=response_timeout)
186+
187+
elapsedTime = time.time() - startTime
188+
m, s = divmod(elapsedTime, 60)
189+
h, m = divmod(m, 60)
190+
print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}').format(h, m, s)
191+
192+
193+
if __name__ == "__main__": main()

0 commit comments

Comments
 (0)