|
1 | | -# NOTE: this is the secrets file, not a module |
2 | | -import secrets |
3 | | - |
4 | 1 | import json |
5 | 2 | import requests |
6 | 3 | import time |
7 | 4 | import csv |
8 | 5 | import urllib3 |
9 | 6 | import argparse |
10 | 7 |
|
11 | | -# TODO: use main() to remove need to define defaults up here |
12 | | -response_timeout = 1 |
13 | | -limit = 100 |
14 | 8 |
|
15 | | -# begin: argument parsing |
16 | | -parser = argparse.ArgumentParser() |
| 9 | +def main(): |
| 10 | + # NOTE: this is the secrets file, not a module |
| 11 | + import secrets |
17 | 12 |
|
18 | | -parser.add_argument('-v', '--verbose', action='store_true', |
19 | | - help='increase output verbosity') |
| 13 | + # define defaults |
| 14 | + response_timeout = 1 |
| 15 | + limit = 100 |
20 | 16 |
|
21 | | -parser.add_argument('-i', '--handle', |
22 | | - help='handle of the object to retreive. optional - if not provided, the script will ask for input') |
| 17 | + # begin: argument parsing |
| 18 | + parser = argparse.ArgumentParser() |
23 | 19 |
|
24 | | -# bitstream formats: |
25 | | -# REM: set number of args |
26 | | -# '+' == 1 or more. |
27 | | -# '*' == 0 or more. |
28 | | -# '?' == 0 or 1. |
29 | | -# An int is an explicit number of arguments to accept. |
30 | | -parser.add_argument('-f', '--formats', nargs='*', |
31 | | - help='optional list of bitstream formats. will return all formats if not provided') |
| 20 | + parser.add_argument('-v', '--verbose', action='store_true', |
| 21 | + help='increase output verbosity') |
32 | 22 |
|
33 | | -parser.add_argument('-dl', '--download', action='store_true', |
34 | | - help='download bitstreams (rather than just retreive metadata about them). default: false') |
| 23 | + parser.add_argument('-i', '--handle', |
| 24 | + help='handle of the object to retreive. optional - if not provided, the script will ask for input') |
35 | 25 |
|
36 | | -parser.add_argument('-rt', '--rtimeout', type=int, |
37 | | - help='response timeout - number of seconds to wait for a response. not a timeout for a download or run of the entire script. default: ' + str(response_timeout)) |
| 26 | + # bitstream formats: |
| 27 | + # REM: set number of args |
| 28 | + # '+' == 1 or more. |
| 29 | + # '*' == 0 or more. |
| 30 | + # '?' == 0 or 1. |
| 31 | + # An int is an explicit number of arguments to accept. |
| 32 | + parser.add_argument('-f', '--formats', nargs='*', |
| 33 | + help='optional list of bitstream formats. will return all formats if not provided') |
38 | 34 |
|
39 | | -parser.add_argument('-l', '--limit', type=int, |
40 | | - help='limit to the number of objects to return in a given request. default = ' + str(limit)) |
| 35 | + parser.add_argument('-dl', '--download', action='store_true', |
| 36 | + help='download bitstreams (rather than just retreive metadata about them). default: false') |
41 | 37 |
|
42 | | -args = parser.parse_args() |
| 38 | + parser.add_argument('-rt', '--rtimeout', type=int, |
| 39 | + help='response timeout - number of seconds to wait for a response. not a timeout for a download or run of the entire script. default: ' + str(response_timeout)) |
43 | 40 |
|
44 | | -if args.rtimeout: |
45 | | - response_timeout = args.rtimeout |
| 41 | + parser.add_argument('-l', '--limit', type=int, |
| 42 | + help='limit to the number of objects to return in a given request. default: ' + str(limit)) |
46 | 43 |
|
47 | | -if args.limit: |
48 | | - limit = args.limit |
| 44 | + args = parser.parse_args() |
49 | 45 |
|
50 | | -if args.verbose: |
51 | | - print('verbosity turned on') |
| 46 | + if args.rtimeout: |
| 47 | + response_timeout = args.rtimeout |
52 | 48 |
|
53 | | - if args.handle: |
54 | | - print('retreiving object with handle {}').format(args.handle) |
| 49 | + if args.limit: |
| 50 | + limit = args.limit |
55 | 51 |
|
56 | | - if args.formats: |
57 | | - print('filtering results to the following bitstream formats: {}').format(args.formats) |
58 | | - else: |
59 | | - print('returning bitstreams of any format') |
| 52 | + if args.verbose: |
| 53 | + print('verbosity turned on') |
60 | 54 |
|
61 | | - if args.download: |
62 | | - print('downloading bitstreams') |
| 55 | + if args.handle: |
| 56 | + print('retreiving object with handle {}').format(args.handle) |
63 | 57 |
|
64 | | - if args.rtimeout: |
65 | | - print('response_timeout set to {}').format(response_timeout) |
| 58 | + if args.formats: |
| 59 | + print('filtering results to the following bitstream formats: {}').format(args.formats) |
| 60 | + else: |
| 61 | + print('returning bitstreams of any format') |
66 | 62 |
|
67 | | -# end: argument parsing |
| 63 | + if args.download: |
| 64 | + print('downloading bitstreams') |
68 | 65 |
|
69 | | -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) |
| 66 | + if args.rtimeout: |
| 67 | + print('response_timeout set to {}').format(response_timeout) |
70 | 68 |
|
71 | | -secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ') |
72 | | -if secretsVersion != '': |
73 | | - try: |
74 | | - secrets = __import__(secretsVersion) |
75 | | - print('Accessing Production') |
76 | | - except ImportError: |
| 69 | + # end: argument parsing |
| 70 | + |
| 71 | + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) |
| 72 | + |
| 73 | + secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ') |
| 74 | + if secretsVersion != '': |
| 75 | + try: |
| 76 | + secrets = __import__(secretsVersion) |
| 77 | + print('Accessing Production') |
| 78 | + except ImportError: |
| 79 | + print('Accessing Stage') |
| 80 | + else: |
77 | 81 | print('Accessing Stage') |
78 | | -else: |
79 | | - print('Accessing Stage') |
80 | | - |
81 | | -baseURL = secrets.baseURL |
82 | | -email = secrets.email |
83 | | -password = secrets.password |
84 | | -filePath = secrets.filePath |
85 | | -verify = secrets.verify |
86 | | - |
87 | | -if args.handle: |
88 | | - handle = args.handle |
89 | | -else: |
90 | | - handle = raw_input('Enter handle: ') |
91 | | - |
92 | | -startTime = time.time() |
93 | | -data = {'email': email, 'password': password} |
94 | | -header = {'content-type': 'application/json', 'accept': 'application/json'} |
95 | | -session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data, timeout=response_timeout).cookies['JSESSIONID'] |
96 | | -cookies = {'JSESSIONID': session} |
97 | | -headerFileUpload = {'accept': 'application/json'} |
98 | | -cookiesFileUpload = cookies |
99 | | -status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json() |
100 | | -userFullName = status['fullname'] |
101 | | -print 'authenticated' |
102 | | - |
103 | | -# NOTE: expanding bitstreams to get the count, in case this is an item |
104 | | -endpoint = baseURL+'/rest/handle/'+handle+'?expand=bitstreams' |
105 | | -dsObject = requests.get(endpoint, headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json() |
106 | | -if args.verbose: print dsObject |
107 | | -dsObjectID = dsObject['uuid'] |
108 | | -# TODO: extend |
109 | | -if dsObject['type'] == 'collection': |
110 | | - if args.verbose: print dsObject['type'] |
111 | | - |
112 | | - itemList = [] |
113 | | - offset = 0 |
114 | | - items = '' |
115 | | - while items != []: |
116 | | - items = requests.get(baseURL+'/rest/collections/'+str(dsObjectID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
117 | | - while items.status_code != 200: |
118 | | - time.sleep(5) |
| 82 | + |
| 83 | + baseURL = secrets.baseURL |
| 84 | + email = secrets.email |
| 85 | + password = secrets.password |
| 86 | + filePath = secrets.filePath |
| 87 | + verify = secrets.verify |
| 88 | + |
| 89 | + if args.handle: |
| 90 | + handle = args.handle |
| 91 | + else: |
| 92 | + handle = raw_input('Enter handle: ') |
| 93 | + |
| 94 | + startTime = time.time() |
| 95 | + data = {'email': email, 'password': password} |
| 96 | + header = {'content-type': 'application/json', 'accept': 'application/json'} |
| 97 | + session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data, timeout=response_timeout).cookies['JSESSIONID'] |
| 98 | + cookies = {'JSESSIONID': session} |
| 99 | + headerFileUpload = {'accept': 'application/json'} |
| 100 | + cookiesFileUpload = cookies |
| 101 | + status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json() |
| 102 | + userFullName = status['fullname'] |
| 103 | + print 'authenticated' |
| 104 | + |
| 105 | + # NOTE: expanding bitstreams to get the count, in case this is an item |
| 106 | + endpoint = baseURL+'/rest/handle/'+handle+'?expand=bitstreams' |
| 107 | + dsObject = requests.get(endpoint, headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json() |
| 108 | + if args.verbose: print dsObject |
| 109 | + dsObjectID = dsObject['uuid'] |
| 110 | + # TODO: extend |
| 111 | + if dsObject['type'] == 'collection': |
| 112 | + if args.verbose: print dsObject['type'] |
| 113 | + |
| 114 | + itemList = [] |
| 115 | + offset = 0 |
| 116 | + items = '' |
| 117 | + while items != []: |
119 | 118 | items = requests.get(baseURL+'/rest/collections/'+str(dsObjectID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
120 | | - items = items.json() |
121 | | - for k in range(0, len(items)): |
122 | | - itemID = items[k]['uuid'] |
123 | | - itemID = '/rest/items/'+itemID |
124 | | - itemHandle = items[k]['handle'] |
125 | | - itemList.append(itemID) |
126 | | - offset = offset + 200 |
127 | | - |
128 | | - f = csv.writer(open(filePath+'handlesAndBitstreams.csv', 'wb')) |
129 | | - f.writerow(['bitstream']+['handle']) |
130 | | - |
131 | | - for item in itemList: |
132 | | - bitstreams = requests.get(baseURL+itemID+'/bitstreams', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json() |
133 | | - for bitstream in bitstreams: |
134 | | - fileName = bitstream['name'] |
135 | | - fileName.replace('.pdf', '') |
136 | | - f.writerow([fileName]+[itemHandle]) |
137 | | - |
138 | | -elif dsObject['type'] == 'item': |
139 | | - if args.verbose: print(dsObject['type']) |
140 | | - |
141 | | - itemHandle = dsObject['handle'] |
142 | | - |
143 | | - f = csv.writer(open(filePath+itemHandle.replace('/', '-')+'_bitstreams.csv', 'wb')) |
144 | | - f.writerow(['sequenceId']+['name']+['format']+['bundleName']) |
145 | | - |
146 | | - bitstreamCount = len(dsObject['bitstreams']) |
147 | | - dlBitstreams = [] |
148 | | - offset = 0 |
149 | | - bitstreams = '' |
150 | | - while bitstreams != []: |
151 | | - # don't retreive more bitstreams than we have left |
152 | | - if limit > bitstreamCount: |
153 | | - limit = bitstreamCount |
154 | | - print('bitstreamCount: {0} offset: {1} limit: {2}').format(bitstreamCount, offset, limit) |
155 | | - bitstreams = requests.get(baseURL+'/rest/items/' + str(dsObjectID) + '/bitstreams?limit=' + str(limit) + '&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
156 | | - bitstreams.raise_for_status() # ensure we notice bad responses |
157 | | - bitstreams = bitstreams.json() |
158 | | - for bitstream in bitstreams: |
159 | | - if args.formats and bitstream['format'] in args.formats or not args.formats: |
160 | | - if args.verbose: print(bitstream) |
161 | | - sequenceId = str(bitstream['sequenceId']) |
| 119 | + while items.status_code != 200: |
| 120 | + time.sleep(5) |
| 121 | + items = requests.get(baseURL+'/rest/collections/'+str(dsObjectID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
| 122 | + items = items.json() |
| 123 | + for k in range(0, len(items)): |
| 124 | + itemID = items[k]['uuid'] |
| 125 | + itemID = '/rest/items/'+itemID |
| 126 | + itemHandle = items[k]['handle'] |
| 127 | + itemList.append(itemID) |
| 128 | + offset = offset + 200 |
| 129 | + |
| 130 | + f = csv.writer(open(filePath+'handlesAndBitstreams.csv', 'wb')) |
| 131 | + f.writerow(['bitstream']+['handle']) |
| 132 | + |
| 133 | + for item in itemList: |
| 134 | + bitstreams = requests.get(baseURL+itemID+'/bitstreams', headers=header, cookies=cookies, verify=verify, timeout=response_timeout).json() |
| 135 | + for bitstream in bitstreams: |
162 | 136 | fileName = bitstream['name'] |
163 | | - fileFormat = bitstream['format'] |
164 | | - bundleName = bitstream['bundleName'] |
165 | | - f.writerow([sequenceId]+[fileName]+[fileFormat]+[bundleName]) |
166 | | - |
167 | | - if args.download: |
168 | | - dlBitstreams.append(bitstream) |
169 | | - offset += limit |
170 | | - bitstreamCount -= limit |
171 | | - |
172 | | - for dlBitstream in dlBitstreams: |
173 | | - response = requests.get(baseURL + str(dlBitstream['retrieveLink']), headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
174 | | - response.raise_for_status() # ensure we notice bad responses |
175 | | - file = open(filePath + dlBitstream['name'], 'wb') |
176 | | - file.write(response.content) |
177 | | - file.close() |
178 | | -else: |
179 | | - print('object is of an invalid type for this script ({}). please enter the handle of an item or a collection.').format(dsObject['type']) |
180 | | - |
181 | | - |
182 | | -logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
183 | | - |
184 | | -elapsedTime = time.time() - startTime |
185 | | -m, s = divmod(elapsedTime, 60) |
186 | | -h, m = divmod(m, 60) |
187 | | -print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}').format(h, m, s) |
| 137 | + fileName.replace('.pdf', '') |
| 138 | + f.writerow([fileName]+[itemHandle]) |
| 139 | + |
| 140 | + elif dsObject['type'] == 'item': |
| 141 | + if args.verbose: print(dsObject['type']) |
| 142 | + |
| 143 | + itemHandle = dsObject['handle'] |
| 144 | + |
| 145 | + f = csv.writer(open(filePath+itemHandle.replace('/', '-')+'_bitstreams.csv', 'wb')) |
| 146 | + f.writerow(['sequenceId']+['name']+['format']+['bundleName']) |
| 147 | + |
| 148 | + bitstreamCount = len(dsObject['bitstreams']) |
| 149 | + dlBitstreams = [] |
| 150 | + offset = 0 |
| 151 | + bitstreams = '' |
| 152 | + # while bitstreams != []: |
| 153 | + while bitstreamCount > 0: |
| 154 | + # don't retreive more bitstreams than we have left |
| 155 | + if limit > bitstreamCount: |
| 156 | + limit = bitstreamCount |
| 157 | + print('bitstreamCount: {0} offset: {1} limit: {2}').format(bitstreamCount, offset, limit) |
| 158 | + bitstreams = requests.get(baseURL+'/rest/items/' + str(dsObjectID) + '/bitstreams?limit=' + str(limit) + '&offset='+str(offset), headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
| 159 | + bitstreams.raise_for_status() # ensure we notice bad responses |
| 160 | + bitstreams = bitstreams.json() |
| 161 | + for bitstream in bitstreams: |
| 162 | + if args.formats and bitstream['format'] in args.formats or not args.formats: |
| 163 | + if args.verbose: print(bitstream) |
| 164 | + sequenceId = str(bitstream['sequenceId']) |
| 165 | + fileName = bitstream['name'] |
| 166 | + fileFormat = bitstream['format'] |
| 167 | + bundleName = bitstream['bundleName'] |
| 168 | + f.writerow([sequenceId]+[fileName]+[fileFormat]+[bundleName]) |
| 169 | + |
| 170 | + if args.download: |
| 171 | + dlBitstreams.append(bitstream) |
| 172 | + offset += limit |
| 173 | + bitstreamCount -= limit |
| 174 | + |
| 175 | + for dlBitstream in dlBitstreams: |
| 176 | + response = requests.get(baseURL + str(dlBitstream['retrieveLink']), headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
| 177 | + response.raise_for_status() # ensure we notice bad responses |
| 178 | + file = open(filePath + dlBitstream['name'], 'wb') |
| 179 | + file.write(response.content) |
| 180 | + file.close() |
| 181 | + else: |
| 182 | + print('object is of an invalid type for this script ({}). please enter the handle of an item or a collection.').format(dsObject['type']) |
| 183 | + |
| 184 | + |
| 185 | + logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify, timeout=response_timeout) |
| 186 | + |
| 187 | + elapsedTime = time.time() - startTime |
| 188 | + m, s = divmod(elapsedTime, 60) |
| 189 | + h, m = divmod(m, 60) |
| 190 | + print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}').format(h, m, s) |
| 191 | + |
| 192 | + |
| 193 | +if __name__ == "__main__": main() |
0 commit comments