Skip to content

Commit 08a7ebf

Browse files
committed
updates
1 parent 71769fd commit 08a7ebf

File tree

1 file changed

+47
-35
lines changed

1 file changed

+47
-35
lines changed

getCompleteAndUniqueValuesForAllKeys.py

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -40,57 +40,69 @@
4040
userFullName = status['fullname']
4141
print 'authenticated'
4242

43-
itemList = []
43+
collectionIds = []
4444
endpoint = baseURL+'/rest/communities'
4545
communities = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
4646
for i in range (0, len (communities)):
4747
communityID = communities[i]['uuid']
4848
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
4949
for j in range (0, len (collections)):
5050
collectionID = collections[j]['uuid']
51-
print collectionID
5251
if collectionID != '45794375-6640-4efe-848e-082e60bae375':
53-
offset = 0
54-
items = ''
55-
while items != []:
56-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
57-
while items.status_code != 200:
58-
time.sleep(5)
59-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
60-
items = items.json()
61-
for k in range (0, len (items)):
62-
itemID = items[k]['uuid']
63-
itemList.append(itemID)
64-
offset = offset + 200
65-
print offset
66-
elapsedTime = time.time() - startTime
67-
m, s = divmod(elapsedTime, 60)
68-
h, m = divmod(m, 60)
69-
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
52+
collectionIds.append(collectionID)
7053

7154
os.mkdir(filePathComplete)
7255
os.mkdir(filePathUnique)
73-
for number, itemID in enumerate(itemList):
74-
itemsRemaining = len(itemList) - number
75-
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
76-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
77-
for l in range (0, len (metadata)):
78-
if metadata[l]['key'] != 'dc.description.provenance':
79-
key = metadata[l]['key']
80-
value = metadata[l]['value'].encode('utf-8')
81-
if os.path.isfile(filePathComplete+key+'ValuesComplete.csv') == False:
82-
f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'wb'))
83-
f.writerow(['itemID']+['value'])
84-
f.writerow([itemID]+[value])
85-
else:
86-
f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'a'))
87-
f.writerow([itemID]+[value])
56+
57+
for number, collectionID in enumerate(collectionIds):
58+
collectionsRemaining = len(collectionIds) - number
59+
print collectionID, 'Collections remaining: ', collectionsRemaining
60+
collSels = '&collSel[]=' + collectionID
61+
offset = 0
62+
recordsEdited = 0
63+
items = ''
64+
while items != []:
65+
setTime = time.time()
66+
endpoint = baseURL+'/rest/filtered-items?query_field[]=*&query_op[]=exists&query_val[]='+collSels+'&expand=metadata&limit=20&offset='+str(offset)
67+
response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
68+
items = response['items']
69+
for item in items:
70+
metadata = item['metadata']
71+
for i in range (0, len (metadata)):
72+
if metadata[i]['key'] != 'dc.description.provenance':
73+
key = metadata[i]['key']
74+
try:
75+
value = metadata[i]['value'].encode('utf-8')
76+
except:
77+
value = ''
78+
for i in range (0, len (metadata)):
79+
if metadata[i]['key'] == 'dc.identifier.uri':
80+
uri = metadata[i]['value']
81+
if os.path.isfile(filePathComplete+key+'ValuesComplete.csv') == False:
82+
f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'wb'))
83+
f.writerow(['handle']+['value'])
84+
f.writerow([uri]+[value])
85+
else:
86+
f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'a'))
87+
f.writerow([uri]+[value])
88+
offset = offset + 20
89+
print offset
90+
91+
setTime = time.time() - setTime
92+
m, s = divmod(setTime, 60)
93+
h, m = divmod(m, 60)
94+
print 'Set run time: ', '%d:%02d:%02d' % (h, m, s)
95+
96+
elapsedTime = time.time() - startTime
97+
m, s = divmod(elapsedTime, 60)
98+
h, m = divmod(m, 60)
99+
print 'Collection run time: ', '%d:%02d:%02d' % (h, m, s)
88100

89101
elapsedTime = time.time() - startTime
90102
m, s = divmod(elapsedTime, 60)
91103
h, m = divmod(m, 60)
92104
print 'Complete value list creation time: ','%d:%02d:%02d' % (h, m, s)
93-
105+
#
94106
for fileName in os.listdir(filePathComplete):
95107
reader = csv.DictReader(open(filePathComplete+fileName))
96108
fileName = fileName.replace('Complete', 'Unique')

0 commit comments

Comments
 (0)