|
40 | 40 | userFullName = status['fullname'] |
41 | 41 | print 'authenticated' |
42 | 42 |
|
# Collect the UUID of every collection, walking the repository community by
# community. One hard-coded collection UUID is left out of the list.
collectionIds = []
endpoint = baseURL+'/rest/communities'
communities = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
for community in communities:
    communityID = community['uuid']
    collectionsURL = baseURL+'/rest/communities/'+str(communityID)+'/collections'
    collections = requests.get(collectionsURL, headers=header, cookies=cookies, verify=verify).json()
    for collection in collections:
        collectionID = collection['uuid']
        if collectionID == '45794375-6640-4efe-848e-082e60bae375':
            # Excluded collection; the reason is not recorded in this section.
            continue
        collectionIds.append(collectionID)
70 | 53 |
|
# Create both output directories before any CSV is written. os.mkdir raises
# if a directory already exists, so a previous run's output is never reused.
for outputDir in (filePathComplete, filePathUnique):
    os.mkdir(outputDir)
73 | | -for number, itemID in enumerate(itemList): |
74 | | - itemsRemaining = len(itemList) - number |
75 | | - print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID |
76 | | - metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json() |
77 | | - for l in range (0, len (metadata)): |
78 | | - if metadata[l]['key'] != 'dc.description.provenance': |
79 | | - key = metadata[l]['key'] |
80 | | - value = metadata[l]['value'].encode('utf-8') |
81 | | - if os.path.isfile(filePathComplete+key+'ValuesComplete.csv') == False: |
82 | | - f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'wb')) |
83 | | - f.writerow(['itemID']+['value']) |
84 | | - f.writerow([itemID]+[value]) |
85 | | - else: |
86 | | - f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'a')) |
87 | | - f.writerow([itemID]+[value]) |
| 56 | + |
| 57 | +for number, collectionID in enumerate(collectionIds): |
| 58 | + collectionsRemaining = len(collectionIds) - number |
| 59 | + print collectionID, 'Collections remaining: ', collectionsRemaining |
| 60 | + collSels = '&collSel[]=' + collectionID |
| 61 | + offset = 0 |
| 62 | + recordsEdited = 0 |
| 63 | + items = '' |
| 64 | + while items != []: |
| 65 | + setTime = time.time() |
| 66 | + endpoint = baseURL+'/rest/filtered-items?query_field[]=*&query_op[]=exists&query_val[]='+collSels+'&expand=metadata&limit=20&offset='+str(offset) |
| 67 | + response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json() |
| 68 | + items = response['items'] |
| 69 | + for item in items: |
| 70 | + metadata = item['metadata'] |
| 71 | + for i in range (0, len (metadata)): |
| 72 | + if metadata[i]['key'] != 'dc.description.provenance': |
| 73 | + key = metadata[i]['key'] |
| 74 | + try: |
| 75 | + value = metadata[i]['value'].encode('utf-8') |
| 76 | + except: |
| 77 | + value = '' |
| 78 | + for i in range (0, len (metadata)): |
| 79 | + if metadata[i]['key'] == 'dc.identifier.uri': |
| 80 | + uri = metadata[i]['value'] |
| 81 | + if os.path.isfile(filePathComplete+key+'ValuesComplete.csv') == False: |
| 82 | + f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'wb')) |
| 83 | + f.writerow(['handle']+['value']) |
| 84 | + f.writerow([uri]+[value]) |
| 85 | + else: |
| 86 | + f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'a')) |
| 87 | + f.writerow([uri]+[value]) |
| 88 | + offset = offset + 20 |
| 89 | + print offset |
| 90 | + |
| 91 | + setTime = time.time() - setTime |
| 92 | + m, s = divmod(setTime, 60) |
| 93 | + h, m = divmod(m, 60) |
| 94 | + print 'Set run time: ', '%d:%02d:%02d' % (h, m, s) |
| 95 | + |
| 96 | + elapsedTime = time.time() - startTime |
| 97 | + m, s = divmod(elapsedTime, 60) |
| 98 | + h, m = divmod(m, 60) |
| 99 | + print 'Collection run time: ', '%d:%02d:%02d' % (h, m, s) |
88 | 100 |
|
89 | 101 | elapsedTime = time.time() - startTime |
90 | 102 | m, s = divmod(elapsedTime, 60) |
91 | 103 | h, m = divmod(m, 60) |
92 | 104 | print 'Complete value list creation time: ','%d:%02d:%02d' % (h, m, s) |
93 | | - |
| 105 | +# |
94 | 106 | for fileName in os.listdir(filePathComplete): |
95 | 107 | reader = csv.DictReader(open(filePathComplete+fileName)) |
96 | 108 | fileName = fileName.replace('Complete', 'Unique') |
|
0 commit comments