Skip to content

Commit fa99305

Browse files
committed
updates
1 parent 4bd2a6d commit fa99305

File tree

2 files changed

+110
-3
lines changed

2 files changed

+110
-3
lines changed

exportCollectionMetadataToCSV.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from collections import Counter
7+
import urllib3
8+
import argparse
9+
10+
#Export the metadata of every item in a DSpace collection to a CSV file.
#
#The script resolves a collection handle to its uuid, pages through the
#collection's items, gathers every metadata key in use (except
#dc.description.provenance) and writes one CSV row per item, with repeated
#values of a key joined by '|'.
#
#Python 2 script (raw_input, csv files opened in binary mode).

parser = argparse.ArgumentParser()
parser.add_argument('-i', '--handle', help='handle of the collection to retreive. optional - if not provided, the script will ask for input')
args = parser.parse_args()

if args.handle:
    handle = args.handle
else:
    handle = raw_input('Enter collection handle: ')

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print('Editing Production')
    except ImportError:
        print('Editing Stage')
else:
    print('Editing Stage')

#login info kept in secrets.py file
#These values are read only after the secrets module has been chosen above;
#reading them earlier would silently keep the default (stage) settings even
#when a production secrets file was requested.
baseURL = secrets.baseURL
email = secrets.email
password = secrets.password
filePath = secrets.filePath
verify = secrets.verify

#authentication
startTime = time.time()
data = {'email':email,'password':password}
header = {'content-type':'application/json','accept':'application/json'}
#NOTE(review): params= sends the credentials in the URL query string, where
#they can end up in server/proxy logs - confirm whether the server also
#accepts them as form data before changing this.
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
cookies = {'JSESSIONID': session}
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
#not used further, but fails early if the status response lacks 'fullname'
userFullName = status['fullname']
print('authenticated')

#resolve the collection handle to its uuid
endpoint = baseURL+'/rest/handle/'+handle
collection = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
collectionID = collection['uuid']

#page through the collection 200 items at a time until an empty page is returned
itemList = {}
offset = 0
items = ''
while items != []:
    itemsURL = baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset)
    response = requests.get(itemsURL, headers=header, cookies=cookies, verify=verify)
    #retry the same page every 5 seconds until the server answers 200
    while response.status_code != 200:
        time.sleep(5)
        response = requests.get(itemsURL, headers=header, cookies=cookies, verify=verify)
    items = response.json()
    for item in items:
        itemList[item['uuid']] = item['handle']
    offset = offset + 200
    print(offset)

#first pass: fetch each item's metadata once, remember it, and collect the
#metadata keys in first-seen order (the set makes the membership test cheap)
keyList = []
seenKeys = set()
itemMetadata = {}
for itemID in itemList:
    metadataURL = baseURL+'/rest/items/'+str(itemID)+'/metadata'
    print(metadataURL)
    metadata = requests.get(metadataURL, headers=header, cookies=cookies, verify=verify).json()
    itemMetadata[itemID] = metadata
    for metadataElement in metadata:
        key = metadataElement['key']
        if key not in seenKeys and key != 'dc.description.provenance':
            seenKeys.add(key)
            keyList.append(key)
            print('%s %s' % (itemID, key))

keyListHeader = ['itemID'] + keyList
print(keyListHeader)

#second pass: one row per item, written from the metadata cached above.
#The file is named after the collection handle (not after whichever item
#happened to be processed last) and is closed when the block exits.
csvPath = filePath+handle.replace('/','-')+'Metadata.csv'
with open(csvPath, 'wb') as csvFile:
    f = csv.writer(csvFile)
    f.writerow(keyListHeader)
    for itemID in itemList:
        print(itemID)
        valuesByKey = dict((key, []) for key in keyList)
        for metadataElement in itemMetadata[itemID]:
            key = metadataElement['key']
            #keys left out of the header (provenance) are skipped here too
            if key in valuesByKey:
                valuesByKey[key].append(metadataElement['value'].encode('utf-8'))
        #join repeated values with '|'; the itemID is written untouched
        #(stripping a trailing separator from it would drop its last character)
        row = [itemID] + ['|'.join(valuesByKey[key]) for key in keyList]
        f.writerow(row)

logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time:  %d:%02d:%02d' % (h, m, s))

exportSelectedRecordMetadataToCSV.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@
7878

7979
itemRows = []
8080
for itemID in itemList:
81-
#itemRow = {}
8281
itemRow = dict.fromkeys(keyListHeader, '')
8382
itemRow['itemID'] = itemID
8483
print itemRow
@@ -92,8 +91,6 @@
9291
except:
9392
itemRow[key] = value
9493
print itemRow
95-
itemList = []
9694
for key in keyListHeader:
9795
itemList.append(itemRow[key][:len(itemRow[key])-1])
98-
#itemRows.append(itemRow)
9996
f.writerow(itemList)

0 commit comments

Comments
 (0)