|
| 1 | +import json |
| 2 | +import requests |
| 3 | +import secrets |
| 4 | +import csv |
| 5 | +import time |
| 6 | +import os.path |
| 7 | +from collections import Counter |
| 8 | +from datetime import datetime |
| 9 | + |
| 10 | +secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ') |
| 11 | +if secretsVersion != '': |
| 12 | + try: |
| 13 | + secrets = __import__(secretsVersion) |
| 14 | + print 'Editing Production' |
| 15 | + except ImportError: |
| 16 | + print 'Editing Stage' |
| 17 | +else: |
| 18 | + print 'Editing Stage' |
| 19 | + |
| 20 | +baseURL = secrets.baseURL |
| 21 | +email = secrets.email |
| 22 | +password = secrets.password |
| 23 | +filePath = secrets.filePath |
| 24 | +verify = secrets.verify |
| 25 | + |
| 26 | +handle = raw_input('Enter community handle: ') |
| 27 | + |
| 28 | +requests.packages.urllib3.disable_warnings() |
| 29 | + |
| 30 | + |
| 31 | + |
| 32 | +startTime = time.time() |
| 33 | +data = json.dumps({'email':email,'password':password}) |
| 34 | +header = {'content-type':'application/json','accept':'application/json'} |
| 35 | +session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content |
| 36 | +headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session} |
| 37 | +print 'authenticated' |
| 38 | + |
# Accumulates the IDs of every item found in the community's collections.
itemList = []

# Resolve the handle entered by the operator to the community record.
endpoint = baseURL+'/rest/handle/'+handle
community = requests.get(endpoint, headers=headerAuth, verify=verify).json()
communityName = community['name'].replace(' ', '')
communityID = community['id']

# Take the timestamp once so both output directories always carry the same
# suffix, even when the clock ticks over between the two assignments.
runStamp = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
filePathComplete = filePath+'completeValueLists'+communityName+runStamp+'/'
filePathUnique = filePath+'uniqueValueLists'+communityName+runStamp+'/'
| 47 | + |
| 48 | +collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth, verify=verify).json() |
| 49 | +for j in range (0, len (collections)): |
| 50 | + collectionID = collections[j]['id'] |
| 51 | + if collectionID != 24: |
| 52 | + offset = 0 |
| 53 | + items = '' |
| 54 | + while items != []: |
| 55 | + items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify) |
| 56 | + while items.status_code != 200: |
| 57 | + time.sleep(5) |
| 58 | + items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify) |
| 59 | + items = items.json() |
| 60 | + for k in range (0, len (items)): |
| 61 | + itemID = items[k]['id'] |
| 62 | + itemList.append(itemID) |
| 63 | + offset = offset + 1000 |
| 64 | +elapsedTime = time.time() - startTime |
| 65 | +m, s = divmod(elapsedTime, 60) |
| 66 | +h, m = divmod(m, 60) |
| 67 | +print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s) |
| 68 | + |
| 69 | +os.mkdir(filePathComplete) |
| 70 | +os.mkdir(filePathUnique) |
| 71 | +for number, itemID in enumerate(itemList): |
| 72 | + itemsRemaining = len(itemList) - number |
| 73 | + print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID |
| 74 | + metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json() |
| 75 | + for l in range (0, len (metadata)): |
| 76 | + if metadata[l]['key'] != 'dc.description.provenance': |
| 77 | + key = metadata[l]['key'] |
| 78 | + value = metadata[l]['value'].encode('utf-8') |
| 79 | + if os.path.isfile(filePathComplete+key+'ValuesComplete.csv') == False: |
| 80 | + f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'wb')) |
| 81 | + f.writerow(['itemID']+['value']) |
| 82 | + f.writerow([itemID]+[value]) |
| 83 | + else: |
| 84 | + f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'a')) |
| 85 | + f.writerow([itemID]+[value]) |
| 86 | + |
| 87 | +elapsedTime = time.time() - startTime |
| 88 | +m, s = divmod(elapsedTime, 60) |
| 89 | +h, m = divmod(m, 60) |
| 90 | +print 'Complete value list creation time: ','%d:%02d:%02d' % (h, m, s) |
| 91 | + |
# For every per-key CSV in the "complete" directory, write a companion CSV in
# the "unique" directory listing each distinct value with its occurrence count.
for fileName in os.listdir(filePathComplete):
    # Binary mode is what the Python 2 csv module expects; the `with` block
    # releases the handle as soon as the counts are built.
    with open(filePathComplete+fileName, 'rb') as completeFile:
        valueListCount = Counter(row['value'] for row in csv.DictReader(completeFile))
    fileName = fileName.replace('Complete', 'Unique')
    with open(filePathUnique+fileName, 'wb') as uniqueFile:
        f = csv.writer(uniqueFile)
        f.writerow(['value', 'count'])
        for key, value in valueListCount.items():
            # Counts are zero-padded to six digits.
            f.writerow([key, str(value).zfill(6)])
| 103 | + |
| 104 | +logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify) |
| 105 | + |
| 106 | +elapsedTime = time.time() - startTime |
| 107 | +m, s = divmod(elapsedTime, 60) |
| 108 | +h, m = divmod(m, 60) |
| 109 | +print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s) |
0 commit comments