|
"""Count name values that look like bare initials, per DSpace collection.

Python 2 script (it relies on ``raw_input`` and binary-mode ``csv`` output).

For every collection except one hard-coded UUID, the script pages through
``/rest/filtered-items`` for each name field in ``keys``, fetches each
item's metadata, and counts the values of that field that contain an
initial-like token. Collections with a non-zero count are written to
``initialCountInCollection.csv`` as ``collectionName, handle, initialCount``.
"""
import json
import requests
import secrets
import csv
import re
import time
import urllib3

# ``secrets`` imported above is the default configuration module; a module
# name typed at the prompt replaces it when that module can be imported.
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print('Editing Production')
    except ImportError:
        print('Editing Stage')
else:
    print('Editing Stage')

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

baseURL = secrets.baseURL
email = secrets.email
password = secrets.password
filePath = secrets.filePath
verify = secrets.verify

# Page size sent as ``limit``; the offset must advance by the same amount,
# otherwise whole pages of results are skipped.
PAGE_SIZE = 100

# Collection that is left out of the report.
EXCLUDED_COLLECTION = '45794375-6640-4efe-848e-082e60bae375'

keys = ['dc.contributor.advisor', 'dc.contributor.author', 'dc.contributor.committeeMember', 'dc.contributor.editor', 'dc.contributor.illustrator', 'dc.contributor.other', 'dc.creator']

# Compiled once instead of on every metadata value.
INITIALS = re.compile(r'(\s|,|[A-Z]|([A-Z]\.))[A-Z](\s|$|\.|,)')
# Matches "word, Word..." where the word after the comma continues in
# lower case; values matching this are skipped before the initials test.
SPELLED_OUT_AFTER_COMMA = re.compile(r'((\w{2,},\s)|(\w{2,},))\w[a-z]+')
PARENTHESES = re.compile(r'\(|\)')


def has_bare_initials(name):
    """Return True when *name* should be counted.

    A value is counted only if it matches ``INITIALS`` and matches neither
    ``SPELLED_OUT_AFTER_COMMA`` nor ``PARENTHESES`` (same precedence as the
    original if/elif chain).
    """
    if SPELLED_OUT_AFTER_COMMA.search(name) or PARENTHESES.search(name):
        return False
    return INITIALS.search(name) is not None


def get_json(path):
    """GET ``baseURL + path`` with the session cookie and return parsed JSON."""
    return requests.get(baseURL + path, headers=header, cookies=cookies, verify=verify).json()


startTime = time.time()
data = {'email': email, 'password': password}
header = {'content-type': 'application/json', 'accept': 'application/json'}
# NOTE(review): ``params=`` places the credentials in the URL query string;
# kept as-is because the server-side login contract is not visible here.
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
cookies = {'JSESSIONID': session}

try:
    status = get_json('/rest/status')
    # Not used further; the lookup is kept so that an unexpected status
    # payload still fails here rather than later.
    userFullName = status['fullname']
    print('authenticated')

    collectionIds = []
    for community in get_json('/rest/communities'):
        communityID = community['uuid']
        for collection in get_json('/rest/communities/'+str(communityID)+'/collections'):
            collectionID = collection['uuid']
            if collectionID != EXCLUDED_COLLECTION:
                collectionIds.append(collectionID)

    # 'wb' is the mode the Python 2 csv module expects; the ``with`` block
    # guarantees the report is flushed and closed even on failure.
    with open('initialCountInCollection.csv', 'wb') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['collectionName', 'handle', 'initialCount'])

        for number, collectionID in enumerate(collectionIds):
            initialCount = 0
            collectionsRemaining = len(collectionIds) - number
            print('%s Collections remaining:  %s' % (collectionID, collectionsRemaining))
            collection = get_json('/rest/collections/'+str(collectionID))
            collectionName = collection['name'].encode('utf-8')
            collectionHandle = collection['handle']
            collSels = '&collSel[]=' + collectionID

            # Each field is paged on its own so that a short result set
            # for one field cannot cut off the results of another.
            for key in keys:
                offset = 0
                while True:
                    endpoint = (baseURL+'/rest/filtered-items?query_field[]='+key
                                + '&query_op[]=exists&query_val[]='+collSels
                                + '&limit='+str(PAGE_SIZE)+'&offset='+str(offset))
                    print(endpoint)
                    items = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()['items']
                    if not items:
                        break
                    for item in items:
                        metadata = get_json(item['link'] + '/metadata')
                        # Test every value of the current field, not just
                        # the last one found on the item.
                        for metadata_element in metadata:
                            if metadata_element['key'] != key:
                                continue
                            individual_name = metadata_element['value'].encode('utf-8')
                            if has_bare_initials(individual_name):
                                initialCount += 1
                    offset += PAGE_SIZE
                    print(offset)

            if initialCount > 0:
                f.writerow([collectionName, baseURL+'/'+collectionHandle, str(initialCount).zfill(6)])
finally:
    # Always release the server-side session, even if the run failed.
    logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time:  %d:%02d:%02d' % (h, m, s))
0 commit comments