Skip to content

Commit ab61273

Browse files
committed
creating python3 branch
1 parent aa807d8 commit ab61273

21 files changed

+2142
-0
lines changed

python3/checkInventory.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import argparse
2+
import pandas as pd
3+
import os
4+
5+
6+
def main():
    """Report which files listed in an inventory CSV exist in a data directory.

    Command-line arguments:
        -i/--inventory  CSV file containing the inventory (required).
        -d/--dataDir    directory to search; when omitted, the directory part
                        of the inventory path is used.
        -f/--field      CSV column holding the file names; defaults to 'name'.
        -v/--verbose    echo the settings and also report files that are found.

    Prints one line per missing file and a final found/missing summary.
    """
    # begin: argument parsing
    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '--inventory', required=True,
                        help='csv file containing the inventory. the path, if given, can be absolute or relative to this script')

    parser.add_argument('-d', '--dataDir',
                        help='directory containing the data. if omitted, data will be read from the directory containing the inventory file')

    parser.add_argument('-f', '--field',
                        help='field in the csv containing the fileNames. default: name')

    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity')

    args = parser.parse_args()

    if not args.dataDir:
        # May be '' when the inventory is given as a bare file name.
        args.dataDir = os.path.dirname(args.inventory)

    if not args.field:
        args.field = 'name'

    if args.verbose:
        print('verbosity turned on')
        print('reading inventory from {}'.format(args.inventory))
        print('fileNames read from field named {}'.format(args.field))
        print('searching for files in {}'.format(args.dataDir))
    # end: argument parsing

    inventory = pd.read_csv(args.inventory, usecols=[args.field])
    fileNames = inventory[args.field]
    foundfiles = 0
    missingfiles = 0
    for fileName in fileNames:
        # os.path.join keeps the name relative when dataDir is '', whereas
        # concatenating with '/' would point at the filesystem root.
        if os.path.isfile(os.path.join(args.dataDir, fileName)):
            if args.verbose:
                print('{} is not missing'.format(fileName))
            foundfiles += 1
        else:
            print('{} is missing'.format(fileName))
            missingfiles += 1

    print('{} files found and {} files missing'.format(foundfiles, missingfiles))


if __name__ == "__main__":
    main()
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import json
2+
import requests
3+
import secrets
4+
import csv
5+
import time
6+
import urllib3
7+
import argparse
8+
9+
# Export the values of two metadata keys for every item in a community.
#
# Flow: pick the secrets module, read the two keys and the community handle
# (from the command line or interactively), log in to the REST API, list the
# items of every non-skipped collection in the community, fetch each item's
# metadata, and write itemID/key/key2 rows to a CSV under secrets.filePath.

# An empty answer or a failed import keeps the default `secrets` module.
secretsVersion = input('To edit production server, enter the name of the secrets file: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print('Editing Production')
    except ImportError:
        print('Editing Stage')
else:
    print('Editing Stage')

parser = argparse.ArgumentParser()
parser.add_argument('-1', '--key', help='the first key to be output. optional - if not provided, the script will ask for input')
parser.add_argument('-2', '--key2', help='the second key to be output. optional - if not provided, the script will ask for input')
parser.add_argument('-i', '--handle', help='handle of the community to retreive. optional - if not provided, the script will ask for input')
args = parser.parse_args()

# Prompt only for the values that were not supplied on the command line.
key = args.key if args.key else input('Enter first key: ')
key2 = args.key2 if args.key2 else input('Enter second key: ')
handle = args.handle if args.handle else input('Enter community handle: ')

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

baseURL = secrets.baseURL
email = secrets.email
password = secrets.password
filePath = secrets.filePath
verify = secrets.verify
skippedCollections = secrets.skippedCollections

# Log in and keep the session cookie for all later requests.
startTime = time.time()
data = {'email': email, 'password': password}
header = {'content-type': 'application/json', 'accept': 'application/json'}
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
cookies = {'JSESSIONID': session}
headerFileUpload = {'accept': 'application/json'}
cookiesFileUpload = cookies
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
print('authenticated')

# Resolve the community handle to its uuid.
endpoint = baseURL+'/rest/handle/'+handle
community = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
communityID = community['uuid']

# Collect the uuid of every item in every non-skipped collection.
itemList = []
endpoint = baseURL+'/rest/communities'
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
for collection in collections:
    collectionID = collection['uuid']
    print(collectionID)
    if collectionID in skippedCollections:
        continue
    offset = 0
    items = ''
    # Page through the collection 200 items at a time until an empty page.
    while items != []:
        itemsURL = baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset)
        items = requests.get(itemsURL, headers=header, cookies=cookies, verify=verify)
        # Retry every 5 seconds until the server answers 200.
        while items.status_code != 200:
            time.sleep(5)
            items = requests.get(itemsURL, headers=header, cookies=cookies, verify=verify)
        items = items.json()
        itemList.extend(item['uuid'] for item in items)
        offset = offset + 200
        print(offset)
elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Item list creation time: ', '%d:%02d:%02d' % (h, m, s))

# Build one (itemID, value of key, value of key2) tuple per item.
valueList = []
for number, itemID in enumerate(itemList):
    itemsRemaining = len(itemList) - number
    print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
    metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
    tupleValue1 = ''
    tupleValue2 = ''
    for metadataElement in metadata:
        # When a key repeats on an item, the last occurrence wins.
        if metadataElement['key'] == key:
            tupleValue1 = metadataElement['value']
        if metadataElement['key'] == key2:
            tupleValue2 = metadataElement['value']
    itemTuple = (itemID, tupleValue1, tupleValue2)
    valueList.append(itemTuple)
    print(itemTuple)
print(valueList)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Value list creation time: ', '%d:%02d:%02d' % (h, m, s))

# newline='' is what the csv module expects of its file object; the with
# block guarantees the file is flushed and closed before the script ends.
with open(filePath+key+'-'+key2+'Values.csv', 'w', newline='') as csvFile:
    f = csv.writer(csvFile)
    f.writerow(['itemID', key, key2])
    f.writerows(valueList)

logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time: ', '%d:%02d:%02d' % (h, m, s))
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import json
2+
import requests
3+
import secrets
4+
import csv
5+
import re
6+
import time
7+
import urllib3
8+
9+
# Count, per collection, the name values that appear to contain initials and
# write the non-zero counts to initialCountInCollection.csv.
#
# Flow: pick the secrets module, log in to the REST API, list every
# non-skipped collection, then for each collection and each name key page
# through /rest/filtered-items and test every matching name value.

# An empty answer or a failed import keeps the default `secrets` module.
secretsVersion = input('To edit production server, enter the name of the secrets file: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print('Editing Production')
    except ImportError:
        print('Editing Stage')
else:
    print('Editing Stage')

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

baseURL = secrets.baseURL
email = secrets.email
password = secrets.password
filePath = secrets.filePath
verify = secrets.verify
skippedCollections = secrets.skippedCollections

# Log in and keep the session cookie for all later requests.
startTime = time.time()
data = {'email': email, 'password': password}
header = {'content-type': 'application/json', 'accept': 'application/json'}
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
cookies = {'JSESSIONID': session}
headerFileUpload = {'accept': 'application/json'}
cookiesFileUpload = cookies
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
userFullName = status['fullname']
print('authenticated')

# Gather the uuid of every collection that is not explicitly skipped.
collectionIds = []
endpoint = baseURL+'/rest/communities'
communities = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
for community in communities:
    communityID = community['uuid']
    collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
    for collection in collections:
        collectionID = collection['uuid']
        if collectionID not in skippedCollections:
            collectionIds.append(collectionID)

names = []
keys = ['dc.contributor.advisor', 'dc.contributor.author', 'dc.contributor.committeeMember', 'dc.contributor.editor', 'dc.contributor.illustrator', 'dc.contributor.other', 'dc.creator']

# Patterns are compiled once instead of on every metadata value.
initialsPattern = re.compile(r'(\s|,|[A-Z]|([A-Z]\.))[A-Z](\s|$|\.|,)')
middleInitialPattern = re.compile(r'((\w{2,},\s)|(\w{2,},))\w[a-z]+')
parenthesesPattern = re.compile(r'\(|\)')

# The offset must advance by exactly the page size requested; advancing
# by more than the limit skips the items in between.
pageSize = 100

# newline='' is what the csv module expects of its file object; the with
# block guarantees the file is closed even if a request fails midway.
with open('initialCountInCollection.csv', 'w', newline='') as csvFile:
    f = csv.writer(csvFile)
    f.writerow(['collectionName', 'handle', 'initialCount'])

    for number, collectionID in enumerate(collectionIds):
        initialCount = 0
        collectionsRemaining = len(collectionIds) - number
        print(collectionID, 'Collections remaining: ', collectionsRemaining)
        collection = requests.get(baseURL+'/rest/collections/'+str(collectionID), headers=header, cookies=cookies, verify=verify).json()
        collectionName = collection['name']
        collectionHandle = collection['handle']
        collSels = '&collSel[]=' + collectionID

        # Each key is paged on its own, so an empty page for one key cannot
        # end (or prolong) the paging of the others.
        for key in keys:
            offset = 0
            items = ''
            while items != []:
                endpoint = baseURL+'/rest/filtered-items?query_field[]='+key+'&query_op[]=exists&query_val[]='+collSels+'&limit='+str(pageSize)+'&offset='+str(offset)
                print(endpoint)
                response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
                items = response['items']
                for item in items:
                    itemLink = item['link']
                    metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json()
                    for metadata_element in metadata:
                        if metadata_element['key'] != key:
                            continue
                        individual_name = metadata_element['value']
                        # NOTE(review): nesting reconstructed from a flattened
                        # listing - the name is evaluated once per
                        # dc.identifier.uri value on the item; confirm.
                        for uri_element in metadata:
                            if uri_element['key'] != 'dc.identifier.uri':
                                continue
                            uri = uri_element['value']
                            contains_initials = initialsPattern.search(individual_name)
                            contains_middleinitial = middleInitialPattern.search(individual_name)
                            contains_parentheses = parenthesesPattern.search(individual_name)
                            # Either of the two exclusion patterns outranks
                            # the initials match.
                            if contains_middleinitial or contains_parentheses:
                                continue
                            if contains_initials:
                                initialCount += 1
                offset = offset + pageSize
                print(offset)

        if initialCount > 0:
            f.writerow([collectionName, baseURL+'/'+collectionHandle, str(initialCount).zfill(6)])

logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time: ', '%d:%02d:%02d' % (h, m, s))
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from collections import Counter
7+
import urllib3
8+
import argparse
9+
10+
# Dump all metadata of a collection's items to a CSV, one row per item and one
# column per metadata key (repeated values are joined with '|').
#
# Flow: pick the secrets module, read the collection handle, log in to the
# REST API, list the collection's items, discover the set of metadata keys in
# a first pass, then write one row per item in a second pass.

# An empty answer or a failed import keeps the default `secrets` module.
secretsVersion = input('To edit production server, enter the name of the secrets file: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print('Editing Production')
    except ImportError:
        print('Editing Stage')
else:
    print('Editing Stage')

#login info kept in secrets.py file
baseURL = secrets.baseURL
email = secrets.email
password = secrets.password
filePath = secrets.filePath
verify = secrets.verify
skippedCollections = secrets.skippedCollections

parser = argparse.ArgumentParser()
parser.add_argument('-i', '--handle', help='handle of the collection to retreive. optional - if not provided, the script will ask for input')
args = parser.parse_args()

# Prompt only when the handle was not supplied on the command line.
handle = args.handle if args.handle else input('Enter collection handle: ')

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#authentication
startTime = time.time()
data = {'email': email, 'password': password}
header = {'content-type': 'application/json', 'accept': 'application/json'}
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
cookies = {'JSESSIONID': session}
headerFileUpload = {'accept': 'application/json'}
cookiesFileUpload = cookies
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
userFullName = status['fullname']
print('authenticated')

# Resolve the collection handle to its uuid.
endpoint = baseURL+'/rest/handle/'+handle
collection = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
collectionID = collection['uuid']
# Previously fetched with a second, identical GET of the same endpoint.
collectionTitle = collection

# Map every item uuid in the collection to its handle, 200 items per page.
itemList = {}
offset = 0
items = ''
while items != []:
    itemsURL = baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset)
    items = requests.get(itemsURL, headers=header, cookies=cookies, verify=verify)
    # Retry every 5 seconds until the server answers 200.
    while items.status_code != 200:
        time.sleep(5)
        items = requests.get(itemsURL, headers=header, cookies=cookies, verify=verify)
    items = items.json()
    for item in items:
        itemList[item['uuid']] = item['handle']
    offset = offset + 200
    print(offset)

# First pass: discover every metadata key in use, in order of first appearance.
keyList = []
for itemID in itemList:
    print(baseURL+'/rest/items/'+str(itemID)+'/metadata')
    metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
    for metadataElement in metadata:
        key = metadataElement['key']
        if key not in keyList and key != 'dc.description.provenance':
            keyList.append(key)
            print(itemID, key)

keyListHeader = ['itemID'] + keyList
print(keyListHeader)

# Second pass: one CSV row per item. newline='' is what the csv module expects
# of its file object; the with block guarantees the file is closed.
with open(filePath+handle.replace('/', '-')+'Metadata.csv', 'w', newline='') as csvFile:
    f = csv.writer(csvFile)
    f.writerow(keyListHeader)

    for itemID in itemList:
        print(itemID)
        metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
        valuesByKey = {key: [] for key in keyList}
        for metadataElement in metadata:
            elementKey = metadataElement['key']
            if elementKey not in valuesByKey:
                continue
            value = metadataElement.get('value')
            # A missing or non-string value contributes an empty entry.
            if not isinstance(value, str):
                value = ''
            valuesByKey[elementKey].append(value)
        # Joining avoids the trailing-'|' strip, which also cut the last
        # character off the itemID column.
        f.writerow([itemID] + ['|'.join(valuesByKey[key]) for key in keyList])

logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time: ', '%d:%02d:%02d' % (h, m, s))

0 commit comments

Comments
 (0)