updates

ehanson8 · ehanson8 · commit 8a3a04e7fa8f · 2018-09-04T10:06:09.000-04:00
diff --git a/README.md b/README.md
@@ -36,6 +36,9 @@ No collections skipped:
 #### [compareTwoKeysInCommunity.py](compareTwoKeysInCommunity.py)
 Based on user input, extracts the values of two specified keys from a specified community to a CSV file for comparison.
 
+#### [exportSelectedRecordMetadataToCSV.py](exportSelectedRecordMetadataToCSV.py)
+Based on a CSV of item handles, extracts all metadata (except 'dc.description.provenance' values) from the selected items to a CSV file.
+
 #### [findBogusUris.py](findBogusUris.py)
 Extracts the item ID and the value of the key 'dc.identifier.uri' to a CSV file when the value does not begin with the handlePrefix specified in the secrets.py file.
 
@@ -72,9 +75,6 @@ Based on user input, extracts the ID and URI for all items in the specified coll
 #### [getRecordsWithKeyAndValue.py](getRecordsWithKeyAndValue.py)
 Based on user input, extracts the ID and URI for all items in the repository with the specified key-value pair to a CSV file.
 
-#### [identifyDuplicateKeyValuePairsFromItemsDiffLangTags.py](identifyDuplicateKeyValuePairsFromItemsDiffLangTags.py)
-Extracts all duplicate key-value pairs in an item that only differ in their assigned language tag.
-
 #### [identifyItemsMissingKeyInCommunity.py](identifyItemsMissingKeyInCommunity.py)
 Based on user input, extracts the IDs of items from a specified community that do not have the specified key.
 
diff --git a/exportSelectedRecordMetadataToCSV.py b/exportSelectedRecordMetadataToCSV.py
@@ -0,0 +1,99 @@
+import json
+import requests
+import secrets
+import time
+import csv
+from collections import Counter
+import urllib3
+import argparse
+
+#Exports the metadata of the items listed (by handle) in a CSV file to selectedRecordMetadata.csv; login info and filePath are kept in the secrets.py file
+baseURL = secrets.baseURL
+email = secrets.email
+password = secrets.password
+filePath = secrets.filePath  #prefix prepended to both the input file name and the output file name
+verify = secrets.verify  #passed as verify= to every requests call below
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-f', '--fileName', help='the CSV file of record handles. optional - if not provided, the script will ask for input')
+args = parser.parse_args()
+
+if args.fileName:
+    fileName = filePath+args.fileName
+else:
+    fileName = filePath+raw_input('Enter the CSV file of record handles (including \'.csv\'): ')
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)  #silences the warning urllib3 emits for unverified HTTPS requests
+
+secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
+if secretsVersion != '':
+    try:
+        secrets = __import__(secretsVersion)  #NOTE(review): baseURL, email, password, filePath and verify were already copied from the default secrets module above and are not re-read after this import
+        print 'Editing Production'
+    except ImportError:
+        print 'Editing Stage'  #the named module could not be imported, so the default secrets stay in effect
+else:
+    print 'Editing Stage'
+
+#authentication: log in, keep the JSESSIONID cookie, then confirm the session via /rest/status
+startTime = time.time()  #not used again in this script
+data = {'email':email,'password':password}
+header = {'content-type':'application/json','accept':'application/json'}
+session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']  #credentials are passed via params=, i.e. in the query string
+cookies = {'JSESSIONID': session}
+headerFileUpload = {'accept':'application/json'}  #not used again in this script
+cookiesFileUpload = cookies  #not used again in this script
+status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
+userFullName = status['fullname']  #not used again; raises KeyError if the status response has no 'fullname'
+print 'authenticated'
+
+
+handles = []  #values of the 'handle' column of the input CSV, in file order
+with open(fileName) as csvfile:
+    reader = csv.DictReader(csvfile)
+    for row in reader:
+        handles.append(row['handle'])  #the input CSV must have a header row containing 'handle'
+
+itemList = []  #one 'uuid' per handle, in the same order as handles
+for handle in handles:
+    endpoint = baseURL+'/rest/handle/'+handle
+    item = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
+    itemID = item['uuid']
+    itemList.append(itemID)
+
+keyList = []  #first pass: every distinct metadata key across the selected items, in first-seen order
+for itemID in itemList:
+    metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
+    for metadataElement in metadata:
+        key = metadataElement['key']
+        if key not in keyList and key != 'dc.description.provenance':  #provenance values are deliberately left out of the export
+            keyList.append(key)
+            print itemID, key
+
+keyListHeader = ['itemID']  #CSV header: itemID first, then one column per collected key
+keyListHeader = keyListHeader + keyList
+print keyListHeader
+f=csv.writer(open(filePath+'selectedRecordMetadata.csv', 'wb'))  #NOTE(review): the underlying file object is never explicitly closed
+f.writerow(keyListHeader)
+
+itemRows = []  #not used again in this script
+for itemID in itemList:  #second pass: fetch each item's metadata again and write one CSV row per item
+    #every column starts as an empty string so keys an item lacks become blank cells
+    itemRow = dict.fromkeys(keyListHeader, '')
+    itemRow['itemID'] = itemID
+    print itemRow
+    metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
+    for metadataElement in metadata:
+        for key in keyListHeader:
+            if metadataElement['key'] == key:
+                value = metadataElement['value'].encode('utf-8')+'|'  #repeated values for one key are concatenated, each followed by '|'
+                try:
+                    itemRow[key] = itemRow[key] + value
+                except:  #NOTE(review): bare except; every key is pre-populated by dict.fromkeys above, so this fallback looks like a leftover - confirm
+                    itemRow[key] = value
+    print itemRow
+    itemList = []  #NOTE(review): rebinds the name the enclosing for loop was started on; the loop keeps iterating the original list object, and this name now holds the current row's cells
+    for key in keyListHeader:
+        itemList.append(itemRow[key][:len(itemRow[key])-1])  #drops the last character (the trailing '|'); NOTE(review): the itemID cell has no '|' appended, so this also removes the final character of the item ID
+    #rows are written one at a time as they are built rather than collected first
+    f.writerow(itemList)
diff --git a/getCompleteAndUniqueValuesForAllKeysInCommunity.py b/getCompleteAndUniqueValuesForAllKeysInCommunity.py
@@ -78,7 +78,10 @@
     for l in range (0, len (metadata)):
         if metadata[l]['key'] != 'dc.description.provenance':
             key = metadata[l]['key']
-            value = metadata[l]['value'].encode('utf-8')
+            try:
+                value = metadata[l]['value'].encode('utf-8')
+            except:
+                value = ''
             if os.path.isfile(filePathComplete+key+'ValuesComplete.csv') == False:
                 f=csv.writer(open(filePathComplete+key+'ValuesComplete.csv', 'wb'))
                 f.writerow(['itemID']+['value'])
diff --git a/identifyDuplicateKeyValuePairsFromItemsDiffLangTags.py b/identifyDuplicateKeyValuePairsFromItemsDiffLangTags.py