Skip to content

Commit a293d9f

Browse files
committed
updates
1 parent 8c09657 commit a293d9f

File tree

4 files changed

+75
-166
lines changed

4 files changed

+75
-166
lines changed

getRecordsWithKeyAndValue.py

Lines changed: 36 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,21 @@
44
import csv
55
import time
66
import urllib3
7+
import argparse
8+
9+
parser = argparse.ArgumentParser()
10+
parser.add_argument('-k', '--key', help='the key to be searched. optional - if not provided, the script will ask for input')
11+
parser.add_argument('-v', '--value', help='the value to be searched. optional - if not provided, the script will ask for input')
12+
args = parser.parse_args()
13+
14+
if args.key:
15+
key = args.key
16+
else:
17+
key = raw_input('Enter the key: ')
18+
if args.value:
19+
value = args.value
20+
else:
21+
value = raw_input('Enter the value: ')
722

823
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
924

@@ -23,9 +38,6 @@
2338
filePath = secrets.filePath
2439
verify = secrets.verify
2540

26-
key = raw_input('Enter key: ')
27-
value = raw_input('Enter value: ')
28-
2941
startTime = time.time()
3042
data = {'email':email,'password':password}
3143
header = {'content-type':'application/json','accept':'application/json'}
@@ -37,49 +49,29 @@
3749
userFullName = status['fullname']
3850
print 'authenticated'
3951

40-
endpoint = baseURL+'/rest/communities'
41-
communities = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
42-
43-
itemList = []
44-
endpoint = baseURL+'/rest/communities'
45-
communities = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
46-
for i in range (0, len (communities)):
47-
communityID = communities[i]['uuid']
48-
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
49-
for j in range (0, len (collections)):
50-
collectionID = collections[j]['uuid']
51-
if collectionID != '4dccec82-4cfb-4583-a728-2cb823b15ef0':
52-
offset = 0
53-
items = ''
54-
while items != []:
55-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
56-
while items.status_code != 200:
57-
time.sleep(5)
58-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
59-
items = items.json()
60-
for k in range (0, len (items)):
61-
itemID = items[k]['uuid']
62-
itemList.append(itemID)
63-
offset = offset + 200
64-
elapsedTime = time.time() - startTime
65-
m, s = divmod(elapsedTime, 60)
66-
h, m = divmod(m, 60)
67-
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
68-
6952
f=csv.writer(open(filePath+'Key='+key+' Value='+value+'.csv', 'wb'))
7053
f.writerow(['itemID']+['uri']+['key']+['value'])
71-
for number, itemID in enumerate(itemList):
72-
itemMetadataProcessed = []
73-
itemsRemaining = len(itemList) - number
74-
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
75-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
76-
for i in range (0, len (metadata)):
77-
if metadata[i]['key'] == key and metadata[i]['value'] == value:
78-
metadataValue = metadata[i]['value']
79-
for i in range (0, len (metadata)):
80-
if metadata[i]['key'] == 'dc.identifier.uri':
81-
uri = metadata[i]['value']
82-
f.writerow([itemID]+[uri]+[key]+[metadataValue])
54+
offset = 0
55+
recordsEdited = 0
56+
items = ''
57+
while items != []:
58+
endpoint = baseURL+'/rest/filtered-items?query_field[]='+key+'&query_op[]=equals&query_val[]='+value+'&limit=200&offset='+str(offset)
59+
print endpoint
60+
response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
61+
items = response['items']
62+
for item in items:
63+
itemMetadataProcessed = []
64+
itemLink = item['link']
65+
metadata = requests.get(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify).json()
66+
for i in range (0, len (metadata)):
67+
if metadata[i]['key'] == key and metadata[i]['value'] == value:
68+
metadataValue = metadata[i]['value']
69+
for i in range (0, len (metadata)):
70+
if metadata[i]['key'] == 'dc.identifier.uri':
71+
uri = metadata[i]['value']
72+
f.writerow([itemLink]+[uri]+[key]+[metadataValue])
73+
offset = offset + 200
74+
print offset
8375

8476
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)
8577

identifyDuplicateKeyValuePairsFromItemsDiffLangTags.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
4444
for j in range (0, len (collections)):
4545
collectionID = collections[j]['uuid']
46-
if collectionID != '4dccec82-4cfb-4583-a728-2cb823b15ef0':
46+
if collectionID != '45794375-6640-4efe-848e-082e60bae375':
4747
offset = 0
4848
items = ''
4949
while items != []:
@@ -61,7 +61,7 @@
6161
h, m = divmod(m, 60)
6262
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
6363

64-
f=csv.writer(open(filePath+'DuplicatesRecordsDiffLangTags'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
64+
f=csv.writer(open(filePath+'DuplicateKeysWithDiffLangTags'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
6565
f.writerow(['itemID']+['key:value'])
6666
for number, itemID in enumerate(itemList):
6767
itemMetadataProcessed = []

identifyItemWithDuplicateKeysInCommunity.py

Lines changed: 0 additions & 86 deletions
This file was deleted.

identifyItemsMissingKeyInCommunity.py

Lines changed: 37 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,22 @@
55
import csv
66
from datetime import datetime
77
import urllib3
8+
import argparse
9+
10+
parser = argparse.ArgumentParser()
11+
parser.add_argument('-k', '--key', help='the key to be searched. optional - if not provided, the script will ask for input')
12+
parser.add_argument('-i', '--handle', help='handle of the community to retrieve. optional - if not provided, the script will ask for input')
13+
args = parser.parse_args()
14+
15+
if args.key:
16+
key = args.key
17+
else:
18+
key = raw_input('Enter the key to be searched: ')
19+
20+
if args.handle:
21+
handle = args.handle
22+
else:
23+
handle = raw_input('Enter community handle: ')
824

925
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
1026

@@ -24,9 +40,6 @@
2440
filePath = secrets.filePath
2541
verify = secrets.verify
2642

27-
handle = raw_input('Enter community handle: ')
28-
key = raw_input('Enter key: ')
29-
3043
startTime = time.time()
3144
data = {'email':email,'password':password}
3245
header = {'content-type':'application/json','accept':'application/json'}
@@ -38,46 +51,36 @@
3851
userFullName = status['fullname']
3952
print 'authenticated'
4053

41-
itemList = []
4254
endpoint = baseURL+'/rest/handle/'+handle
4355
community = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
4456
communityID = community['uuid']
45-
4657
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
58+
collSels = ''
4759
for j in range (0, len (collections)):
4860
collectionID = collections[j]['uuid']
49-
if collectionID != '4dccec82-4cfb-4583-a728-2cb823b15ef0':
50-
offset = 0
51-
items = ''
52-
while items != []:
53-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
54-
while items.status_code != 200:
55-
time.sleep(5)
56-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
57-
items = items.json()
58-
for k in range (0, len (items)):
59-
itemID = items[k]['uuid']
60-
itemList.append(itemID)
61-
offset = offset + 200
62-
elapsedTime = time.time() - startTime
63-
m, s = divmod(elapsedTime, 60)
64-
h, m = divmod(m, 60)
65-
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
61+
collSel = '&collSel[]=' + collectionID
62+
collSels = collSels + collSel
6663

6764
f=csv.writer(open(filePath+'recordsMissing'+key+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
6865
f.writerow(['itemID']+['key'])
69-
idList =[]
70-
for number, itemID in enumerate(itemList):
71-
itemMetadataProcessed = []
72-
itemsRemaining = len(itemList) - number
73-
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
74-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
75-
for metadataElement in metadata:
76-
itemMetadataProcessed.append(metadataElement['key'])
77-
if key not in itemMetadataProcessed:
78-
f.writerow([itemID])
79-
idList.append(itemID)
80-
print idList
66+
offset = 0
67+
recordsEdited = 0
68+
items = ''
69+
while items != []:
70+
endpoint = baseURL+'/rest/filtered-items?query_field[]='+key+'&query_op[]=doesnt_exist&query_val[]='+collSels+'&limit=200&offset='+str(offset)
71+
print endpoint
72+
response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
73+
items = response['items']
74+
for item in items:
75+
itemMetadataProcessed = []
76+
itemLink = item['link']
77+
metadata = requests.get(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify).json()
78+
for metadataElement in metadata:
79+
itemMetadataProcessed.append(metadataElement['key'])
80+
if key not in itemMetadataProcessed:
81+
f.writerow([itemLink])
82+
offset = offset + 200
83+
print offset
8184

8285
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)
8386

0 commit comments

Comments
 (0)