Skip to content

Commit ff5a48a

Browse files
committed
changed item retrieval method & added skipped collections
1 parent e0621f0 commit ff5a48a

27 files changed

+416
-344
lines changed

README.md

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,12 @@ All of these scripts require a secrets.py file in the same directory that must c
1010
filePath = '/Users/dspace_user/dspace-data-collection/data/'
1111
handlePrefix = 'http://dspace.myuni.edu/handle/'
1212
verify = True or False (no quotes). Use False if using an SSH tunnel to connect to the DSpace API
13+
skippedCollections = A list of the 'uuid' values of any collections that you wish the scripts to skip (e.g. ['45794375-6640-4efe-848e-082e60bae375'])
1314
```
1415
The 'filePath' is the directory into which output files will be written, and 'handlePrefix' may or may not vary from your DSpace URL depending on your configuration. This secrets.py file will be ignored according to the repository's .gitignore file so that DSpace login details will not be inadvertently exposed through GitHub.
1516

1617
If you are using both a development server and a production server, you can create a separate secrets.py file with a different name (e.g. secretsProd.py) and containing the production server information. When running each of these scripts, you will be prompted to enter the file name (e.g. 'secretsProd' without '.py') of an alternate secrets file. If you skip the prompt or incorrectly type the file name, the scripts will default to the information in the secrets.py file. This ensures that you will only edit the production server if you really intend to.
1718

18-
**Note**: All of these scripts skip collection '45794375-6640-4efe-848e-082e60bae375' for local reasons. To change this, edit the following portion of the script (typically between line 27-39)
19-
20-
21-
Skips collection 45794375-6640-4efe-848e-082e60bae375:
22-
23-
for j in range (0, len (collections)):
24-
collectionID = collections[j]['uuid']
25-
if collectionID != '45794375-6640-4efe-848e-082e60bae375':
26-
offset = 0
27-
28-
29-
No collections skipped:
30-
31-
for j in range (0, len (collections)):
32-
collectionID = collections[j]['uuid']
33-
if collectionID != 0:
34-
offset = 0
35-
3619
#### [addKeyValuePairOnHandleCSV.py](addKeyValuePairOnHandleCSV.py)
3720
Based on user input, adds key-value pairs to the items listed in a specified CSV file, which contains DSpace item handles and the value to be added to each item under the specified key. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
3821

addKeyValuePairOnHandleCSV.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
password = secrets.password
2424
filePath = secrets.filePath
2525
verify = secrets.verify
26+
skippedCollections = secrets.skippedCollections
2627

2728
startTime = time.time()
2829
data = {'email':email,'password':password}

addKeyValuePairToCollection.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
password = secrets.password
4949
filePath = secrets.filePath
5050
verify = secrets.verify
51+
skippedCollections = secrets.skippedCollections
5152

5253
startTime = time.time()
5354
data = {'email':email,'password':password}

addKeyValuePairToCommunity.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
password = secrets.password
4949
filePath = secrets.filePath
5050
verify = secrets.verify
51+
skippedCollections = secrets.skippedCollections
5152

5253
startTime = time.time()
5354
data = {'email':email,'password':password}
@@ -67,7 +68,7 @@
6768
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
6869
for j in range (0, len (collections)):
6970
collectionID = collections[j]['uuid']
70-
if collectionID != '45794375-6640-4efe-848e-082e60bae375':
71+
if collectionID not in skippedCollections:
7172
offset = 0
7273
items = ''
7374
while items != []:

addNewItemsToCollection.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
password = secrets.password
4646
filePath = secrets.filePath
4747
verify = secrets.verify
48+
skippedCollections = secrets.skippedCollections
4849

4950
startTime = time.time()
5051

deleteBitstreamsFromItem.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
password = secrets.password
2424
filePath = secrets.filePath
2525
verify = secrets.verify
26+
skippedCollections = secrets.skippedCollections
2627

2728
itemHandle = raw_input('Enter item handle: ')
2829

deleteKeyFromCollection.py

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
print 'Editing Stage'
1717
else:
1818
print 'Editing Stage'
19-
19+
2020
parser = argparse.ArgumentParser()
2121
parser.add_argument('-k', '--deletedKey', help='the key to be deleted. optional - if not provided, the script will ask for input')
2222
parser.add_argument('-i', '--handle', help='handle of the collection to retreive. optional - if not provided, the script will ask for input')
@@ -39,6 +39,7 @@
3939
password = secrets.password
4040
filePath = secrets.filePath
4141
verify = secrets.verify
42+
skippedCollections = secrets.skippedCollections
4243

4344
startTime = time.time()
4445
data = {'email':email,'password':password}
@@ -61,6 +62,7 @@
6162
offset = 0
6263
recordsEdited = 0
6364
items = ''
65+
itemLinks = []
6466
while items != []:
6567
endpoint = baseURL+'/rest/filtered-items?query_field[]='+deletedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset)
6668
print endpoint
@@ -69,32 +71,36 @@
6971
for item in items:
7072
itemMetadataProcessed = []
7173
itemLink = item['link']
72-
print itemLink
73-
metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json()
74-
for l in range (0, len (metadata)):
75-
metadata[l].pop('schema', None)
76-
metadata[l].pop('element', None)
77-
metadata[l].pop('qualifier', None)
78-
languageValue = metadata[l]['language']
79-
if metadata[l]['key'] == deletedKey:
80-
provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
81-
provNoteElement = {}
82-
provNoteElement['key'] = 'dc.description.provenance'
83-
provNoteElement['value'] = unicode(provNote)
84-
provNoteElement['language'] = 'en_US'
85-
itemMetadataProcessed.append(provNoteElement)
86-
else:
87-
itemMetadataProcessed.append(metadata[l])
88-
recordsEdited = recordsEdited + 1
89-
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
90-
print 'updated', itemLink, recordsEdited
91-
delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify)
92-
print delete
93-
post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
94-
print post
95-
f.writerow([itemLink]+[deletedKey]+[delete]+[post])
74+
itemLinks.append(itemLink)
9675
offset = offset + 200
9776
print offset
77+
for itemLink in itemLinks:
78+
itemMetadataProcessed = []
79+
print itemLink
80+
metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json()
81+
for l in range (0, len (metadata)):
82+
metadata[l].pop('schema', None)
83+
metadata[l].pop('element', None)
84+
metadata[l].pop('qualifier', None)
85+
languageValue = metadata[l]['language']
86+
if metadata[l]['key'] == deletedKey:
87+
provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
88+
provNoteElement = {}
89+
provNoteElement['key'] = 'dc.description.provenance'
90+
provNoteElement['value'] = unicode(provNote)
91+
provNoteElement['language'] = 'en_US'
92+
itemMetadataProcessed.append(provNoteElement)
93+
else:
94+
itemMetadataProcessed.append(metadata[l])
95+
recordsEdited = recordsEdited + 1
96+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
97+
print 'updated', itemLink, recordsEdited
98+
delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify)
99+
print delete
100+
post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
101+
print post
102+
f.writerow([itemLink]+[deletedKey]+[delete]+[post])
103+
98104

99105
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)
100106

deleteKeyFromCommunity.py

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
print 'Editing Stage'
1717
else:
1818
print 'Editing Stage'
19-
19+
2020
parser = argparse.ArgumentParser()
2121
parser.add_argument('-k', '--deletedKey', help='the key to be deleted. optional - if not provided, the script will ask for input')
2222
parser.add_argument('-i', '--handle', help='handle of the community to retreive. optional - if not provided, the script will ask for input')
@@ -39,6 +39,7 @@
3939
password = secrets.password
4040
filePath = secrets.filePath
4141
verify = secrets.verify
42+
skippedCollections = secrets.skippedCollections
4243

4344
startTime = time.time()
4445
data = {'email':email,'password':password}
@@ -66,6 +67,7 @@
6667
offset = 0
6768
recordsEdited = 0
6869
items = ''
70+
itemLinks = []
6971
while items != []:
7072
endpoint = baseURL+'/rest/filtered-items?query_field[]='+deletedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset)
7173
print endpoint
@@ -74,32 +76,35 @@
7476
for item in items:
7577
itemMetadataProcessed = []
7678
itemLink = item['link']
77-
print itemLink
78-
metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json()
79-
for l in range (0, len (metadata)):
80-
metadata[l].pop('schema', None)
81-
metadata[l].pop('element', None)
82-
metadata[l].pop('qualifier', None)
83-
languageValue = metadata[l]['language']
84-
if metadata[l]['key'] == deletedKey:
85-
provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
86-
provNoteElement = {}
87-
provNoteElement['key'] = 'dc.description.provenance'
88-
provNoteElement['value'] = unicode(provNote)
89-
provNoteElement['language'] = 'en_US'
90-
itemMetadataProcessed.append(provNoteElement)
91-
else:
92-
itemMetadataProcessed.append(metadata[l])
93-
recordsEdited = recordsEdited + 1
94-
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
95-
print 'updated', itemLink, recordsEdited
96-
delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify)
97-
print delete
98-
post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
99-
print post
100-
f.writerow([itemLink]+[deletedKey]+[delete]+[post])
79+
itemLinks.append(itemLink)
10180
offset = offset + 200
10281
print offset
82+
for itemLink in itemLinks:
83+
itemMetadataProcessed = []
84+
print itemLink
85+
metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json()
86+
for l in range (0, len (metadata)):
87+
metadata[l].pop('schema', None)
88+
metadata[l].pop('element', None)
89+
metadata[l].pop('qualifier', None)
90+
languageValue = metadata[l]['language']
91+
if metadata[l]['key'] == deletedKey:
92+
provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
93+
provNoteElement = {}
94+
provNoteElement['key'] = 'dc.description.provenance'
95+
provNoteElement['value'] = unicode(provNote)
96+
provNoteElement['language'] = 'en_US'
97+
itemMetadataProcessed.append(provNoteElement)
98+
else:
99+
itemMetadataProcessed.append(metadata[l])
100+
recordsEdited = recordsEdited + 1
101+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
102+
print 'updated', itemLink, recordsEdited
103+
delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify)
104+
print delete
105+
post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
106+
print post
107+
f.writerow([itemLink]+[deletedKey]+[delete]+[post])
103108

104109
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)
105110

deleteKeyValuePairFromCollection.py

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
import csv
66
from datetime import datetime
77
import urllib3
8-
9-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
8+
import argparse
109

1110
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
1211
if secretsVersion != '':
@@ -18,15 +17,33 @@
1817
else:
1918
print 'Editing Stage'
2019

20+
parser = argparse.ArgumentParser()
21+
parser.add_argument('-k', '--deletedKey', help='the key to be deleted. optional - if not provided, the script will ask for input')
22+
parser.add_argument('-v', '--deletedValue', help='the value to be deleted. optional - if not provided, the script will ask for input')
23+
parser.add_argument('-i', '--handle', help='handle of the community to retreive. optional - if not provided, the script will ask for input')
24+
args = parser.parse_args()
25+
26+
if args.deletedKey:
27+
deletedKey = args.deletedKey
28+
else:
29+
deletedKey = raw_input('Enter the key to be deleted: ')
30+
if args.deletedValue:
31+
deletedValue = args.deletedValue
32+
else:
33+
deletedValue = raw_input('Enter the value to be deleted: ')
34+
if args.handle:
35+
handle = args.handle
36+
else:
37+
handle = raw_input('Enter collection handle: ')
38+
39+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
40+
2141
baseURL = secrets.baseURL
2242
email = secrets.email
2343
password = secrets.password
2444
filePath = secrets.filePath
2545
verify = secrets.verify
26-
27-
collectionHandle = raw_input('Enter collection handle: ')
28-
deletedKey = raw_input('Enter key to be deleted: ')
29-
deletedValue = raw_input('Enter value to be deleted: ')
46+
skippedCollections = secrets.skippedCollections
3047

3148
startTime = time.time()
3249
data = {'email':email,'password':password}
@@ -38,36 +55,36 @@
3855
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
3956
print 'authenticated'
4057

41-
itemList = []
42-
endpoint = baseURL+'/rest/handle/'+collectionHandle
58+
endpoint = baseURL+'/rest/handle/'+handle
4359
collection = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
4460
collectionID = collection['uuid']
61+
collSels = '&collSel[]=' + collectionID
62+
63+
f=csv.writer(open(filePath+'deletedKey'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
64+
f.writerow(['itemID']+['deletedKey']+['deletedValue']+['delete']+['post'])
65+
recordsEdited = 0
4566
offset = 0
4667
items = ''
68+
itemLinks = []
4769
while items != []:
48-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
49-
while items.status_code != 200:
50-
time.sleep(5)
51-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
52-
items = items.json()
53-
for k in range (0, len (items)):
54-
itemID = items[k]['uuid']
55-
itemList.append(itemID)
70+
endpoint = baseURL+'/rest/filtered-items?query_field[]='+deletedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset)
71+
print endpoint
72+
response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
73+
items = response['items']
74+
for item in items:
75+
itemMetadataProcessed = []
76+
itemLink = item['link']
77+
itemLinks.append(itemLink)
5678
offset = offset + 200
57-
elapsedTime = time.time() - startTime
58-
m, s = divmod(elapsedTime, 60)
59-
h, m = divmod(m, 60)
60-
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
61-
62-
recordsEdited = 0
63-
f=csv.writer(open(filePath+'deletedKey'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
64-
f.writerow(['itemID']+['deletedKey']+['deletedValue']+['delete']+['post'])
65-
for number, itemID in enumerate(itemList):
66-
itemsRemaining = len(itemList) - number
67-
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
68-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
79+
print offset
80+
for itemLink in itemLinks:
6981
itemMetadataProcessed = []
82+
print itemLink
83+
metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json()
7084
for l in range (0, len (metadata)):
85+
metadata[l].pop('schema', None)
86+
metadata[l].pop('element', None)
87+
metadata[l].pop('qualifier', None)
7188
if metadata[l]['key'] == deletedKey and metadata[l]['value'] == deletedValue:
7289
provNote = '\''+deletedKey+':'+deletedValue+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
7390
provNoteElement = {}
@@ -77,16 +94,15 @@
7794
itemMetadataProcessed.append(provNoteElement)
7895
else:
7996
itemMetadataProcessed.append(metadata[l])
80-
8197
if itemMetadataProcessed != metadata:
8298
recordsEdited = recordsEdited + 1
8399
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
84-
print 'updated', itemID, recordsEdited
85-
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify)
100+
print 'updated', itemLink, recordsEdited
101+
delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify)
86102
print delete
87-
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
103+
post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
88104
print post
89-
f.writerow([itemID]+[deletedKey]+[deletedValue]+[delete]+[post])
105+
f.writerow([itemLink]+[deletedKey]+[deletedValue]+[delete]+[post])
90106

91107
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)
92108

editBitstreamsNames.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
password = secrets.password
3333
filePath = secrets.filePath
3434
verify = secrets.verify
35+
skippedCollections = secrets.skippedCollections
3536

3637
startTime = time.time()
3738
data = {'email':email,'password':password}

0 commit comments

Comments
 (0)