Skip to content

Commit 257b61c

Browse files
authored
Merge pull request #12 from MITLibraries/reconcile-command
reconcile command
2 parents 9cdc68b + 1957bdf commit 257b61c

File tree

3 files changed

+64
-8
lines changed

3 files changed

+64
-8
lines changed

dsaps/cli.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import csv
12
import datetime
23
import glob
34
import json
@@ -105,5 +106,44 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
105106
models.elapsed_time(start_time, 'Total runtime:')
106107

107108

109+
@main.command()
@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
              help='The path of the CSV file of metadata.')
@click.option('-f', '--file_path', prompt='Enter the path',
              help='The path of the content, a URL or local drive path.')
@click.option('-t', '--file_type', prompt='Enter the file type',
              help='The file type to be uploaded.')
def reconcile(metadata_csv, file_path, file_type):
    """Reconcile the identifiers in a metadata CSV against content files.

    Writes no_metadata.csv (files without a metadata row), no_files.csv
    (metadata rows without a file) and metadata_matches.csv (identifiers
    found in both) to the current working directory.
    """
    # Map each file identifier to its location. Remote listings are
    # delegated to models.build_file_dict_remote; local files are keyed
    # by base name without extension.
    if file_path.startswith('http'):
        file_dict = models.build_file_dict_remote(file_path, file_type, {})
    else:
        # The dict must exist before it is populated; without this the
        # local-path branch raised NameError.
        file_dict = {}
        pattern = f'{file_path}/**/*.{file_type}'
        for file in glob.glob(pattern, recursive=True):
            file_name = os.path.splitext(os.path.basename(file))[0]
            file_dict[file_name] = file

    # newline='' lets the csv module handle embedded line breaks itself.
    with open(metadata_csv, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        metadata_ids = [row['file_identifier'] for row in reader]

    file_ids = set(file_dict)
    metadata_id_set = set(metadata_ids)

    # Keep CSV order (and any duplicate rows) for the matches report;
    # set membership replaces the former nested loops.
    metadata_matches = [m for m in metadata_ids if m in file_ids]
    no_files = metadata_id_set - file_ids
    no_metadata = file_ids - metadata_id_set

    # Sets have no defined order; sort so the reports are reproducible.
    models.create_csv_from_list(sorted(no_metadata), 'no_metadata.csv')
    models.create_csv_from_list(sorted(no_files), 'no_files.csv')
    models.create_csv_from_list(metadata_matches, 'metadata_matches.csv')
146+
147+
108148
if __name__ == '__main__':
109149
main()

dsaps/models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import csv
12
import datetime
23
from functools import partial
34
import operator
@@ -197,6 +198,15 @@ def build_file_dict_remote(directory_url, file_type, file_dict):
197198
return file_dict
198199

199200

201+
def create_csv_from_list(list_name, output):
    """Create a one-column CSV file from the items of an iterable.

    The file at ``output`` is created or overwritten. It starts with an
    ``id`` header row, followed by one row per item of ``list_name`` in
    iteration order.
    """
    # newline='' is required by the csv module so that the writer's own
    # line terminator is not translated again (doubled on Windows).
    with open(output, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id'])
        writer.writerows([item] for item in list_name)
208+
209+
200210
def elapsed_time(start_time, label):
201211
"""Calculate elapsed time."""
202212
td = datetime.timedelta(seconds=time.time() - start_time)

tests/test_models.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def sample_content(tmp_path):
2525

2626

2727
def test_authenticate(client):
28-
"""Test authenticate function."""
28+
"""Test authenticate method."""
2929
with requests_mock.Mocker() as m:
3030
email = 'test@test.mock'
3131
password = '1234'
@@ -39,7 +39,7 @@ def test_authenticate(client):
3939

4040

4141
def test_get_record(client):
42-
"""Test get_record function."""
42+
"""Test get_record method."""
4343
with requests_mock.Mocker() as m:
4444
uri = 'mock://example.com/items/123?expand=all'
4545
json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'}
@@ -49,7 +49,7 @@ def test_get_record(client):
4949

5050

5151
def test_filtered_item_search(client):
52-
"""Test filtered_item_search function."""
52+
"""Test filtered_item_search method."""
5353
with requests_mock.Mocker() as m:
5454
key = 'dc.title'
5555
string = 'test'
@@ -65,7 +65,7 @@ def test_filtered_item_search(client):
6565

6666

6767
def test_post_coll_to_comm(client):
68-
"""Test post_coll_to_comm function."""
68+
"""Test post_coll_to_comm method."""
6969
with requests_mock.Mocker() as m:
7070
comm_handle = '1234'
7171
coll_name = 'Test Collection'
@@ -79,7 +79,7 @@ def test_post_coll_to_comm(client):
7979

8080

8181
def test_post_items_to_coll(client, sample_content):
82-
"""Test post_items_to_coll function."""
82+
"""Test post_items_to_coll method."""
8383
with requests_mock.Mocker() as m:
8484
coll_metadata = [{"metadata": [
8585
{"key": "file_identifier",
@@ -102,7 +102,7 @@ def test_post_items_to_coll(client, sample_content):
102102

103103

104104
def test_post_bitstreams_to_item(client, sample_content):
105-
"""Test post_bitstreams_to_item function."""
105+
"""Test post_bitstreams_to_item method."""
106106
with requests_mock.Mocker() as m:
107107
item_id = 'a1b2'
108108
ingest_type = 'local'
@@ -118,7 +118,7 @@ def test_post_bitstreams_to_item(client, sample_content):
118118

119119

120120
def test__pop_inst(client):
121-
"""Test _pop_inst function."""
121+
"""Test _pop_inst method."""
122122
class_type = models.Collection
123123
rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []}
124124
rec_obj = client._pop_inst(class_type, rec_obj)
@@ -127,7 +127,7 @@ def test__pop_inst(client):
127127

128128

129129
def test__build_uuid_list(client):
130-
"""Test _build_uuid_list function."""
130+
"""Test _build_uuid_list method."""
131131
rec_obj = {'items': [{'uuid': '1234'}]}
132132
children = 'items'
133133
child_list = client._build_uuid_list(rec_obj, children)
@@ -149,3 +149,9 @@ def test_build_file_dict_remote():
149149
file_list = models.build_file_dict_remote(directory_url, file_type,
150150
file_dict)
151151
assert '999' in file_list
152+
153+
154+
# # How to test this? Applies to asaps as well
155+
# def test_create_csv_from_list():
156+
# """Test create_csv_from_list function."""
157+
# assert False

0 commit comments

Comments
 (0)