Skip to content

Commit c8d52b6

Browse files
committed
PR updates
1 parent 4c1d830 commit c8d52b6

File tree

7 files changed, with 124 additions (+124) and 131 deletions (-131).

7 files changed, with 124 additions (+124) and 131 deletions (-131).

dsaps/cli.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,17 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
9191
@main.command()
9292
@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
9393
help='The path of the CSV file of metadata.')
94+
@click.option('-o', '--output_path', prompt='Enter the output path',
95+
default='', help='The path of the output files, include '
96+
'/ at the end of the path')
9497
@click.option('-f', '--file_path', prompt='Enter the path',
95-
help='The path of the content, a URL or local drive path.')
98+
help='The path of the content, a URL or local drive path.'
99+
'Include / at the end of a local drive path.')
96100
@click.option('-t', '--file_type', prompt='Enter the file type',
97101
help='The file type to be uploaded.')
98-
def reconcile(metadata_csv, file_path, file_type):
99-
workflows.reconcile_files_and_metadata(metadata_csv, file_path, file_type)
102+
def reconcile(metadata_csv, file_path, file_type, output_path):
103+
workflows.reconcile_files_and_metadata(metadata_csv, output_path,
104+
file_path, file_type)
100105

101106

102107
@main.command()

dsaps/models.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -128,11 +128,11 @@ def post_bitstreams_to_item(self, item_id, file_identifier, file_dict,
128128
"""Post a sorted set of bitstreams to a specified item."""
129129
file_dict = collections.OrderedDict(sorted(file_dict.items()))
130130
for bitstream, v in file_dict.items():
131-
bit_id = self.post_bitstream(item_id, file_identifier, file_dict,
132-
ingest_type, bitstream)
131+
bit_id = self.post_bitstream(item_id, file_dict, ingest_type,
132+
bitstream)
133133
yield bit_id
134134

135-
def post_bitstream(self, item_id, file_identifier, file_dict, ingest_type,
135+
def post_bitstream(self, item_id, file_dict, ingest_type,
136136
bitstream):
137137
"""Post a bitstream to a specified item."""
138138
bitstream_path = file_dict[bitstream]

dsaps/workflows.py

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5,19 +5,17 @@
55
from dsaps import models
66

77

8-
def create_file_dict_and_list(file_path, file_type):
9-
"""Creates a dict of file IDs and file paths and a list of file IDs."""
8+
def create_file_dict(file_path, file_type):
9+
"""Creates a dict of file IDs and file paths."""
1010
if file_path.startswith('http'):
1111
file_dict = models.build_file_dict_remote(file_path, file_type, {})
1212
else:
1313
files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
1414
file_dict = {}
15-
file_ids = []
1615
for file in files:
1716
file_name = os.path.splitext(os.path.basename(file))[0]
1817
file_dict[file_name] = file
19-
file_ids.append(file_name)
20-
return file_dict, file_ids
18+
return file_dict
2119

2220

2321
def create_metadata_id_list(metadata_csv):
@@ -31,7 +29,7 @@ def create_metadata_id_list(metadata_csv):
3129
return metadata_ids
3230

3331

34-
def match_files_to_metadata(file_dict, file_ids, metadata_ids):
32+
def match_files_to_metadata(file_dict, metadata_ids):
3533
"""Creates a list of files matched to metadata records."""
3634
file_matches = []
3735
for file_id, v in file_dict.items():
@@ -45,35 +43,37 @@ def match_metadata_to_files(file_dict, metadata_ids):
4543
"""Creates a list of metadata records matched to files."""
4644
metadata_matches = []
4745
for metadata_id in metadata_ids:
48-
for file_id in file_dict:
49-
if file_id.startswith(metadata_id):
50-
metadata_matches.append(metadata_id)
46+
for file_id in [f for f in file_dict
47+
if f.startswith(metadata_id)]:
48+
metadata_matches.append(metadata_id)
5149
return metadata_matches
5250

5351

54-
def reconcile_files_and_metadata(metadata_csv, file_path, file_type):
52+
def reconcile_files_and_metadata(metadata_csv, output_path, file_path,
53+
file_type):
5554
"""Runs a reconciliation of files and metadata."""
56-
file_dict, file_ids = create_file_dict_and_list(file_path, file_type)
55+
file_dict = create_file_dict(file_path, file_type)
56+
file_ids = file_dict.keys()
5757
metadata_ids = create_metadata_id_list(metadata_csv)
5858
metadata_matches = match_metadata_to_files(file_dict, metadata_ids)
59-
file_matches = match_files_to_metadata(file_dict, file_ids, metadata_ids)
59+
file_matches = match_files_to_metadata(file_dict, metadata_ids)
6060
no_files = set(metadata_ids) - set(metadata_matches)
6161
no_metadata = set(file_ids) - set(file_matches)
62-
models.create_csv_from_list(no_metadata, 'no_metadata')
63-
models.create_csv_from_list(no_files, 'no_files')
64-
models.create_csv_from_list(metadata_matches, 'metadata_matches')
65-
update_metadata_csv(metadata_csv, metadata_matches)
62+
models.create_csv_from_list(no_metadata, f'{output_path}no_metadata')
63+
models.create_csv_from_list(no_files, f'{output_path}no_files')
64+
models.create_csv_from_list(metadata_matches,
65+
f'{output_path}metadata_matches')
66+
update_metadata_csv(metadata_csv, output_path, metadata_matches)
6667

6768

68-
def update_metadata_csv(metadata_csv, metadata_matches):
69+
def update_metadata_csv(metadata_csv, output_path, metadata_matches):
6970
"""Creates an updated CSV of metadata records with matching files."""
7071
with open(metadata_csv) as csvfile:
7172
reader = csv.DictReader(csvfile)
7273
upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}'
73-
with open(f'{upd_md_file_name}', 'w') as updated_csv:
74+
with open(f'{output_path}{upd_md_file_name}', 'w') as updated_csv:
7475
writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames)
7576
writer.writeheader()
76-
csvfile.seek(0)
7777
for row in reader:
7878
if row['file_identifier'] in metadata_matches:
7979
writer.writerow(row)

tests/conftest.py

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from dsaps import models
88

99

10-
@pytest.fixture(autouse=True)
10+
@pytest.fixture()
1111
def client():
1212
client = models.Client('mock://example.com/')
1313
client.header = {}
@@ -17,7 +17,7 @@ def client():
1717

1818

1919
@pytest.fixture(autouse=True)
20-
def ds_mock():
20+
def web_mock():
2121
with requests_mock.Mocker() as m:
2222
cookies = {'JSESSIONID': '11111111'}
2323
m.post('mock://example.com/login', cookies=cookies)
@@ -44,27 +44,39 @@ def ds_mock():
4444
b_json_2 = {'uuid': 'e5f6'}
4545
url_2 = 'mock://example.com/items/a1b2/bitstreams?name=test_02.pdf'
4646
m.post(url_2, json=b_json_2)
47+
m.get('mock://remoteserver.com/files/test_01.pdf', content=b'')
4748
yield m
4849

4950

50-
@pytest.fixture(autouse=True)
51+
@pytest.fixture()
5152
def runner():
5253
return CliRunner()
5354

5455

55-
@pytest.fixture(autouse=True)
56-
def sample_files_dir(tmp_path):
57-
sample_files_dir = tmp_path / 'files'
58-
sample_files_dir.mkdir()
59-
with open(f'{sample_files_dir}/test_01.pdf', 'w'):
56+
@pytest.fixture()
57+
def input_dir(tmp_path):
58+
input_dir = tmp_path / 'files'
59+
input_dir.mkdir()
60+
input_2nd_lvl = input_dir / 'more_files'
61+
input_2nd_lvl.mkdir()
62+
with open(f'{input_dir}/test_01.pdf', 'w'):
6063
pass
61-
with open(f'{sample_files_dir}/test_02.pdf', 'w'):
64+
with open(f'{input_2nd_lvl}/test_02.pdf', 'w'):
6265
pass
63-
with open(f'{sample_files_dir}/best_01.pdf', 'w'):
66+
with open(f'{input_dir}/best_01.pdf', 'w'):
6467
pass
65-
with open(f'{sample_files_dir}/metadata.csv', 'w') as csvfile:
68+
with open(f'{input_dir}/test_01.jpg', 'w'):
69+
pass
70+
with open(f'{input_dir}/metadata.csv', 'w') as csvfile:
6671
writer = csv.writer(csvfile)
6772
writer.writerow(['uri'] + ['title'] + ['file_identifier'])
6873
writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test'])
6974
writer.writerow(['/repo/0/ao/456'] + ['Tast Item'] + ['tast'])
70-
return str(sample_files_dir)
75+
return str(f'{input_dir}/')
76+
77+
78+
@pytest.fixture()
79+
def output_dir(tmp_path):
80+
output_dir = tmp_path / 'output'
81+
output_dir.mkdir()
82+
return str(f'{output_dir}/')

tests/test_cli.py

Lines changed: 12 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,17 @@
1-
import csv
2-
import os
3-
import requests_mock
4-
51
from dsaps.cli import main
62

73

8-
def test_reconcile(runner):
4+
def test_reconcile(runner, input_dir, output_dir):
95
"""Test reconcile command."""
10-
with requests_mock.Mocker() as m:
11-
with runner.isolated_filesystem():
12-
os.mkdir('files')
13-
with open('metadata.csv', 'w') as csvfile:
14-
writer = csv.writer(csvfile)
15-
writer.writerow(['uri'] + ['title'] + ['file_identifier'])
16-
writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test'])
17-
cookies = {'JSESSIONID': '11111111'}
18-
user_json = {'fullname': 'User Name'}
19-
m.post('mock://example.com/login', cookies=cookies)
20-
m.get('mock://example.com/status', json=user_json)
21-
result = runner.invoke(main,
22-
['--url', 'mock://example.com/',
23-
'--email', 'test@test.mock',
24-
'--password', '1234',
25-
'reconcile',
26-
'--metadata_csv', 'metadata.csv',
27-
'--file_path', 'files',
28-
'--file_type', 'pdf'
29-
])
6+
result = runner.invoke(main,
7+
['--url', 'mock://example.com/',
8+
'--email', 'test@test.mock',
9+
'--password', '1234',
10+
'reconcile',
11+
'--metadata_csv',
12+
f'{input_dir}/metadata.csv',
13+
'--file_path', 'files',
14+
'--file_type', 'pdf',
15+
'--output_path', f'{output_dir}'
16+
])
3017
assert result.exit_code == 0

tests/test_models.py

Lines changed: 20 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def test_post_coll_to_comm(client):
4545
assert coll_id == '5678'
4646

4747

48-
def test_post_items_to_coll(client, sample_files_dir):
48+
def test_post_items_to_coll(client, input_dir):
4949
"""Test post_items_to_coll method."""
5050
coll_metadata = [{"metadata": [
5151
{"key": "file_identifier",
@@ -57,38 +57,35 @@ def test_post_items_to_coll(client, sample_files_dir):
5757
"value": "repo/0/ao/123"}]}]
5858
coll_id = '789'
5959
ingest_type = 'local'
60-
file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'}
60+
file_dict = {'test_01': f'{input_dir}test_01.pdf'}
6161
item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict,
6262
ingest_type)
6363
for item_id in item_ids:
6464
assert 'a1b2' == item_id
6565

6666

67-
def test_post_bitstreams_to_item(client, sample_files_dir):
67+
def test_post_bitstreams_to_item(client, input_dir):
6868
"""Test post_bitstreams_to_item method."""
6969
item_id = 'a1b2'
7070
ingest_type = 'local'
7171
file_identifier = '123'
72-
file_dict = {'test_02': f'{sample_files_dir}/test_02.pdf',
73-
'test_01': f'{sample_files_dir}/test_01.pdf'}
72+
file_dict = {'test_02': f'{input_dir}more_files/test_02.pdf',
73+
'test_01': f'{input_dir}test_01.pdf'}
7474
bit_ids = client.post_bitstreams_to_item(item_id, file_identifier,
7575
file_dict, ingest_type)
76-
bit_ids_output = []
77-
for bit_id in bit_ids:
78-
bit_ids_output.append(bit_id)
79-
assert bit_ids_output[0] == 'c3d4'
80-
assert bit_ids_output[1] == 'e5f6'
76+
assert next(bit_ids) == 'c3d4'
77+
assert next(bit_ids) == 'e5f6'
8178

8279

83-
def test_post_bitstream(client, sample_files_dir):
80+
def test_post_bitstream(client, input_dir):
8481
"""Test post_bitstream method."""
8582
item_id = 'a1b2'
86-
ingest_type = 'local'
87-
file_identifier = '123'
88-
file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'}
83+
file_dict = {'test_01': f'{input_dir}test_01.pdf'}
8984
bitstream = 'test_01'
90-
bit_id = client.post_bitstream(item_id, file_identifier, file_dict,
91-
ingest_type, bitstream)
85+
bit_id = client.post_bitstream(item_id, file_dict, 'local', bitstream)
86+
assert 'c3d4' == bit_id
87+
file_dict = {'test_01': 'mock://remoteserver.com/files/test_01.pdf'}
88+
bit_id = client.post_bitstream(item_id, file_dict, 'remote', bitstream)
9289
assert 'c3d4' == bit_id
9390

9491

@@ -126,15 +123,14 @@ def test_build_file_dict_remote():
126123
assert '999' in file_list
127124

128125

129-
def test_create_csv_from_list(runner):
126+
def test_create_csv_from_list(output_dir):
130127
"""Test create_csv_from_list function."""
131-
with runner.isolated_filesystem():
132-
list_name = ['123']
133-
models.create_csv_from_list(list_name, 'output')
134-
with open('output.csv') as csvfile:
135-
reader = csv.DictReader(csvfile)
136-
for row in reader:
137-
assert row['id'] == '123'
128+
list_name = ['123']
129+
models.create_csv_from_list(list_name, f'{output_dir}output')
130+
with open(f'{output_dir}output.csv') as csvfile:
131+
reader = csv.DictReader(csvfile)
132+
for row in reader:
133+
assert row['id'] == '123'
138134

139135

140136
def test_metadata_elems_from_row():

0 commit comments

Comments
 (0)