PR updates

ehanson8 · ehanson8 · commit c8d52b6617d3 · 2021-03-18T15:21:40.000-04:00
diff --git a/dsaps/cli.py b/dsaps/cli.py
@@ -91,12 +91,17 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
 @main.command()
 @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
               help='The path of the CSV file of metadata.')
+@click.option('-o', '--output_path', prompt='Enter the output path',
+              default='', help='The path of the output files, include '
+              '/ at the end of the path')
 @click.option('-f', '--file_path', prompt='Enter the path',
-              help='The path of the content, a URL or local drive path.')
+              help='The path of the content, a URL or local drive path. '
+              'Include / at the end of a local drive path.')
 @click.option('-t', '--file_type', prompt='Enter the file type',
               help='The file type to be uploaded.')
-def reconcile(metadata_csv, file_path, file_type):
-    workflows.reconcile_files_and_metadata(metadata_csv, file_path, file_type)
+def reconcile(metadata_csv, file_path, file_type, output_path):
+    workflows.reconcile_files_and_metadata(metadata_csv, output_path,
+                                           file_path, file_type)
 
 
 @main.command()
diff --git a/dsaps/models.py b/dsaps/models.py
@@ -128,11 +128,11 @@ def post_bitstreams_to_item(self, item_id, file_identifier, file_dict,
         """Post a sorted set of bitstreams to a specified item."""
         file_dict = collections.OrderedDict(sorted(file_dict.items()))
         for bitstream, v in file_dict.items():
-            bit_id = self.post_bitstream(item_id, file_identifier, file_dict,
-                                         ingest_type, bitstream)
+            bit_id = self.post_bitstream(item_id, file_dict, ingest_type,
+                                         bitstream)
             yield bit_id
 
-    def post_bitstream(self, item_id, file_identifier, file_dict, ingest_type,
+    def post_bitstream(self, item_id, file_dict, ingest_type,
                        bitstream):
         """Post a bitstream to a specified item."""
         bitstream_path = file_dict[bitstream]
diff --git a/dsaps/workflows.py b/dsaps/workflows.py
@@ -5,19 +5,17 @@
 from dsaps import models
 
 
-def create_file_dict_and_list(file_path, file_type):
-    """Creates a dict of file IDs and file paths and a list of file IDs."""
+def create_file_dict(file_path, file_type):
+    """Creates a dict of file IDs and file paths."""
     if file_path.startswith('http'):
         file_dict = models.build_file_dict_remote(file_path, file_type, {})
     else:
         files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
         file_dict = {}
-        file_ids = []
         for file in files:
             file_name = os.path.splitext(os.path.basename(file))[0]
             file_dict[file_name] = file
-            file_ids.append(file_name)
-    return file_dict, file_ids
+    return file_dict
 
 
 def create_metadata_id_list(metadata_csv):
@@ -31,7 +29,7 @@ def create_metadata_id_list(metadata_csv):
     return metadata_ids
 
 
-def match_files_to_metadata(file_dict, file_ids, metadata_ids):
+def match_files_to_metadata(file_dict, metadata_ids):
     """Creates a list of files matched to metadata records."""
     file_matches = []
     for file_id, v in file_dict.items():
@@ -45,35 +43,37 @@ def match_metadata_to_files(file_dict, metadata_ids):
     """Creates a list of metadata records matched to files."""
     metadata_matches = []
     for metadata_id in metadata_ids:
-        for file_id in file_dict:
-            if file_id.startswith(metadata_id):
-                metadata_matches.append(metadata_id)
+        for file_id in [f for f in file_dict
+                        if f.startswith(metadata_id)]:
+            metadata_matches.append(metadata_id)
     return metadata_matches
 
 
-def reconcile_files_and_metadata(metadata_csv, file_path, file_type):
+def reconcile_files_and_metadata(metadata_csv, output_path, file_path,
+                                 file_type):
     """Runs a reconciliation of files and metadata."""
-    file_dict, file_ids = create_file_dict_and_list(file_path, file_type)
+    file_dict = create_file_dict(file_path, file_type)
+    file_ids = file_dict.keys()
     metadata_ids = create_metadata_id_list(metadata_csv)
     metadata_matches = match_metadata_to_files(file_dict, metadata_ids)
-    file_matches = match_files_to_metadata(file_dict, file_ids, metadata_ids)
+    file_matches = match_files_to_metadata(file_dict, metadata_ids)
     no_files = set(metadata_ids) - set(metadata_matches)
     no_metadata = set(file_ids) - set(file_matches)
-    models.create_csv_from_list(no_metadata, 'no_metadata')
-    models.create_csv_from_list(no_files, 'no_files')
-    models.create_csv_from_list(metadata_matches, 'metadata_matches')
-    update_metadata_csv(metadata_csv, metadata_matches)
+    models.create_csv_from_list(no_metadata, f'{output_path}no_metadata')
+    models.create_csv_from_list(no_files, f'{output_path}no_files')
+    models.create_csv_from_list(metadata_matches,
+                                f'{output_path}metadata_matches')
+    update_metadata_csv(metadata_csv, output_path, metadata_matches)
 
 
-def update_metadata_csv(metadata_csv, metadata_matches):
+def update_metadata_csv(metadata_csv, output_path, metadata_matches):
     """Creates an updated CSV of metadata records with matching files."""
     with open(metadata_csv) as csvfile:
         reader = csv.DictReader(csvfile)
         upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}'
-        with open(f'{upd_md_file_name}', 'w') as updated_csv:
+        with open(f'{output_path}{upd_md_file_name}', 'w') as updated_csv:
             writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames)
             writer.writeheader()
-            csvfile.seek(0)
             for row in reader:
                 if row['file_identifier'] in metadata_matches:
                     writer.writerow(row)
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -7,7 +7,7 @@
 from dsaps import models
 
 
-@pytest.fixture(autouse=True)
+@pytest.fixture()
 def client():
     client = models.Client('mock://example.com/')
     client.header = {}
@@ -17,7 +17,7 @@ def client():
 
 
 @pytest.fixture(autouse=True)
-def ds_mock():
+def web_mock():
     with requests_mock.Mocker() as m:
         cookies = {'JSESSIONID': '11111111'}
         m.post('mock://example.com/login', cookies=cookies)
@@ -44,27 +44,39 @@ def ds_mock():
         b_json_2 = {'uuid': 'e5f6'}
         url_2 = 'mock://example.com/items/a1b2/bitstreams?name=test_02.pdf'
         m.post(url_2, json=b_json_2)
+        m.get('mock://remoteserver.com/files/test_01.pdf', content=b'')
         yield m
 
 
-@pytest.fixture(autouse=True)
+@pytest.fixture()
 def runner():
     return CliRunner()
 
 
-@pytest.fixture(autouse=True)
-def sample_files_dir(tmp_path):
-    sample_files_dir = tmp_path / 'files'
-    sample_files_dir.mkdir()
-    with open(f'{sample_files_dir}/test_01.pdf', 'w'):
+@pytest.fixture()
+def input_dir(tmp_path):
+    input_dir = tmp_path / 'files'
+    input_dir.mkdir()
+    input_2nd_lvl = input_dir / 'more_files'
+    input_2nd_lvl.mkdir()
+    with open(f'{input_dir}/test_01.pdf', 'w'):
         pass
-    with open(f'{sample_files_dir}/test_02.pdf', 'w'):
+    with open(f'{input_2nd_lvl}/test_02.pdf', 'w'):
         pass
-    with open(f'{sample_files_dir}/best_01.pdf', 'w'):
+    with open(f'{input_dir}/best_01.pdf', 'w'):
         pass
-    with open(f'{sample_files_dir}/metadata.csv', 'w') as csvfile:
+    with open(f'{input_dir}/test_01.jpg', 'w'):
+        pass
+    with open(f'{input_dir}/metadata.csv', 'w') as csvfile:
         writer = csv.writer(csvfile)
         writer.writerow(['uri'] + ['title'] + ['file_identifier'])
         writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test'])
         writer.writerow(['/repo/0/ao/456'] + ['Tast Item'] + ['tast'])
-    return str(sample_files_dir)
+    return str(f'{input_dir}/')
+
+
+@pytest.fixture()
+def output_dir(tmp_path):
+    output_dir = tmp_path / 'output'
+    output_dir.mkdir()
+    return str(f'{output_dir}/')
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -1,30 +1,17 @@
-import csv
-import os
-import requests_mock
-
 from dsaps.cli import main
 
 
-def test_reconcile(runner):
+def test_reconcile(runner, input_dir, output_dir):
     """Test reconcile command."""
-    with requests_mock.Mocker() as m:
-        with runner.isolated_filesystem():
-            os.mkdir('files')
-            with open('metadata.csv', 'w') as csvfile:
-                writer = csv.writer(csvfile)
-                writer.writerow(['uri'] + ['title'] + ['file_identifier'])
-                writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test'])
-            cookies = {'JSESSIONID': '11111111'}
-            user_json = {'fullname': 'User Name'}
-            m.post('mock://example.com/login', cookies=cookies)
-            m.get('mock://example.com/status', json=user_json)
-            result = runner.invoke(main,
-                                   ['--url', 'mock://example.com/',
-                                    '--email', 'test@test.mock',
-                                    '--password', '1234',
-                                    'reconcile',
-                                    '--metadata_csv', 'metadata.csv',
-                                    '--file_path', 'files',
-                                    '--file_type', 'pdf'
-                                    ])
+    result = runner.invoke(main,
+                           ['--url', 'mock://example.com/',
+                            '--email', 'test@test.mock',
+                            '--password', '1234',
+                            'reconcile',
+                            '--metadata_csv',
+                            f'{input_dir}/metadata.csv',
+                            '--file_path', 'files',
+                            '--file_type', 'pdf',
+                            '--output_path', f'{output_dir}'
+                            ])
     assert result.exit_code == 0
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -45,7 +45,7 @@ def test_post_coll_to_comm(client):
     assert coll_id == '5678'
 
 
-def test_post_items_to_coll(client, sample_files_dir):
+def test_post_items_to_coll(client, input_dir):
     """Test post_items_to_coll method."""
     coll_metadata = [{"metadata": [
                      {"key": "file_identifier",
@@ -57,38 +57,35 @@ def test_post_items_to_coll(client, sample_files_dir):
                       "value": "repo/0/ao/123"}]}]
     coll_id = '789'
     ingest_type = 'local'
-    file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'}
+    file_dict = {'test_01': f'{input_dir}test_01.pdf'}
     item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict,
                                          ingest_type)
     for item_id in item_ids:
         assert 'a1b2' == item_id
 
 
-def test_post_bitstreams_to_item(client, sample_files_dir):
+def test_post_bitstreams_to_item(client, input_dir):
     """Test post_bitstreams_to_item method."""
     item_id = 'a1b2'
     ingest_type = 'local'
     file_identifier = '123'
-    file_dict = {'test_02': f'{sample_files_dir}/test_02.pdf',
-                 'test_01': f'{sample_files_dir}/test_01.pdf'}
+    file_dict = {'test_02': f'{input_dir}more_files/test_02.pdf',
+                 'test_01': f'{input_dir}test_01.pdf'}
     bit_ids = client.post_bitstreams_to_item(item_id, file_identifier,
                                              file_dict, ingest_type)
-    bit_ids_output = []
-    for bit_id in bit_ids:
-        bit_ids_output.append(bit_id)
-    assert bit_ids_output[0] == 'c3d4'
-    assert bit_ids_output[1] == 'e5f6'
+    assert next(bit_ids) == 'c3d4'
+    assert next(bit_ids) == 'e5f6'
 
 
-def test_post_bitstream(client, sample_files_dir):
+def test_post_bitstream(client, input_dir):
     """Test post_bitstream method."""
     item_id = 'a1b2'
-    ingest_type = 'local'
-    file_identifier = '123'
-    file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'}
+    file_dict = {'test_01': f'{input_dir}test_01.pdf'}
     bitstream = 'test_01'
-    bit_id = client.post_bitstream(item_id, file_identifier, file_dict,
-                                   ingest_type, bitstream)
+    bit_id = client.post_bitstream(item_id, file_dict, 'local', bitstream)
+    assert 'c3d4' == bit_id
+    file_dict = {'test_01': 'mock://remoteserver.com/files/test_01.pdf'}
+    bit_id = client.post_bitstream(item_id, file_dict, 'remote', bitstream)
     assert 'c3d4' == bit_id
 
 
@@ -126,15 +123,14 @@ def test_build_file_dict_remote():
         assert '999' in file_list
 
 
-def test_create_csv_from_list(runner):
+def test_create_csv_from_list(output_dir):
     """Test create_csv_from_list function."""
-    with runner.isolated_filesystem():
-        list_name = ['123']
-        models.create_csv_from_list(list_name, 'output')
-        with open('output.csv') as csvfile:
-            reader = csv.DictReader(csvfile)
-            for row in reader:
-                assert row['id'] == '123'
+    list_name = ['123']
+    models.create_csv_from_list(list_name, f'{output_dir}output')
+    with open(f'{output_dir}output.csv') as csvfile:
+        reader = csv.DictReader(csvfile)
+        for row in reader:
+            assert row['id'] == '123'
 
 
 def test_metadata_elems_from_row():
diff --git a/tests/test_workflows.py b/tests/test_workflows.py