55from dsaps import models
66
77
def create_file_dict(file_path, file_type):
    """Creates a dict of file IDs and file paths.

    A file ID is the file's base name with its final extension removed.

    Args:
        file_path: Either a location starting with 'http', which is handed
            to models.build_file_dict_remote, or a local directory that is
            searched recursively.
        file_type: File extension (without the dot) to match.

    Returns:
        A dict mapping each file ID to the path of the matching file.
    """
    if file_path.startswith('http'):
        # Remote listing is delegated entirely to the models helper; the
        # empty dict is the accumulator it is given to start from.
        return models.build_file_dict_remote(file_path, file_type, {})
    matched_paths = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
    # If two files in different subdirectories share a base name, the one
    # that glob yields later wins, exactly as with repeated key assignment.
    return {
        os.path.splitext(os.path.basename(path))[0]: path
        for path in matched_paths
    }
2119
2220
2321def create_metadata_id_list (metadata_csv ):
@@ -31,7 +29,7 @@ def create_metadata_id_list(metadata_csv):
3129 return metadata_ids
3230
3331
34- def match_files_to_metadata (file_dict , file_ids , metadata_ids ):
32+ def match_files_to_metadata (file_dict , metadata_ids ):
3533 """Creates a list of files matched to metadata records."""
3634 file_matches = []
3735 for file_id , v in file_dict .items ():
def match_metadata_to_files(file_dict, metadata_ids):
    """Creates a list of metadata records matched to files.

    A metadata ID matches a file when the file ID starts with it.

    Args:
        file_dict: Dict keyed by file ID; only the keys are examined.
        metadata_ids: Iterable of metadata identifiers.

    Returns:
        A list holding each metadata ID once per file ID it prefixes, in
        metadata_ids order. An ID that prefixes several files is repeated.
    """
    metadata_matches = []
    for metadata_id in metadata_ids:
        # One entry per matching file keeps the original multiplicity.
        metadata_matches.extend(
            metadata_id for file_id in file_dict
            if file_id.startswith(metadata_id))
    return metadata_matches
5250
5351
def reconcile_files_and_metadata(metadata_csv, output_path, file_path,
                                 file_type):
    """Runs a reconciliation of files and metadata.

    Builds the file dict and the metadata ID list, matches them in both
    directions, reports the unmatched IDs on each side and the matched
    metadata IDs via models.create_csv_from_list, and finally writes the
    updated metadata CSV.

    Args:
        metadata_csv: Path to the metadata CSV file.
        output_path: Prefix prepended verbatim to every output name; no
            separator is inserted, so a directory must end with one.
        file_path: Local directory or 'http' location of the files.
        file_type: File extension (without the dot) to match.
    """
    file_dict = create_file_dict(file_path, file_type)
    metadata_ids = create_metadata_id_list(metadata_csv)
    metadata_matches = match_metadata_to_files(file_dict, metadata_ids)
    file_matches = match_files_to_metadata(file_dict, metadata_ids)
    # Set differences leave the IDs that found no counterpart on the other
    # side; iterating the dict yields its keys, i.e. the file IDs.
    no_files = set(metadata_ids) - set(metadata_matches)
    no_metadata = set(file_dict) - set(file_matches)
    models.create_csv_from_list(no_metadata, f'{output_path}no_metadata')
    models.create_csv_from_list(no_files, f'{output_path}no_files')
    models.create_csv_from_list(metadata_matches,
                                f'{output_path}metadata_matches')
    update_metadata_csv(metadata_csv, output_path, metadata_matches)
6667
6768
def update_metadata_csv(metadata_csv, output_path, metadata_matches):
    """Creates an updated CSV of metadata records with matching files.

    Copies the header and every row whose 'file_identifier' value is in
    metadata_matches into a new file named 'updated-<original base name>'.

    Args:
        metadata_csv: Path to the source metadata CSV; it must have a
            'file_identifier' column.
        output_path: Prefix prepended verbatim to the output file name; no
            separator is inserted, so a directory must end with one.
        metadata_matches: Iterable of file identifiers to keep.

    Raises:
        KeyError: If a row has no 'file_identifier' column.
    """
    upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}'
    # Build the lookup once so each row is an O(1) membership test.
    matched_ids = set(metadata_matches)
    # newline='' is what the csv module requires for files it reads or
    # writes; without it Windows output gains an extra '\r' per row.
    with open(metadata_csv, newline='') as csvfile, \
            open(f'{output_path}{upd_md_file_name}', 'w',
                 newline='') as updated_csv:
        reader = csv.DictReader(csvfile)
        writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames)
        writer.writeheader()
        for row in reader:
            if row['file_identifier'] in matched_ids:
                writer.writerow(row)
0 commit comments