@@ -135,6 +135,16 @@ def split_key(entry):
135135def process_prod_tag (prod_tag , year = "2025" , ccdb_url = None , username = None ):
136136 base_path = f"/alice/sim/{ year } /{ prod_tag } "
137137
138+ workflow_files = alien_find (base_path , "workflow.json" )
139+ # exclude some unnecessary paths
140+ workflow_files = [
141+ zf for zf in workflow_files
142+ if "/AOD/" not in zf and "/QC/" not in zf and "/TimeseriesTPCmerging/" not in zf and "/Stage" not in zf
143+ ]
144+ # directories containing workflow.json
145+ workflow_dirs = {os .path .dirname (wf ) for wf in workflow_files }
146+ print (f"Found { len (workflow_dirs )} workflow dirs" )
147+
138148 # Step 1: find all log_archive.zip files
139149 print (f"Querying AliEn for all directories with zip files" )
140150 zip_files = alien_find (base_path , "log_archive.zip" )
@@ -144,10 +154,15 @@ def process_prod_tag(prod_tag, year="2025", ccdb_url=None, username=None):
144154 zf for zf in zip_files
145155 if "/AOD/" not in zf and "/QC/" not in zf and "/TimeseriesTPCmerging/" not in zf and "/Stage" not in zf
146156 ]
157+ zip_files_dirs = {os .path .dirname (zf ) for zf in zip_files }
158+ print (f"Found { len (zip_files_dirs )} zip dirs" )
159+
160+ # keep only zips in dirs where workflow.json also exists
161+ relevant_zips = [zf for zf in zip_files if os .path .dirname (zf ) in workflow_dirs ]
147162
148163 # Step 2: group by run_number
149164 runs = defaultdict (list )
150- for zf in zip_files :
165+ for zf in relevant_zips :
151166 parsed = parse_workflow_path (zf , prod_tag )
152167 if parsed is None :
153168 continue
0 commit comments