Skip to content

Commit 3a0c812

Browse files
committed
Restrict search space in MCProd harvester tool
1 parent b2a9dcc commit 3a0c812

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

MC/prodinfo/mcprodinfo_harvester.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,16 @@ def split_key(entry):
135135
def process_prod_tag(prod_tag, year="2025", ccdb_url=None, username=None):
136136
base_path = f"/alice/sim/{year}/{prod_tag}"
137137

138+
workflow_files = alien_find(base_path, "workflow.json")
139+
# exclude some unnecessary paths
140+
workflow_files = [
141+
zf for zf in workflow_files
142+
if "/AOD/" not in zf and "/QC/" not in zf and "/TimeseriesTPCmerging/" not in zf and "/Stage" not in zf
143+
]
144+
# directories containing workflow.json
145+
workflow_dirs = {os.path.dirname(wf) for wf in workflow_files}
146+
print (f"Found {len(workflow_dirs)} workflow dirs")
147+
138148
# Step 1: find all log_archive.zip files
139149
print (f"Querying AliEn for all directories with zip files")
140150
zip_files = alien_find(base_path, "log_archive.zip")
@@ -144,10 +154,15 @@ def process_prod_tag(prod_tag, year="2025", ccdb_url=None, username=None):
144154
zf for zf in zip_files
145155
if "/AOD/" not in zf and "/QC/" not in zf and "/TimeseriesTPCmerging/" not in zf and "/Stage" not in zf
146156
]
157+
zip_files_dirs = {os.path.dirname(zf) for zf in zip_files}
158+
print (f"Found {len(zip_files_dirs)} zip dirs")
159+
160+
# keep only zips in dirs where workflow.json also exists
161+
relevant_zips = [zf for zf in zip_files if os.path.dirname(zf) in workflow_dirs]
147162

148163
# Step 2: group by run_number
149164
runs = defaultdict(list)
150-
for zf in zip_files:
165+
for zf in relevant_zips:
151166
parsed = parse_workflow_path(zf, prod_tag)
152167
if parsed is None:
153168
continue

0 commit comments

Comments
 (0)