Skip to content

Commit cee54bd

Browse files
committed
Adding utility to remove duplicate BCs from a MC-AO2D
This should fix the situation discussed in https://its.cern.ch/jira/browse/O2-6227 Also apply the processing step as part of the O2DPG MC chain, so as to avoid the problem in the future.
1 parent a53c0bd commit cee54bd

File tree

2 files changed

+310
-1
lines changed

2 files changed

+310
-1
lines changed

MC/bin/o2dpg_sim_workflow.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1950,9 +1950,12 @@ def remove_json_prefix(path):
19501950
AOD_merge_task = createTask(name='aodmerge', needs = aodmergerneeds, lab=["AOD"], mem='2000', cpu='1')
19511951
AOD_merge_task['cmd'] = ' set -e ; [ -f aodmerge_input.txt ] && rm aodmerge_input.txt; '
19521952
AOD_merge_task['cmd'] += ' for i in `seq 1 ' + str(NTIMEFRAMES) + '`; do echo "tf${i}/AO2D.root" >> aodmerge_input.txt; done; '
1953-
AOD_merge_task['cmd'] += ' o2-aod-merger --input aodmerge_input.txt --output AO2D.root'
1953+
AOD_merge_task['cmd'] += ' o2-aod-merger --input aodmerge_input.txt --output AO2D_pre.root'
19541954
# produce MonaLisa event stat file
19551955
AOD_merge_task['cmd'] += ' ; ${O2DPG_ROOT}/MC/bin/o2dpg_determine_eventstat.py'
1956+
# reindex the BC + connected tables because it there could be duplicate BC entries due to the orbit-early treatment
1957+
# see https://its.cern.ch/jira/browse/O2-6227
1958+
AOD_merge_task['cmd'] += ' ; root -q -b -l "${O2DPG_ROOT}/MC/utils/AODBcRewriter.C(\\\"AO2D_pre.root\\\",\\\"AO2D.root\\\")"'
19561959
AOD_merge_task['alternative_alienv_package'] = "None" # we want latest software for this step
19571960
workflow['stages'].append(AOD_merge_task)
19581961

MC/utils/AODBcRewriter.C

Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
// A utility to remove duplicate bunch crossing entries
2+
// from the O2bc_ table/TTree and to adjust all tables refering
3+
// to fIndexBC.
4+
// Situations of duplicate BCs can arise in O2DPG MC and are harder to avoid
5+
// directly in the AO2D creation. This tool provides a convenient
6+
// post-processing step to rectify the situation.
7+
// The tool might need adjustment whenever the AO2D data format changes, for
8+
// instance when new tables are added which are directly joinable to the BC
9+
// table.
10+
11+
// started by sandro.wenzel@cern.ch August 2025
12+
13+
// Usage: root -l -b -q 'AODBcRewriter.C("input.root","output.root")'
14+
15+
#if !defined(__CLING__) || defined(__ROOTCLING__)
16+
#include "TBranch.h"
17+
#include "TDirectory.h"
18+
#include "TFile.h"
19+
#include "TKey.h"
20+
#include "TLeaf.h"
21+
#include "TString.h"
22+
#include "TTree.h"
23+
24+
#include <algorithm>
25+
#include <iostream>
26+
#include <memory>
27+
#include <numeric>
28+
#include <unordered_map>
29+
#include <vector>
30+
#endif
31+
32+
/// Helper to manage branch buffers and copying
33+
struct BranchHandler {
34+
std::string name;
35+
std::string type;
36+
void *inBuf = nullptr;
37+
void *outBuf = nullptr;
38+
TBranch *inBranch = nullptr;
39+
TBranch *outBranch = nullptr;
40+
41+
BranchHandler(TBranch *br, TTree *outTree = nullptr) {
42+
inBranch = br;
43+
name = br->GetName();
44+
TLeaf *leaf = (TLeaf *)br->GetListOfLeaves()->At(0);
45+
type = leaf->GetTypeName();
46+
47+
if (type == "Int_t") {
48+
inBuf = new Int_t;
49+
if (outTree) {
50+
outBuf = new Int_t;
51+
outBranch = outTree->Branch(name.c_str(), (Int_t *)outBuf);
52+
}
53+
} else if (type == "ULong64_t") {
54+
inBuf = new ULong64_t;
55+
if (outTree) {
56+
outBuf = new ULong64_t;
57+
outBranch = outTree->Branch(name.c_str(), (ULong64_t *)outBuf);
58+
}
59+
} else if (type == "UChar_t") {
60+
inBuf = new UChar_t;
61+
if (outTree) {
62+
outBuf = new UChar_t;
63+
outBranch = outTree->Branch(name.c_str(), (UChar_t *)outBuf);
64+
}
65+
} else {
66+
std::cerr << "Unsupported type " << type << " for branch " << name
67+
<< std::endl;
68+
}
69+
if (inBuf)
70+
inBranch->SetAddress(inBuf);
71+
}
72+
73+
void copyValue() {
74+
if (inBuf && outBuf) {
75+
TLeaf *leaf = (TLeaf *)inBranch->GetListOfLeaves()->At(0);
76+
size_t sz = leaf->GetLenType();
77+
memcpy(outBuf, inBuf, sz);
78+
}
79+
}
80+
81+
~BranchHandler() { deleteBuffer(); }
82+
83+
void deleteBuffer() {
84+
if (type == "Int_t") {
85+
delete (Int_t *)inBuf;
86+
delete (Int_t *)outBuf;
87+
} else if (type == "ULong64_t") {
88+
delete (ULong64_t *)inBuf;
89+
delete (ULong64_t *)outBuf;
90+
} else if (type == "UChar_t") {
91+
delete (UChar_t *)inBuf;
92+
delete (UChar_t *)outBuf;
93+
}
94+
inBuf = outBuf = nullptr;
95+
}
96+
};
97+
98+
/// Copy any TObject (tree or dir handled recursively)
99+
void copyObject(TObject *obj, TDirectory *outDir) {
100+
if (!obj || !outDir)
101+
return;
102+
outDir->cd();
103+
if (obj->InheritsFrom("TDirectory")) {
104+
TDirectory *srcDir = (TDirectory *)obj;
105+
std::cout << "📂 Copying directory: " << srcDir->GetName() << std::endl;
106+
TDirectory *newDir = outDir->mkdir(srcDir->GetName());
107+
TIter nextKey(srcDir->GetListOfKeys());
108+
TKey *key;
109+
while ((key = (TKey *)nextKey())) {
110+
TObject *subObj = key->ReadObj();
111+
copyObject(subObj, newDir);
112+
}
113+
} else if (obj->InheritsFrom("TTree")) {
114+
TTree *t = (TTree *)obj;
115+
std::cout << " Copying untouched TTree: " << t->GetName() << std::endl;
116+
TTree *tnew = t->CloneTree(-1, "fast");
117+
tnew->SetDirectory(outDir);
118+
tnew->Write();
119+
} else {
120+
std::cout << " Copying object: " << obj->GetName() << " ["
121+
<< obj->ClassName() << "]" << std::endl;
122+
obj->Write();
123+
}
124+
}
125+
126+
/// Process one DF_* directory
127+
void processDF(TDirectory *dirIn, TDirectory *dirOut) {
128+
std::cout << "\n===================================================="
129+
<< std::endl;
130+
std::cout << "▶ Processing DF folder: " << dirIn->GetName() << std::endl;
131+
132+
TTree *treeBCs = nullptr;
133+
TTree *treeFlags = nullptr;
134+
std::vector<TTree *> treesWithBCid;
135+
std::vector<TObject *> otherObjects;
136+
137+
// Inspect folder contents
138+
for (auto subkeyObj : *(dirIn->GetListOfKeys())) {
139+
TKey *subkey = (TKey *)subkeyObj;
140+
TObject *obj = dirIn->Get(subkey->GetName());
141+
if (obj->InheritsFrom(TTree::Class())) {
142+
TTree *tree = (TTree *)obj;
143+
TString tname = tree->GetName();
144+
if (tname.BeginsWith("O2bc_")) {
145+
treeBCs = tree;
146+
std::cout << " Found O2bc: " << tname << std::endl;
147+
} else if (tname == "O2bcflag") {
148+
// this is a special table as it is directly joinable to O2bc
149+
// according to the datamodel
150+
treeFlags = tree;
151+
std::cout << " Found O2bcflag" << std::endl;
152+
} else if (tree->GetBranch("fIndexBCs")) {
153+
treesWithBCid.push_back(tree);
154+
std::cout << " Needs reindex: " << tname << std::endl;
155+
} else {
156+
otherObjects.push_back(tree);
157+
std::cout << " Unaffected TTree: " << tname << std::endl;
158+
}
159+
} else {
160+
otherObjects.push_back(obj);
161+
}
162+
}
163+
164+
if (!treeBCs) {
165+
std::cout << "⚠ No O2bc found in " << dirIn->GetName()
166+
<< " → just copying objects" << std::endl;
167+
for (auto obj : otherObjects) {
168+
copyObject(obj, dirOut);
169+
}
170+
return;
171+
}
172+
173+
// Read fGlobalBC values
174+
ULong64_t fGlobalBC;
175+
treeBCs->SetBranchAddress("fGlobalBC", &fGlobalBC);
176+
std::vector<ULong64_t> originalBCs(treeBCs->GetEntries());
177+
for (Long64_t i = 0; i < treeBCs->GetEntries(); i++) {
178+
treeBCs->GetEntry(i);
179+
originalBCs[i] = fGlobalBC;
180+
}
181+
182+
std::cout << " O2bc entries: " << originalBCs.size() << std::endl;
183+
184+
// Build mapping
185+
std::vector<Int_t> indexMap(originalBCs.size(), -1);
186+
std::vector<ULong64_t> uniqueBCs;
187+
std::vector<size_t> order(originalBCs.size());
188+
std::iota(order.begin(), order.end(), 0);
189+
std::sort(order.begin(), order.end(), [&](size_t a, size_t b) {
190+
return originalBCs[a] < originalBCs[b];
191+
});
192+
Int_t newIdx = -1;
193+
ULong64_t prevVal = -1;
194+
std::unordered_map<size_t, std::vector<size_t>> newIndexOrigins;
195+
for (auto oldIdx : order) {
196+
ULong64_t val = originalBCs[oldIdx];
197+
if (newIdx < 0 || val != prevVal) {
198+
++newIdx;
199+
prevVal = val;
200+
uniqueBCs.push_back(val);
201+
}
202+
indexMap[oldIdx] = newIdx;
203+
newIndexOrigins[newIdx].push_back(oldIdx);
204+
}
205+
std::cout << " Unique BCs after deduplication: " << uniqueBCs.size()
206+
<< std::endl;
207+
208+
// --- Rewrite O2bc ---
209+
dirOut->cd();
210+
TTree *treeBCsOut = new TTree(treeBCs->GetName(), "fixed O2bc tree");
211+
std::vector<std::unique_ptr<BranchHandler>> bcBranches;
212+
for (auto brObj : *treeBCs->GetListOfBranches()) {
213+
TBranch *br = (TBranch *)brObj;
214+
if (TString(br->GetName()) == "fGlobalBC")
215+
continue;
216+
bcBranches.emplace_back(std::make_unique<BranchHandler>(br, treeBCsOut));
217+
}
218+
ULong64_t outBC;
219+
treeBCsOut->Branch("fGlobalBC", &outBC, "fGlobalBC/l");
220+
221+
for (int newIdx = 0; newIdx < uniqueBCs.size(); newIdx++) {
222+
auto &oldIndices = newIndexOrigins[newIdx];
223+
if (oldIndices.empty())
224+
continue;
225+
size_t oldIdx = oldIndices.front();
226+
treeBCs->GetEntry(oldIdx);
227+
outBC = originalBCs[oldIdx];
228+
for (auto &bh : bcBranches) {
229+
bh->copyValue();
230+
}
231+
treeBCsOut->Fill();
232+
}
233+
std::cout << " Wrote O2bc with " << treeBCsOut->GetEntries() << " entries"
234+
<< std::endl;
235+
treeBCsOut->Write();
236+
237+
// --- Rewrite O2bcflag ---
238+
if (treeFlags) {
239+
std::cout << " Rebuilding O2bcflag..." << std::endl;
240+
dirOut->cd();
241+
TTree *treeFlagsOut = treeFlags->CloneTree(0);
242+
std::vector<std::unique_ptr<BranchHandler>> flagBranches;
243+
for (auto brObj : *treeFlags->GetListOfBranches()) {
244+
TBranch *br = (TBranch *)brObj;
245+
flagBranches.emplace_back(
246+
std::make_unique<BranchHandler>(br, treeFlagsOut));
247+
}
248+
for (int newIdx = 0; newIdx < uniqueBCs.size(); newIdx++) {
249+
auto &oldIndices = newIndexOrigins[newIdx];
250+
if (oldIndices.empty())
251+
continue;
252+
size_t oldIdx = oldIndices.front();
253+
treeFlags->GetEntry(oldIdx);
254+
for (auto &fh : flagBranches) {
255+
fh->copyValue();
256+
}
257+
treeFlagsOut->Fill();
258+
}
259+
std::cout << " Wrote O2bcflag with " << treeFlagsOut->GetEntries()
260+
<< " entries" << std::endl;
261+
treeFlagsOut->Write();
262+
}
263+
264+
// --- Rewrite trees with fIndexBCs ---
265+
for (auto tree : treesWithBCid) {
266+
std::cout << " Reindexing tree " << tree->GetName() << std::endl;
267+
dirOut->cd();
268+
TTree *treeOut = tree->CloneTree(0);
269+
Int_t oldBCid, newBCid;
270+
tree->SetBranchAddress("fIndexBCs", &oldBCid);
271+
treeOut->SetBranchAddress("fIndexBCs", &newBCid);
272+
for (Long64_t i = 0; i < tree->GetEntries(); i++) {
273+
tree->GetEntry(i);
274+
newBCid = indexMap[oldBCid];
275+
treeOut->Fill();
276+
}
277+
std::cout << " Wrote " << treeOut->GetEntries() << " entries"
278+
<< std::endl;
279+
treeOut->Write();
280+
}
281+
282+
// Copy unaffected objects
283+
for (auto obj : otherObjects) {
284+
copyObject(obj, dirOut);
285+
}
286+
}
287+
288+
void AODBcRewriter(const char *inFileName = "input.root",
289+
const char *outFileName = "output.root") {
290+
TFile *fin = TFile::Open(inFileName, "READ");
291+
TFile *fout = TFile::Open(outFileName, "RECREATE");
292+
fout->SetCompressionSettings(fin->GetCompressionSettings());
293+
for (auto keyObj : *(fin->GetListOfKeys())) {
294+
TKey *key = (TKey *)keyObj;
295+
TObject *obj = key->ReadObj();
296+
if (obj->InheritsFrom(TDirectory::Class()) &&
297+
TString(key->GetName()).BeginsWith("DF_"))
298+
processDF((TDirectory *)obj, fout->mkdir(key->GetName()));
299+
else {
300+
fout->cd();
301+
copyObject(obj, fout);
302+
}
303+
}
304+
fout->Close();
305+
fin->Close();
306+
}

0 commit comments

Comments
 (0)