Skip to content

Commit 0277817

Browse files
committed
AOD: Prepare Thinner for TPC-only V0s
This patch carefully checks which tracks are associated with a V0 and flags to not be discarded. This is written such that the thinner is unaware of the track-type. Additionally, this patch further generalizes the thinner by making it check manually for the table_name with regex. E.g., previously the v0-name was specifically ‘O2v0_001’, now this has been changed to ’O2v0_00?’. That way, we no longer care about the actual version contained in the table. Hence, new AO2Ds can also be thinned. Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 9288e81 commit 0277817

File tree

1 file changed

+152
-46
lines changed

1 file changed

+152
-46
lines changed

Framework/AODMerger/src/aodThinner.cxx

Lines changed: 152 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,30 @@
1212
#include <map>
1313
#include <list>
1414
#include <fstream>
15+
#include <unordered_map>
1516
#include <getopt.h>
17+
#include <filesystem>
1618

1719
#include "TSystem.h"
20+
#include "TStopwatch.h"
21+
#include "TString.h"
22+
#include "TRegexp.h"
1823
#include "TFile.h"
1924
#include "TTree.h"
25+
#include "TBranch.h"
2026
#include "TList.h"
2127
#include "TKey.h"
2228
#include "TDirectory.h"
2329
#include "TObjString.h"
24-
#include <TGrid.h>
25-
#include <TMap.h>
26-
#include <TLeaf.h>
30+
#include "TGrid.h"
31+
#include "TMap.h"
32+
#include "TLeaf.h"
2733

2834
#include "aodMerger.h"
2935

3036
// AOD reduction tool
3137
// Designed for the 2022 pp data with specific selections:
32-
// - Remove all TPC only tracks
38+
// - Remove all TPC only tracks, optionally keep TPC-only V0 tracks
3339
// - Remove all V0s which refer to any removed track
3440
// - Remove all cascade which refer to any removed V0
3541
// - Remove all ambiguous track entries which point to a track with collision
@@ -38,38 +44,66 @@ int main(int argc, char* argv[])
3844
{
3945
std::string inputFileName("AO2D.root");
4046
std::string outputFileName("AO2D_thinned.root");
41-
int exitCode = 0; // 0: success, >0: failure
47+
int exitCode = 0; // 0: success, !=0: failure
48+
bool bV0TPC = false, bOverwrite = false;
4249

4350
int option_index = 1;
51+
52+
const char* const short_opts = "i:o:KOh";
4453
static struct option long_options[] = {
45-
{"input", required_argument, nullptr, 0},
46-
{"output", required_argument, nullptr, 1},
47-
{"help", no_argument, nullptr, 2},
54+
{"input", required_argument, nullptr, 'i'},
55+
{"output", required_argument, nullptr, 'o'},
56+
{"keep-v0-tpc-only-tracks", no_argument, nullptr, 'K'},
57+
{"overwrite", no_argument, nullptr, 'O'},
58+
{"help", no_argument, nullptr, 'h'},
4859
{nullptr, 0, nullptr, 0}};
4960

5061
while (true) {
51-
int c = getopt_long(argc, argv, "", long_options, &option_index);
52-
if (c == -1) {
53-
break;
54-
} else if (c == 0) {
55-
inputFileName = optarg;
56-
} else if (c == 1) {
57-
outputFileName = optarg;
58-
} else if (c == 2) {
59-
printf("AO2D thinning tool. Options: \n");
60-
printf(" --input <inputfile.root> Contains input file path to the file to be thinned. Default: %s\n", inputFileName.c_str());
61-
printf(" --output <outputfile.root> Target output ROOT file. Default: %s\n", outputFileName.c_str());
62-
return -1;
63-
} else {
64-
return -2;
62+
const auto opt = getopt_long(argc, argv, short_opts, long_options, &option_index);
63+
if (opt == -1) {
64+
break; // use defaults
65+
}
66+
switch (opt) {
67+
case 'i':
68+
inputFileName = optarg;
69+
break;
70+
case 'o':
71+
outputFileName = optarg;
72+
break;
73+
case 'K':
74+
bV0TPC = true;
75+
printf("Keeping TPC-only tracks associated with V0s\n");
76+
break;
77+
case 'O':
78+
bOverwrite = true;
79+
printf("Overwriting exisitng output file if exisiting\n");
80+
break;
81+
case 'h':
82+
case '?':
83+
default:
84+
printf("AO2D thinning tool. Options: \n");
85+
printf(" --input/-i <inputfile.root> Contains input file path to the file to be thinned. Default: %s\n", inputFileName.c_str());
86+
printf(" --output/-o <outputfile.root> Target output ROOT file. Default: %s\n", outputFileName.c_str());
87+
printf("\n");
88+
printf(" Optional Arguments:\n");
89+
printf(" --overwrite/-O Overwrite exisiting output file\n");
90+
printf(" --keep-v0-tpc-only-tracks/-K Keep tracks associated to TPC-only V0s\n");
91+
return -1;
6592
}
6693
}
6794

6895
printf("AOD reduction started with:\n");
6996
printf(" Input file: %s\n", inputFileName.c_str());
7097
printf(" Ouput file name: %s\n", outputFileName.c_str());
7198

72-
auto outputFile = TFile::Open(outputFileName.c_str(), "RECREATE", "", 501);
99+
TStopwatch clock;
100+
clock.Start(kTRUE);
101+
102+
auto outputFile = TFile::Open(Form("%s?reproducible=thinned", outputFileName.c_str()), (bOverwrite) ? "RECREATE" : "CREATE", "", 501);
103+
if (outputFile == nullptr) {
104+
printf("Error: File %s exists or cannot be created!\n", outputFileName.c_str());
105+
return 1;
106+
}
73107
TDirectory* outputDir = nullptr;
74108

75109
if (inputFileName.find("alien:") == 0) {
@@ -87,18 +121,21 @@ int main(int argc, char* argv[])
87121
keyList->Sort();
88122

89123
for (auto key1 : *keyList) {
124+
// Keep metaData
90125
if (((TObjString*)key1)->GetString().EqualTo("metaData")) {
91126
auto metaData = (TMap*)inputFile->Get("metaData");
92127
outputFile->cd();
93128
metaData->Write("metaData", TObject::kSingleKey);
94129
}
95130

131+
// Keep parentFiles
96132
if (((TObjString*)key1)->GetString().EqualTo("parentFiles")) {
97133
auto parentFiles = (TMap*)inputFile->Get("parentFiles");
98134
outputFile->cd();
99135
parentFiles->Write("parentFiles", TObject::kSingleKey);
100136
}
101137

138+
// Skip everything else, except 'DF_*'
102139
if (!((TObjString*)key1)->GetString().BeginsWith("DF_")) {
103140
continue;
104141
}
@@ -131,15 +168,27 @@ int main(int argc, char* argv[])
131168
}
132169
}
133170

171+
// Scan versions e.g. 001 or 002 ...
172+
TString v0Name{"O2v0_???"}, trkExtraName{"O2trackextra*"};
173+
TRegexp v0Re(v0Name, kTRUE), trkExtraRe(trkExtraName, kTRUE);
174+
for (TObject* obj : *treeList) {
175+
TString st = obj->GetName();
176+
if (st.Index(v0Re) != kNPOS) {
177+
v0Name = st;
178+
} else if (st.Index(trkExtraRe) != kNPOS) {
179+
trkExtraName = st;
180+
}
181+
}
182+
134183
// Certain order needed in order to populate vectors of skipped entries
135-
auto v0Entry = (TObject*)treeList->FindObject("O2v0_001");
184+
auto v0Entry = (TObject*)treeList->FindObject(v0Name);
136185
treeList->Remove(v0Entry);
137186
treeList->AddFirst(v0Entry);
138187

139188
// Prepare maps for track skimming
140-
auto trackExtraTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2trackextra")); // for example we can use this line to access the track table
189+
auto trackExtraTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, trkExtraName.Data())); // for example we can use this line to access the track table
141190
if (trackExtraTree == nullptr) {
142-
printf("O2trackextra table not found\n");
191+
printf("%s table not found\n", trkExtraName.Data());
143192
exitCode = 6;
144193
break;
145194
}
@@ -149,27 +198,60 @@ int main(int argc, char* argv[])
149198
exitCode = 7;
150199
break;
151200
}
152-
auto v0s = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2v0_001"));
201+
auto v0s = (TTree*)inputFile->Get(Form("%s/%s", dfName, v0Name.Data()));
153202
if (v0s == nullptr) {
154-
printf("O2v0_001 table not found\n");
203+
printf("%s table not found\n", v0Name.Data());
155204
exitCode = 8;
156205
break;
157206
}
158207

208+
std::unordered_map<int, bool> keepV0TPCs;
209+
if (bV0TPC) {
210+
// We need to loop over the V0s once and flag the prong indices
211+
// technically we need to flag only V0s with TPC-only prongs but
212+
// this requires in-depth knowledge of the data-model which we don't need.
213+
int trackIdxPos = 0, trackIdxNeg = 0;
214+
v0s->SetBranchAddress("fIndexTracks_Pos", &trackIdxPos);
215+
v0s->SetBranchAddress("fIndexTracks_Neg", &trackIdxNeg);
216+
for (int i{0}; v0s->LoadTree(i) >= 0; ++i) {
217+
v0s->GetEntry(i);
218+
keepV0TPCs[trackIdxPos] = true;
219+
keepV0TPCs[trackIdxNeg] = true;
220+
}
221+
}
222+
159223
std::vector<int> acceptedTracks(trackExtraTree->GetEntries(), -1);
160224
std::vector<bool> hasCollision(trackExtraTree->GetEntries(), false);
161225
std::vector<int> keepV0s(v0s->GetEntries(), -1);
162226

163227
uint8_t tpcNClsFindable = 0;
228+
bool bTPClsFindable = false;
164229
uint8_t ITSClusterMap = 0;
230+
bool bITSClusterMap = false;
165231
uint8_t TRDPattern = 0;
232+
bool bTRDPattern = false;
166233
float_t TOFChi2 = 0;
167-
// char16_t fTPCNClsFindableMinusFound = 0;
168-
trackExtraTree->SetBranchAddress("fTPCNClsFindable", &tpcNClsFindable);
169-
trackExtraTree->SetBranchAddress("fITSClusterMap", &ITSClusterMap);
170-
trackExtraTree->SetBranchAddress("fTRDPattern", &TRDPattern);
171-
trackExtraTree->SetBranchAddress("fTOFChi2", &TOFChi2);
172-
// trackExtraTree->SetBranchAddress("fTPCNClsFindableMinusFound", &fTPCNClsFindableMinusFound);
234+
bool bTOFChi2 = false;
235+
236+
// Test if track properties exist
237+
TBranch* br = nullptr;
238+
TIter next(trackExtraTree->GetListOfBranches());
239+
while ((br = (TBranch*)next())) {
240+
TString brName = br->GetName();
241+
if (brName == "fTPCNClsFindable") {
242+
trackExtraTree->SetBranchAddress("fTPCNClsFindable", &tpcNClsFindable);
243+
bTPClsFindable = true;
244+
} else if (brName == "fITSClusterMap") {
245+
trackExtraTree->SetBranchAddress("fITSClusterMap", &ITSClusterMap);
246+
bITSClusterMap = true;
247+
} else if (brName == "fTRDPattern") {
248+
trackExtraTree->SetBranchAddress("fTRDPattern", &TRDPattern);
249+
bTRDPattern = true;
250+
} else if (brName == "fTOFChi2") {
251+
trackExtraTree->SetBranchAddress("fTOFChi2", &TOFChi2);
252+
bTOFChi2 = true;
253+
}
254+
}
173255

174256
int fIndexCollisions = 0;
175257
track_iu->SetBranchAddress("fIndexCollisions", &fIndexCollisions);
@@ -181,20 +263,23 @@ int main(int argc, char* argv[])
181263
trackExtraTree->GetEntry(i);
182264
track_iu->GetEntry(i);
183265

184-
// Remove TPC only tracks
185-
if (tpcNClsFindable > 0. && ITSClusterMap == 0 && TRDPattern == 0 && TOFChi2 < -1.) {
266+
// Flag collisions
267+
hasCollision[i] = (fIndexCollisions >= 0);
268+
269+
// Remove TPC only tracks, if (opt.) they are not assoc. to a V0
270+
if ((!bTPClsFindable || tpcNClsFindable > 0.) &&
271+
(!bITSClusterMap || ITSClusterMap == 0) &&
272+
(!bTRDPattern || TRDPattern == 0) &&
273+
(!bTOFChi2 || TOFChi2 < -1.) &&
274+
(!bV0TPC || keepV0TPCs.find(i) == keepV0TPCs.end())) {
186275
counter++;
187276
} else {
188277
acceptedTracks[i] = i - counter;
189278
}
190-
hasCollision[i] = (fIndexCollisions >= 0);
191279
}
192280

193281
for (auto key2 : *treeList) {
194-
auto treeName = ((TObjString*)key2)->GetString().Data();
195-
196-
auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName));
197-
printf(" Processing tree %s with %lld entries with total size %lld\n", treeName, inputTree->GetEntries(), inputTree->GetTotBytes());
282+
TString treeName = ((TObjString*)key2)->GetString().Data();
198283

199284
// Connect trees but do not copy entries (using the clone function)
200285
// NOTE Basket size etc. are copied in CloneTree()
@@ -203,6 +288,9 @@ int main(int argc, char* argv[])
203288
printf("Writing to output folder %s\n", dfName);
204289
}
205290
outputDir->cd();
291+
292+
auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName.Data()));
293+
printf(" Processing tree %s with %lld entries with total size %lld\n", treeName.Data(), inputTree->GetEntries(), inputTree->GetTotBytes());
206294
auto outputTree = inputTree->CloneTree(0);
207295
outputTree->SetAutoFlush(0);
208296

@@ -213,7 +301,7 @@ int main(int argc, char* argv[])
213301
for (int i = 0; i < branches->GetEntriesFast(); ++i) {
214302
TBranch* br = (TBranch*)branches->UncheckedAt(i);
215303
TString branchName(br->GetName());
216-
TString tableName(getTableName(branchName, treeName));
304+
TString tableName(getTableName(branchName, treeName.Data()));
217305
// register index of track index ONLY
218306
if (!tableName.EqualTo("O2track")) {
219307
continue;
@@ -243,10 +331,10 @@ int main(int argc, char* argv[])
243331
}
244332
}
245333

246-
bool processingTracks = memcmp(treeName, "O2track", 7) == 0; // matches any of the track tables
247-
bool processingCascades = memcmp(treeName, "O2cascade", 9) == 0;
248-
bool processingV0s = memcmp(treeName, "O2v0", 4) == 0;
249-
bool processingAmbiguousTracks = memcmp(treeName, "O2ambiguoustrack", 16) == 0;
334+
bool processingTracks = treeName.BeginsWith("O2track"); // matches any of the track tables
335+
bool processingCascades = treeName.BeginsWith("O2cascade");
336+
bool processingV0s = treeName.BeginsWith("O2v0");
337+
bool processingAmbiguousTracks = treeName.BeginsWith("O2ambiguoustrack");
250338

251339
auto indexV0s = -1;
252340
if (processingCascades) {
@@ -337,6 +425,24 @@ int main(int argc, char* argv[])
337425
gSystem->Unlink(outputFile->GetName());
338426
}
339427

428+
clock.Stop();
429+
430+
try {
431+
// Report savings
432+
auto sBefore = std::filesystem::file_size(inputFileName);
433+
auto sAfter = std::filesystem::file_size(outputFileName);
434+
if (sBefore == 0 || sAfter == 0) {
435+
printf("Warning: Empty input or output file after thinning!\n");
436+
exitCode = 42;
437+
}
438+
auto spaceSaving = (1 - ((double)sAfter) / ((double)sBefore)) * 100;
439+
printf("Stats: After=%ju / Before=%ju Bytes ---> Saving %.1f%% diskspace!\n", sAfter, sBefore, spaceSaving);
440+
} catch (std::filesystem::filesystem_error& err) {
441+
printf("Warning: %s\n", err.what());
442+
printf("Skipping Stats Reporting.\n");
443+
}
444+
445+
printf("Timing: CPU=%.2f (s); Real=%.2f (s)\n", clock.CpuTime(), clock.RealTime());
340446
printf("End of AOD thinning.\n");
341447

342448
return exitCode;

0 commit comments

Comments
 (0)