Skip to content

Commit 588d554

Browse files
committed
AOD: Prepare Thinner for TPC-only V0s
This patch carefully checks which tracks are associated with a V0 and flags to not be discarded. This is written such that the thinner is unaware of the track-type. Additionally, this patch further generalizes the thinner by making it check manually for the table_name with regex. E.g., previously the v0-name was specifically ‘O2v0_001’, now this has been changed to ’O2v0_00?’. That way, we no longer care about the actual version contained in the table. Hence, new AO2Ds can also be thinned. Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 9288e81 commit 588d554

File tree

1 file changed

+123
-39
lines changed

1 file changed

+123
-39
lines changed

Framework/AODMerger/src/aodThinner.cxx

Lines changed: 123 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,29 @@
1212
#include <map>
1313
#include <list>
1414
#include <fstream>
15+
#include <unordered_map>
1516
#include <getopt.h>
17+
#include <filesystem>
1618

1719
#include "TSystem.h"
20+
#include "TStopwatch.h"
21+
#include "TString.h"
22+
#include "TRegexp.h"
1823
#include "TFile.h"
1924
#include "TTree.h"
2025
#include "TList.h"
2126
#include "TKey.h"
2227
#include "TDirectory.h"
2328
#include "TObjString.h"
24-
#include <TGrid.h>
25-
#include <TMap.h>
26-
#include <TLeaf.h>
29+
#include "TGrid.h"
30+
#include "TMap.h"
31+
#include "TLeaf.h"
2732

2833
#include "aodMerger.h"
2934

3035
// AOD reduction tool
3136
// Designed for the 2022 pp data with specific selections:
32-
// - Remove all TPC only tracks
37+
// - Remove all TPC only tracks, optionally keep TPC-only V0 tracks
3338
// - Remove all V0s which refer to any removed track
3439
// - Remove all cascade which refer to any removed V0
3540
// - Remove all ambiguous track entries which point to a track with collision
@@ -38,38 +43,66 @@ int main(int argc, char* argv[])
3843
{
3944
std::string inputFileName("AO2D.root");
4045
std::string outputFileName("AO2D_thinned.root");
41-
int exitCode = 0; // 0: success, >0: failure
46+
int exitCode = 0; // 0: success, !=0: failure
47+
bool bV0TPC = false, bOverwrite = false;
4248

4349
int option_index = 1;
50+
51+
const char* const short_opts = "i:o:KOh";
4452
static struct option long_options[] = {
45-
{"input", required_argument, nullptr, 0},
46-
{"output", required_argument, nullptr, 1},
47-
{"help", no_argument, nullptr, 2},
53+
{"input", required_argument, nullptr, 'i'},
54+
{"output", required_argument, nullptr, 'o'},
55+
{"keep-v0-tpc-only-tracks", no_argument, nullptr, 'K'},
56+
{"overwrite", no_argument, nullptr, 'O'},
57+
{"help", no_argument, nullptr, 'h'},
4858
{nullptr, 0, nullptr, 0}};
4959

5060
while (true) {
51-
int c = getopt_long(argc, argv, "", long_options, &option_index);
52-
if (c == -1) {
53-
break;
54-
} else if (c == 0) {
55-
inputFileName = optarg;
56-
} else if (c == 1) {
57-
outputFileName = optarg;
58-
} else if (c == 2) {
59-
printf("AO2D thinning tool. Options: \n");
60-
printf(" --input <inputfile.root> Contains input file path to the file to be thinned. Default: %s\n", inputFileName.c_str());
61-
printf(" --output <outputfile.root> Target output ROOT file. Default: %s\n", outputFileName.c_str());
62-
return -1;
63-
} else {
64-
return -2;
61+
const auto opt = getopt_long(argc, argv, short_opts, long_options, &option_index);
62+
if (opt == -1) {
63+
break; // use defaults
64+
}
65+
switch (opt) {
66+
case 'i':
67+
inputFileName = optarg;
68+
break;
69+
case 'o':
70+
outputFileName = optarg;
71+
break;
72+
case 'K':
73+
bV0TPC = true;
74+
printf("Keeping TPC-only tracks associated with V0s\n");
75+
break;
76+
case 'O':
77+
bOverwrite = true;
78+
printf("Overwriting exisitng output file if exisiting\n");
79+
break;
80+
case 'h':
81+
case '?':
82+
default:
83+
printf("AO2D thinning tool. Options: \n");
84+
printf(" --input/-i <inputfile.root> Contains input file path to the file to be thinned. Default: %s\n", inputFileName.c_str());
85+
printf(" --output/-o <outputfile.root> Target output ROOT file. Default: %s\n", outputFileName.c_str());
86+
printf("\n");
87+
printf(" Optional Arguments:\n");
88+
printf(" --overwrite/-O Overwrite exisiting output file\n");
89+
printf(" --keep-v0-tpc-only-tracks/-K Keep tracks associated to TPC-only V0s\n");
90+
return -1;
6591
}
6692
}
6793

6894
printf("AOD reduction started with:\n");
6995
printf(" Input file: %s\n", inputFileName.c_str());
7096
printf(" Ouput file name: %s\n", outputFileName.c_str());
7197

72-
auto outputFile = TFile::Open(outputFileName.c_str(), "RECREATE", "", 501);
98+
TStopwatch clock;
99+
clock.Start(kTRUE);
100+
101+
auto outputFile = TFile::Open(outputFileName.c_str(), (bOverwrite) ? "RECREATE" : "CREATE", "", 501);
102+
if (outputFile == nullptr) {
103+
printf("Error: File %s exists or cannot be created!\n", outputFileName.c_str());
104+
return 1;
105+
}
73106
TDirectory* outputDir = nullptr;
74107

75108
if (inputFileName.find("alien:") == 0) {
@@ -87,18 +120,21 @@ int main(int argc, char* argv[])
87120
keyList->Sort();
88121

89122
for (auto key1 : *keyList) {
123+
// Keep metaData
90124
if (((TObjString*)key1)->GetString().EqualTo("metaData")) {
91125
auto metaData = (TMap*)inputFile->Get("metaData");
92126
outputFile->cd();
93127
metaData->Write("metaData", TObject::kSingleKey);
94128
}
95129

130+
// Keep parentFiles
96131
if (((TObjString*)key1)->GetString().EqualTo("parentFiles")) {
97132
auto parentFiles = (TMap*)inputFile->Get("parentFiles");
98133
outputFile->cd();
99134
parentFiles->Write("parentFiles", TObject::kSingleKey);
100135
}
101136

137+
// Skip everything else, except 'DF_*'
102138
if (!((TObjString*)key1)->GetString().BeginsWith("DF_")) {
103139
continue;
104140
}
@@ -131,13 +167,25 @@ int main(int argc, char* argv[])
131167
}
132168
}
133169

170+
// Scan versions e.g. 001 or 002 ...
171+
TString v0Name{"O2v0_00?"}, trkExtraName{"O2trackextra*"};
172+
TRegexp v0Re(v0Name, kTRUE), trkExtraRe(trkExtraName, kTRUE);
173+
for (TObject* obj : *treeList) {
174+
TString st = obj->GetName();
175+
if (st.Index(v0Re) != kNPOS) {
176+
v0Name = st;
177+
} else if (st.Index(trkExtraRe) != kNPOS) {
178+
trkExtraName = st;
179+
}
180+
}
181+
134182
// Certain order needed in order to populate vectors of skipped entries
135-
auto v0Entry = (TObject*)treeList->FindObject("O2v0_001");
183+
auto v0Entry = (TObject*)treeList->FindObject(v0Name);
136184
treeList->Remove(v0Entry);
137185
treeList->AddFirst(v0Entry);
138186

139187
// Prepare maps for track skimming
140-
auto trackExtraTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2trackextra")); // for example we can use this line to access the track table
188+
auto trackExtraTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, trkExtraName.Data())); // for example we can use this line to access the track table
141189
if (trackExtraTree == nullptr) {
142190
printf("O2trackextra table not found\n");
143191
exitCode = 6;
@@ -149,13 +197,28 @@ int main(int argc, char* argv[])
149197
exitCode = 7;
150198
break;
151199
}
152-
auto v0s = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2v0_001"));
200+
auto v0s = (TTree*)inputFile->Get(Form("%s/%s", dfName, v0Name.Data()));
153201
if (v0s == nullptr) {
154-
printf("O2v0_001 table not found\n");
202+
printf("%s table not found\n", v0Name.Data());
155203
exitCode = 8;
156204
break;
157205
}
158206

207+
std::unordered_map<int, bool> keepV0TPCs;
208+
if (bV0TPC) {
209+
// We need to loop over the V0s once and flag the prong indices
210+
// technically we need to flag only V0s with TPC-only prongs but
211+
// this requires in-depth knowledge of the data-model which we don't need.
212+
int trackIdxPos = 0, trackIdxNeg = 0;
213+
v0s->SetBranchAddress("fIndexTracks_Pos", &trackIdxPos);
214+
v0s->SetBranchAddress("fIndexTracks_Neg", &trackIdxNeg);
215+
for (int i{0}; v0s->LoadTree(i) >= 0; ++i) {
216+
v0s->GetEntry(i);
217+
keepV0TPCs[trackIdxPos] = true;
218+
keepV0TPCs[trackIdxNeg] = true;
219+
}
220+
}
221+
159222
std::vector<int> acceptedTracks(trackExtraTree->GetEntries(), -1);
160223
std::vector<bool> hasCollision(trackExtraTree->GetEntries(), false);
161224
std::vector<int> keepV0s(v0s->GetEntries(), -1);
@@ -181,20 +244,20 @@ int main(int argc, char* argv[])
181244
trackExtraTree->GetEntry(i);
182245
track_iu->GetEntry(i);
183246

184-
// Remove TPC only tracks
185-
if (tpcNClsFindable > 0. && ITSClusterMap == 0 && TRDPattern == 0 && TOFChi2 < -1.) {
247+
// Flag collisions
248+
hasCollision[i] = (fIndexCollisions >= 0);
249+
250+
// Remove TPC only tracks, if (opt.) they are not assoc. to a V0
251+
if (tpcNClsFindable > 0. && ITSClusterMap == 0 && TRDPattern == 0 && TOFChi2 < -1. &&
252+
(!bV0TPC || keepV0TPCs.find(i) == keepV0TPCs.end())) {
186253
counter++;
187254
} else {
188255
acceptedTracks[i] = i - counter;
189256
}
190-
hasCollision[i] = (fIndexCollisions >= 0);
191257
}
192258

193259
for (auto key2 : *treeList) {
194-
auto treeName = ((TObjString*)key2)->GetString().Data();
195-
196-
auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName));
197-
printf(" Processing tree %s with %lld entries with total size %lld\n", treeName, inputTree->GetEntries(), inputTree->GetTotBytes());
260+
TString treeName = ((TObjString*)key2)->GetString().Data();
198261

199262
// Connect trees but do not copy entries (using the clone function)
200263
// NOTE Basket size etc. are copied in CloneTree()
@@ -203,6 +266,9 @@ int main(int argc, char* argv[])
203266
printf("Writing to output folder %s\n", dfName);
204267
}
205268
outputDir->cd();
269+
270+
auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName.Data()));
271+
printf(" Processing tree %s with %lld entries with total size %lld\n", treeName.Data(), inputTree->GetEntries(), inputTree->GetTotBytes());
206272
auto outputTree = inputTree->CloneTree(0);
207273
outputTree->SetAutoFlush(0);
208274

@@ -213,7 +279,7 @@ int main(int argc, char* argv[])
213279
for (int i = 0; i < branches->GetEntriesFast(); ++i) {
214280
TBranch* br = (TBranch*)branches->UncheckedAt(i);
215281
TString branchName(br->GetName());
216-
TString tableName(getTableName(branchName, treeName));
282+
TString tableName(getTableName(branchName, treeName.Data()));
217283
// register index of track index ONLY
218284
if (!tableName.EqualTo("O2track")) {
219285
continue;
@@ -243,10 +309,10 @@ int main(int argc, char* argv[])
243309
}
244310
}
245311

246-
bool processingTracks = memcmp(treeName, "O2track", 7) == 0; // matches any of the track tables
247-
bool processingCascades = memcmp(treeName, "O2cascade", 9) == 0;
248-
bool processingV0s = memcmp(treeName, "O2v0", 4) == 0;
249-
bool processingAmbiguousTracks = memcmp(treeName, "O2ambiguoustrack", 16) == 0;
312+
bool processingTracks = treeName.BeginsWith("O2track"); // matches any of the track tables
313+
bool processingCascades = treeName.BeginsWith("O2cascade");
314+
bool processingV0s = treeName.BeginsWith("O2v0");
315+
bool processingAmbiguousTracks = treeName.BeginsWith("O2ambiguoustrack");
250316

251317
auto indexV0s = -1;
252318
if (processingCascades) {
@@ -337,6 +403,24 @@ int main(int argc, char* argv[])
337403
gSystem->Unlink(outputFile->GetName());
338404
}
339405

406+
clock.Stop();
407+
408+
try {
409+
// Report savings
410+
auto sBefore = std::filesystem::file_size(inputFileName);
411+
auto sAfter = std::filesystem::file_size(outputFileName);
412+
if (sBefore == 0 || sAfter == 0) {
413+
printf("Warning: Empty input or output file after thinning!\n");
414+
exitCode = 42;
415+
}
416+
auto spaceSaving = (1 - ((double)sAfter) / ((double)sBefore)) * 100;
417+
printf("Stats: After=%ju / Before=%ju Bytes ---> Saving %.1f%% diskspace!\n", sAfter, sBefore, spaceSaving);
418+
} catch (std::filesystem::filesystem_error& err) {
419+
printf("Warning: %s\n", err.what());
420+
printf("Skipping Stats Reporting.\n");
421+
}
422+
423+
printf("Timing: CPU=%.2f (s); Real=%.2f (s)\n", clock.CpuTime(), clock.RealTime());
340424
printf("End of AOD thinning.\n");
341425

342426
return exitCode;

0 commit comments

Comments
 (0)