Skip to content

Commit bbdeec6

Browse files
fgrosa and alibuild authored
[EventFiltering] Add scripts for BDT trainings and add info for particles reco in wrong collision (#2742)
* Add scripts for BDT trainings and remove particles reco in wrong collision from signal
* Add configs
* Please consider the following formatting changes
* Fix Megalint

---------

Co-authored-by: ALICE Action Bot <alibuild@cern.ch>
1 parent a106c54 commit bbdeec6

10 files changed

Lines changed: 1003 additions & 20 deletions

EventFiltering/PWGHF/HFFilter.cxx

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ struct HfFilter { // Main struct for HF triggers
112112
Service<o2::ccdb::BasicCCDBManager> ccdb;
113113
Configurable<std::string> url{"ccdb-url", "http://alice-ccdb.cern.ch", "url of the ccdb repository"};
114114
Configurable<std::string> mlModelPathCCDB{"mlModelPathCCDB", "Analysis/PWGHF/ML/HFTrigger/", "Path on CCDB"};
115-
Configurable<long> timestampCCDB{"timestampCCDB", -1, "timestamp of the ONNX file for ML model used to query in CCDB. Exceptions: > 0 for the specific timestamp, 0 gets the run dependent timestamp"};
115+
Configurable<int64_t> timestampCCDB{"timestampCCDB", -1, "timestamp of the ONNX file for ML model used to query in CCDB. Exceptions: > 0 for the specific timestamp, 0 gets the run dependent timestamp"};
116116
Configurable<bool> loadModelsFromCCDB{"loadModelsFromCCDB", false, "Flag to enable or disable the loading of models from CCDB"};
117117
Configurable<std::string> ccdbPathGrpMag{"ccdbPathGrpMag", "GLO/Config/GRPMagField", "CCDB path of the GRPMagField object"};
118118
Configurable<string> ccdbPathTPC{"ccdbPathTPC", "Users/i/iarsene/Calib/TPCpostCalib", "base path to the ccdb object"};
@@ -296,10 +296,10 @@ struct HfFilter { // Main struct for HF triggers
296296
LOG(fatal) << "Can not find the TPC Post Calibration object!";
297297
}
298298

299-
hMapPion[0] = (TH3F*)calibList->FindObject("mean_map_pion");
300-
hMapPion[1] = (TH3F*)calibList->FindObject("sigma_map_pion");
301-
hMapProton[0] = (TH3F*)calibList->FindObject("mean_map_proton");
302-
hMapProton[1] = (TH3F*)calibList->FindObject("sigma_map_proton");
299+
hMapPion[0] = reinterpret_cast<TH3F*>(calibList->FindObject("mean_map_pion"));
300+
hMapPion[1] = reinterpret_cast<TH3F*>(calibList->FindObject("sigma_map_pion"));
301+
hMapProton[0] = reinterpret_cast<TH3F*>(calibList->FindObject("mean_map_proton"));
302+
hMapProton[1] = reinterpret_cast<TH3F*>(calibList->FindObject("sigma_map_proton"));
303303

304304
if (!hMapPion[0] || !hMapPion[1] || !hMapProton[0] || !hMapProton[1]) {
305305
LOG(fatal) << "Can not find histograms!";
@@ -323,7 +323,7 @@ struct HfFilter { // Main struct for HF triggers
323323
bool keepEvent[kNtriggersHF]{false};
324324
//
325325

326-
std::vector<std::vector<long>> indicesDau2Prong{};
326+
std::vector<std::vector<int64_t>> indicesDau2Prong{};
327327

328328
auto cand2ProngsThisColl = cand2Prongs.sliceBy(hf2ProngPerCollision, thisCollId);
329329
for (const auto& cand2Prong : cand2ProngsThisColl) { // start loop over 2 prongs
@@ -415,7 +415,7 @@ struct HfFilter { // Main struct for HF triggers
415415
} // end high-pT selection
416416

417417
if (isCharmTagged) {
418-
indicesDau2Prong.push_back(std::vector<long>{trackPos.globalIndex(), trackNeg.globalIndex()});
418+
indicesDau2Prong.push_back(std::vector<int64_t>{trackPos.globalIndex(), trackNeg.globalIndex()});
419419
} // end multi-charm selection
420420

421421
auto trackIdsThisCollision = trackIndices.sliceBy(trackIndicesPerCollision, thisCollId);
@@ -529,7 +529,7 @@ struct HfFilter { // Main struct for HF triggers
529529

530530
} // end loop over 2-prong candidates
531531

532-
std::vector<std::vector<long>> indicesDau3Prong{};
532+
std::vector<std::vector<int64_t>> indicesDau3Prong{};
533533
auto cand3ProngsThisColl = cand3Prongs.sliceBy(hf3ProngPerCollision, thisCollId);
534534
for (const auto& cand3Prong : cand3ProngsThisColl) { // start loop over 3 prongs
535535
std::array<int8_t, kNCharmParticles - 1> is3Prong = {
@@ -636,7 +636,7 @@ struct HfFilter { // Main struct for HF triggers
636636
}
637637

638638
if (std::accumulate(isCharmTagged.begin(), isCharmTagged.end(), 0)) {
639-
indicesDau3Prong.push_back(std::vector<long>{trackFirst.globalIndex(), trackSecond.globalIndex(), trackThird.globalIndex()});
639+
indicesDau3Prong.push_back(std::vector<int64_t>{trackFirst.globalIndex(), trackSecond.globalIndex(), trackThird.globalIndex()});
640640
} // end multiple 3-prong selection
641641

642642
auto pVec3Prong = RecoDecay::pVec(pVecFirst, pVecSecond, pVecThird);

EventFiltering/PWGHF/HFFilterHelpers.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
/// \author Alexandre Bigot <alexandre.bigot@cern.ch>, Strasbourg University
1919
/// \author Biao Zhang <biao.zhang@cern.ch>, CCNU
2020

21-
#ifndef O2_ANALYSIS_HF_FILTER_HELPERS_H_
22-
#define O2_ANALYSIS_HF_FILTER_HELPERS_H_
21+
#ifndef EVENTFILTERING_PWGHF_HFFILTERHELPERS_H_
22+
#define EVENTFILTERING_PWGHF_HFFILTERHELPERS_H_
2323

2424
#include "Framework/DataTypes.h"
2525
#include "Framework/AnalysisDataModel.h"
@@ -37,6 +37,9 @@
3737
#include <array>
3838
#include <string>
3939
#include <cmath>
40+
#include <map>
41+
#include <memory>
42+
#include <algorithm>
4043

4144
#include "Math/Vector3D.h"
4245
#include "Math/Vector4D.h"
@@ -831,7 +834,7 @@ int computeNumberOfCandidates(std::vector<std::vector<T>> indices)
831834
/// \param mlModelPathCCDB is the model path in CCDB
832835
/// \param timestampCCDB is the CCDB timestamp
833836
/// \return the pointer to the ONNX Ort::Experimental::Session
834-
Ort::Experimental::Session* InitONNXSession(std::string& onnxFile, std::string partName, Ort::Env& env, Ort::SessionOptions& sessionOpt, std::vector<std::vector<int64_t>>& inputShapes, int& dataType, bool loadModelsFromCCDB, o2::ccdb::CcdbApi& ccdbApi, std::string mlModelPathCCDB, long timestampCCDB)
837+
Ort::Experimental::Session* InitONNXSession(std::string& onnxFile, std::string partName, Ort::Env& env, Ort::SessionOptions& sessionOpt, std::vector<std::vector<int64_t>>& inputShapes, int& dataType, bool loadModelsFromCCDB, o2::ccdb::CcdbApi& ccdbApi, std::string mlModelPathCCDB, int64_t timestampCCDB)
835838
{
836839
// hard coded, we do not let the user change this
837840
sessionOpt.SetIntraOpNumThreads(1);
@@ -980,6 +983,7 @@ DECLARE_SOA_COLUMN(NsigmaPrTOF3, nsigmaPrTOF3, float); //!
980983
DECLARE_SOA_COLUMN(FlagOrigin, flagOrigin, int8_t); //!
981984
DECLARE_SOA_COLUMN(Channel, channel, int8_t); //!
982985
DECLARE_SOA_COLUMN(HFSelBit, hfselbit, int8_t); //!
986+
DECLARE_SOA_COLUMN(IsInCorrectColl, isInCorrectColl, bool); //!
983987
} // namespace hftraining
984988

985989
DECLARE_SOA_TABLE(HFTrigTrain2P, "AOD", "HFTRIGTRAIN2P", //!
@@ -1000,7 +1004,8 @@ DECLARE_SOA_TABLE(HFTrigTrain2P, "AOD", "HFTRIGTRAIN2P", //!
10001004
hftraining::NsigmaKaTPC2,
10011005
hftraining::NsigmaPiTOF2,
10021006
hftraining::NsigmaKaTOF2,
1003-
hftraining::FlagOrigin);
1007+
hftraining::FlagOrigin,
1008+
hftraining::IsInCorrectColl);
10041009
DECLARE_SOA_TABLE(HFTrigTrain3P, "AOD", "HFTRIGTRAIN3P", //!
10051010
hftraining::InvMassDplus,
10061011
hftraining::InvMassDsToKKPi,
@@ -1041,7 +1046,8 @@ DECLARE_SOA_TABLE(HFTrigTrain3P, "AOD", "HFTRIGTRAIN3P", //!
10411046
hftraining::NsigmaPrTOF3,
10421047
hftraining::FlagOrigin,
10431048
hftraining::Channel,
1044-
hftraining::HFSelBit);
1049+
hftraining::HFSelBit,
1050+
hftraining::IsInCorrectColl);
10451051

10461052
namespace hfoptimisationTree
10471053
{
@@ -1086,4 +1092,4 @@ DECLARE_SOA_TABLE(HFOptimisationTreeCollisions, "AOD", "HFOPTIMTREECOLL", //!
10861092
hfoptimisationTree::CollisionIndex)
10871093
} // namespace o2::aod
10881094

1089-
#endif // O2_ANALYSIS_HF_FILTER_HELPERS_
1095+
#endif // EVENTFILTERING_PWGHF_HFFILTERHELPERS_H_

EventFiltering/PWGHF/HFFilterPrepareMLSamples.cxx

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ struct HfFilterPrepareMlSamples { // Main struct
7373
void process(aod::Hf2Prongs const& cand2Prongs,
7474
aod::Hf3Prongs const& cand3Prongs,
7575
aod::McParticles const& particlesMC,
76-
aod::Collisions const& collisions,
76+
soa::Join<aod::Collisions, aod::McCollisionLabels> const& collisions,
7777
BigTracksMCPID const&,
7878
aod::BCsWithTimestamps const&)
7979
{
@@ -117,19 +117,21 @@ struct HfFilterPrepareMlSamples { // Main struct
117117
int8_t flag = RecoDecay::OriginType::None;
118118

119119
// D0(bar) → π± K∓
120+
bool isInCorrectColl{false};
120121
auto indexRec = RecoDecay::getMatchedMCRec(particlesMC, std::array{trackPos, trackNeg}, pdg::Code::kD0, array{+kPiPlus, -kKPlus}, true, &sign);
121122
if (indexRec > -1) {
122123
auto particle = particlesMC.rawIteratorAt(indexRec);
123124
flag = RecoDecay::getCharmHadronOrigin(particlesMC, particle);
125+
isInCorrectColl = (collision.mcCollisionId() == particle.mcCollisionId());
124126
if (flag < RecoDecay::OriginType::Prompt) {
125127
continue;
126128
}
127129
}
128130

129-
float pseudoRndm = trackPos.pt() * 1000. - (long)(trackPos.pt() * 1000);
131+
float pseudoRndm = trackPos.pt() * 1000. - (int64_t)(trackPos.pt() * 1000);
130132
if ((fillSignal && indexRec > -1) || (fillBackground && indexRec < 0 && pseudoRndm < donwSampleBkgFactor)) {
131133
train2P(invMassD0, invMassD0bar, pt2Prong, trackParPos.getPt(), dcaPos[0], dcaPos[1], trackPos.tpcNSigmaPi(), trackPos.tpcNSigmaKa(), trackPos.tofNSigmaPi(), trackPos.tofNSigmaKa(),
132-
trackParNeg.getPt(), dcaNeg[0], dcaNeg[1], trackNeg.tpcNSigmaPi(), trackNeg.tpcNSigmaKa(), trackNeg.tofNSigmaPi(), trackNeg.tofNSigmaKa(), flag);
134+
trackParNeg.getPt(), dcaNeg[0], dcaNeg[1], trackNeg.tpcNSigmaPi(), trackNeg.tpcNSigmaKa(), trackNeg.tofNSigmaPi(), trackNeg.tofNSigmaKa(), flag, isInCorrectColl);
133135
}
134136
} // end loop over 2-prong candidates
135137

@@ -223,21 +225,23 @@ struct HfFilterPrepareMlSamples { // Main struct
223225
}
224226
}
225227

228+
bool isInCorrectColl{false};
226229
if (indexRec > -1) {
227230
auto particle = particlesMC.rawIteratorAt(indexRec);
228231
flag = RecoDecay::getCharmHadronOrigin(particlesMC, particle);
232+
isInCorrectColl = (collision.mcCollisionId() == particle.mcCollisionId());
229233
if (flag < RecoDecay::OriginType::Prompt) {
230234
continue;
231235
}
232236
}
233237

234-
float pseudoRndm = trackFirst.pt() * 1000. - (long)(trackFirst.pt() * 1000);
238+
float pseudoRndm = trackFirst.pt() * 1000. - (int64_t)(trackFirst.pt() * 1000);
235239
if ((fillSignal && indexRec > -1) || (fillBackground && indexRec < 0 && pseudoRndm < donwSampleBkgFactor)) {
236240
train3P(invMassDplus, invMassDsToKKPi, invMassDsToPiKK, invMassLcToPKPi, invMassLcToPiKP, invMassXicToPKPi, invMassXicToPiKP, pt3Prong, deltaMassKKFirst, deltaMassKKSecond,
237241
trackParFirst.getPt(), dcaFirst[0], dcaFirst[1], trackFirst.tpcNSigmaPi(), trackFirst.tpcNSigmaKa(), trackFirst.tpcNSigmaPr(), trackFirst.tofNSigmaPi(), trackFirst.tofNSigmaKa(), trackFirst.tofNSigmaPr(),
238242
trackParSecond.getPt(), dcaSecond[0], dcaSecond[1], trackSecond.tpcNSigmaPi(), trackSecond.tpcNSigmaKa(), trackSecond.tpcNSigmaPr(), trackSecond.tofNSigmaPi(), trackSecond.tofNSigmaKa(), trackSecond.tofNSigmaPr(),
239243
trackParThird.getPt(), dcaThird[0], dcaThird[1], trackThird.tpcNSigmaPi(), trackThird.tpcNSigmaKa(), trackThird.tpcNSigmaPr(), trackThird.tofNSigmaPi(), trackThird.tofNSigmaKa(), trackThird.tofNSigmaPr(),
240-
flag, channel, cand3Prong.hfflag());
244+
flag, channel, cand3Prong.hfflag(), isInCorrectColl);
241245
}
242246
} // end loop over 3-prong candidates
243247
}
Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
---
2+
data_prep:
3+
dirs:
4+
Prompt:
5+
[
6+
training_samples/LHC22b1a_train_31002,
7+
training_samples/LHC22b1b_train_31003,
8+
]
9+
Nonprompt:
10+
[
11+
training_samples/LHC22b1a_train_31002,
12+
training_samples/LHC22b1b_train_31003,
13+
]
14+
Bkg: [training_samples/LHC221k6_pp_train_31202]
15+
# options: D0ToKPi, DplusToPiKPi, DsToKKPi, LcToPKPi, XicToPKPi
16+
channel: D0ToKPi
17+
preselection:
18+
enable: false # preselection possible only for Ds and baryons
19+
nsigma_tpc_proton: 1000. # nsigma cut for proton
20+
nsigma_tof_proton: 1000. # nsigma cut for proton
21+
delta_mass_kk: 1000. # mass difference between KK and phi in GeV/c2
22+
class_balance:
23+
share: equal # options: equal, all_signal
24+
bkg_factor: 5 # factor to be applied to bkg compared to signal sample
25+
test_fraction: 0.3
26+
seed_split: 42
27+
28+
ml:
29+
raw_output: false
30+
roc_auc_approach: ovo
31+
roc_auc_average: macro
32+
training_vars: [fPT1, fDCAPrimXY1, fDCAPrimZ1, fPT2, fDCAPrimXY2, fDCAPrimZ2]
33+
hyper_pars:
34+
{
35+
"max_depth": 4,
36+
"learning_rate": 0.01,
37+
"n_estimators": 1000,
38+
"min_child_weight": 5,
39+
"n_jobs": 4,
40+
"tree_method": hist,
41+
}
42+
hyper_pars_opt:
43+
activate: false
44+
ntrials: 25
45+
njobs: 4
46+
timeout: 1800
47+
hyper_par_ranges:
48+
{
49+
"max_depth": !!python/tuple [3, 6],
50+
"learning_rate": !!python/tuple [0.01, 0.1],
51+
"n_estimators": !!python/tuple [300, 1500],
52+
"min_child_weight": !!python/tuple [1, 10],
53+
"subsample": !!python/tuple [0.8, 1.],
54+
"colsample_bytree": !!python/tuple [0.8, 1.],
55+
}
56+
57+
output:
58+
directory: trainings/D0
59+
out_labels: # output labels, keep the right number of classes
60+
Bkg: Bkg
61+
Prompt: Prompt
62+
Nonprompt: Nonprompt
63+
# list of variables saved in the dataframes with the applied models
64+
column_to_save_list: ["fPT2Prong"]
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
---
2+
data_prep:
3+
dirs:
4+
Prompt:
5+
[
6+
training_samples/LHC22b1a_train_31002,
7+
training_samples/LHC22b1b_train_31003,
8+
]
9+
Nonprompt:
10+
[
11+
training_samples/LHC22b1a_train_31002,
12+
training_samples/LHC22b1b_train_31003,
13+
]
14+
Bkg: [training_samples/LHC221k6_pp_train_31202]
15+
# options: D0ToKPi, DplusToPiKPi, DsToKKPi, LcToPKPi, XicToPKPi
16+
channel: DplusToPiKPi
17+
preselection:
18+
enable: false # preselection possible only for Ds and baryons
19+
nsigma_tpc_proton: 1000. # nsigma cut for proton
20+
nsigma_tof_proton: 1000. # nsigma cut for proton
21+
delta_mass_kk: 1000. # mass difference between KK and phi in GeV/c2
22+
class_balance:
23+
share: equal # options: equal, all_signal
24+
bkg_factor: 5 # factor to be applied to bkg compared to signal sample
25+
test_fraction: 0.3
26+
seed_split: 42
27+
28+
ml:
29+
raw_output: false
30+
roc_auc_approach: ovo
31+
roc_auc_average: macro
32+
training_vars:
33+
[
34+
fPT1,
35+
fDCAPrimXY1,
36+
fDCAPrimZ1,
37+
fPT2,
38+
fDCAPrimXY2,
39+
fDCAPrimZ2,
40+
fPT3,
41+
fDCAPrimXY3,
42+
fDCAPrimZ3,
43+
]
44+
hyper_pars:
45+
{
46+
"max_depth": 4,
47+
"learning_rate": 0.01,
48+
"n_estimators": 1000,
49+
"min_child_weight": 5,
50+
"n_jobs": 4,
51+
"tree_method": hist,
52+
}
53+
hyper_pars_opt:
54+
activate: false
55+
ntrials: 25
56+
njobs: 4
57+
timeout: 1800
58+
hyper_par_ranges:
59+
{
60+
"max_depth": !!python/tuple [3, 6],
61+
"learning_rate": !!python/tuple [0.01, 0.1],
62+
"n_estimators": !!python/tuple [300, 1500],
63+
"min_child_weight": !!python/tuple [1, 10],
64+
"subsample": !!python/tuple [0.8, 1.],
65+
"colsample_bytree": !!python/tuple [0.8, 1.],
66+
}
67+
68+
output:
69+
directory: trainings/Dplus
70+
out_labels: # output labels, keep the right number of classes
71+
Bkg: Bkg
72+
Prompt: Prompt
73+
Nonprompt: Nonprompt
74+
# list of variables saved in the dataframes with the applied models
75+
column_to_save_list: ["fPT3Prong"]
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
---
2+
data_prep:
3+
dirs:
4+
Prompt: [training_samples/LHC22b1b/31003, training_samples/LHC22b1a/31002]
5+
Nonprompt:
6+
[training_samples/LHC22b1a/31002, training_samples/LHC22b1b/31003]
7+
Bkg: [training_samples/LHC21k6/31202]
8+
# options: D0ToKPi, DplusToPiKPi, DsToKKPi, LcToPKPi, XicToPKPi
9+
channel: DsToKKPi
10+
preselection:
11+
enable: true # preselection possible only for Ds and baryons
12+
nsigma_tpc_proton: 1000. # nsigma cut for proton
13+
nsigma_tof_proton: 1000. # nsigma cut for proton
14+
delta_mass_kk: 0.020 # mass difference between KK and phi in GeV/c2
15+
class_balance:
16+
share: all_signal # options: equal, all_signal
17+
bkg_factor: 1 # factor to be applied to bkg compared to signal sample
18+
test_fraction: 0.5
19+
seed_split: 42
20+
21+
ml:
22+
raw_output: false
23+
roc_auc_approach: ovo
24+
roc_auc_average: macro
25+
training_vars:
26+
[
27+
fPT1,
28+
fDCAPrimXY1,
29+
fDCAPrimZ1,
30+
fPT2,
31+
fDCAPrimXY2,
32+
fDCAPrimZ2,
33+
fPT3,
34+
fDCAPrimXY3,
35+
fDCAPrimZ3,
36+
]
37+
hyper_pars:
38+
{
39+
"max_depth": 4,
40+
"learning_rate": 0.09849808014809752,
41+
"n_estimators": 1198,
42+
"min_child_weight": 7,
43+
"n_jobs": 4,
44+
"tree_method": "hist",
45+
"subsample": 0.8012697244903896,
46+
"colsample_bytree": 0.9309143772778852,
47+
}
48+
hyper_pars_opt:
49+
activate: true
50+
ntrials: 25
51+
njobs: 4
52+
timeout: 3600
53+
hyper_par_ranges:
54+
{
55+
"max_depth": !!python/tuple [3, 4],
56+
"learning_rate": !!python/tuple [0.01, 0.1],
57+
"n_estimators": !!python/tuple [300, 1200],
58+
"min_child_weight": !!python/tuple [1, 10],
59+
"subsample": !!python/tuple [0.8, 1.],
60+
"colsample_bytree": !!python/tuple [0.8, 1.],
61+
}
62+
63+
output:
64+
directory: trainings/Ds
65+
out_labels: # output labels, keep the right number of classes
66+
Bkg: Bkg
67+
Prompt: Prompt
68+
Nonprompt: Nonprompt
69+
# list of variables saved in the dataframes with the applied models
70+
column_to_save_list: ["fPT3Prong"]

0 commit comments

Comments
 (0)