Add ML inference and BDT scores in output table

fgrosa · fgrosa · commit 152abf81f9c9 · 2025-06-13T14:44:20.000+02:00
diff --git a/DPG/Tasks/AOTTrack/CMakeLists.txt b/DPG/Tasks/AOTTrack/CMakeLists.txt
@@ -87,6 +87,6 @@ o2physics_add_dpl_workflow(tag-and-probe-dmesons
 
 o2physics_add_dpl_workflow(derived-data-creator-d0-calibration
     SOURCES derivedDataCreatorD0Calibration.cxx
-    PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore O2Physics::AnalysisCore O2::DCAFitter
+    PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore O2::DCAFitter O2Physics::MLCore
     COMPONENT_NAME Analysis)
 
diff --git a/DPG/Tasks/AOTTrack/D0CalibTables.h b/DPG/Tasks/AOTTrack/D0CalibTables.h
@@ -61,6 +61,17 @@ namespace o2
       return static_cast<uint8_t>(std::clamp(roundValue, 0, 255));
     }
 
+    /// It compresses a value to a uint16_t with a given precision: the value is divided by the precision, rounded and clamped to [0, 65535]
+    ///\param origValue is the original value
+    ///\param precision is the desired precision (size of one quantisation step)
+    ///\return The value compressed to a uint16_t
+    template<typename T>
+    uint16_t getCompressedUint16(T origValue, double precision)
+    {
+      int roundValue = static_cast<int>(std::round(origValue / precision)); // number of precision steps, rounded to the nearest integer
+      return static_cast<uint16_t>(std::clamp(roundValue, 0, 65535)); // saturate at the uint16_t limits instead of wrapping around
+    }
+
     /// It uses a sinh-based scaling function, which provides a compromise between fixed-step and relative quantization.
     // This approach reflects typical resolution formulas and is well-suited for detector calibration data.
     ///\param origValue is the original value
@@ -108,7 +119,7 @@ namespace o2
     template<typename T>
     int8_t getCompressedCosPa(T cosPa)
     {
-      return getCompressedUint8(cosPa - 0.25, 0.001); // in the range from 0.75 to 1
+      return getCompressedUint8(cosPa - 0.75, 0.001); // in the range from 0.75 to 1: the 0.75 offset maps cosPa onto about 0-250 steps of 0.001; NOTE(review): the return type is int8_t while the helper is named Uint8 - confirm that steps above 127 are preserved
     }
 
     /// It compresses the chi2
@@ -131,6 +142,24 @@ namespace o2
       return compressedNumSigma;
     }
 
+    /// It compresses the background bdt score (1./65535 precision), so that scores in [0, 1] span the full uint16_t range
+    ///\param bdtScore is the bdt score (values outside [0, 1] saturate at 0 or 65535)
+    ///\return The bdt score compressed to a uint16_t with 1./65535 precision
+    template<typename T>
+    uint16_t getCompressedBdtScoreBkg(T bdtScore)
+    {
+      return getCompressedUint16(bdtScore, 1./65535);
+    }
+
+    /// It compresses a signal (prompt or non-prompt) bdt score (1./255 precision), coarser than the 1./65535 used for the background score
+    ///\param bdtScore is the bdt score
+    ///\return The bdt score compressed to a uint8_t with 1./255 precision
+    template<typename T>
+    uint8_t getCompressedBdtScoreSgn(T bdtScore)
+    {
+      return getCompressedUint8(bdtScore, 1./255);
+    }
+
     /// It compresses the number of sigma (0.1 sigma precision)
     ///\param occupancy is the occupancy value
     ///\return The number of sigma compressed to a int8_t with 0.1 precision
@@ -174,11 +203,11 @@ namespace o2
       0,
       1.0,
       2.0,
+      3.0,
       4.0,
       6.0,
       8.0,
       12.0,
-      16.0,
       24.0,
       50.0,
       1000.0};
@@ -187,12 +216,12 @@ namespace o2
     // default values for the cuts
     constexpr float CutsCand[NBinsPtCand][NCutVarsCand] = {{0.400, 0., 10., 10., 0.97, 0.97, 0, 2, 0.01, 0.01},   /* 0  < pT < 1    */
                                                            {0.400, 0., 10., 10., 0.97, 0.97, 0, 2, 0.01, 0.01},   /* 1  < pT < 2    */
-                                                           {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 2  < pT < 4    */
+                                                           {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 2  < pT < 3    */
+                                                           {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 3  < pT < 4    */
                                                            {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 4  < pT < 6    */
                                                            {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 6  < pT < 8    */
                                                            {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 8  < pT < 12   */
-                                                           {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 12 < pT < 16   */
-                                                           {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 16 < pT < 24   */
+                                                           {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 12 < pT < 24   */
                                                            {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01},   /* 24 < pT < 50   */
                                                            {0.400, 0., 10., 10., 0.95, 0.95, 0, 2, 0.01, 0.01}};  /* 50 < pT < 1000 */
 
@@ -211,6 +240,51 @@ namespace o2
 
     // column labels
     static const std::vector<std::string> labelsCutVarCand = {"delta inv. mass", "max d0d0", "max pointing angle", "max pointing angle XY", "min cos pointing angle", "min cos pointing angle xy", "min norm decay length", "min norm decay length XY", "min decay length", "min decay length XY"};
+
+    static constexpr int NBinsPtMl = 10; // number of pT bins used for the ML score cuts
+    // default values for the pT bin edges of the ML selection (can be used to configure histogram axis)
+    // NBinsPtMl + 1 edges: row i of the cuts array lies between BinsPtMl[i] and BinsPtMl[i + 1]
+    constexpr double BinsPtMl[NBinsPtMl + 1] = {
+      0,
+      1.0,
+      2.0,
+      3.0,
+      4.0,
+      6.0,
+      8.0,
+      12.0,
+      24.0,
+      50.0,
+      1000.0};
+    auto vecBinsPtMl = std::vector<double>{BinsPtMl, BinsPtMl + NBinsPtMl + 1}; // same edges as a std::vector; NOTE(review): non-const variable defined in a header - confirm the header is included by a single translation unit
+
+    // default values for the ML score cuts, one row per pT bin; columns follow labelsCutMl: max bkg score, min prompt score, min non-prompt score (presumably these defaults reject nothing for scores in [0, 1] - confirm)
+    constexpr double CutsMl[NBinsPtMl][3] = {{1., 0., 0.},   /* 0  < pT < 1    */
+                                             {1., 0., 0.},   /* 1  < pT < 2    */
+                                             {1., 0., 0.},   /* 2  < pT < 3    */
+                                             {1., 0., 0.},   /* 3  < pT < 4    */
+                                             {1., 0., 0.},   /* 4  < pT < 6    */
+                                             {1., 0., 0.},   /* 6  < pT < 8    */
+                                             {1., 0., 0.},   /* 8  < pT < 12   */
+                                             {1., 0., 0.},   /* 12 < pT < 24   */
+                                             {1., 0., 0.},   /* 24 < pT < 50   */
+                                             {1., 0., 0.}};  /* 50 < pT < 1000 */
+
+    // row labels of the ML cuts array (one per pT bin)
+    static const std::vector<std::string> labelsPtMl = {
+      "pT bin 0",
+      "pT bin 1",
+      "pT bin 2",
+      "pT bin 3",
+      "pT bin 4",
+      "pT bin 5",
+      "pT bin 6",
+      "pT bin 7",
+      "pT bin 8",
+      "pT bin 9"};
+
+    // column labels of the ML cuts array (one per BDT output score that is cut on)
+    static const std::vector<std::string> labelsCutMl = {"max BDT score bkg", "min BDT score prompt", "min BDT score nonprompt"};
   } // namespace hf_calib
 
   namespace aod
@@ -330,6 +404,12 @@ namespace o2
       DECLARE_SOA_COLUMN(PointingAngle, pointingAngle, uint8_t); //! compressed pointing angle
       DECLARE_SOA_COLUMN(PointingAngleXY, pointingAngleXY, uint8_t); //! compressed pointing angle XY
       DECLARE_SOA_COLUMN(DecVtxChi2, decVtxChi2, uint8_t); //! compressed decay vertex chi2
+      DECLARE_SOA_COLUMN(BdtScoreBkgD0, bdtScoreBkgD0, uint16_t); //! compressed BDT score (bkg, D0 mass hypo)
+      DECLARE_SOA_COLUMN(BdtScorePromptD0, bdtScorePromptD0, uint8_t); //! compressed BDT score (prompt, D0 mass hypo)
+      DECLARE_SOA_COLUMN(BdtScoreNonpromptD0, bdtScoreNonpromptD0, uint8_t); //! compressed BDT score (non-prompt, D0 mass hypo)
+      DECLARE_SOA_COLUMN(BdtScoreBkgD0bar, bdtScoreBkgD0bar, uint16_t); //! compressed BDT score (bkg, D0bar mass hypo)
+      DECLARE_SOA_COLUMN(BdtScorePromptD0bar, bdtScorePromptD0bar, uint8_t); //! compressed BDT score (prompt, D0bar mass hypo)
+      DECLARE_SOA_COLUMN(BdtScoreNonpromptD0bar, bdtScoreNonpromptD0bar, uint8_t); //! compressed BDT score (non-prompt, D0bar mass hypo)
     } // namespace hf_calib
 
     DECLARE_SOA_TABLE(D0CalibCand, "AOD", "D0CALIBCANDS",
@@ -351,7 +431,13 @@ namespace o2
                       hf_calib::CosPaXY,
                       hf_calib::PointingAngle,
                       hf_calib::PointingAngleXY,
-                      hf_calib::DecVtxChi2);
+                      hf_calib::DecVtxChi2,
+                      hf_calib::BdtScoreBkgD0,
+                      hf_calib::BdtScorePromptD0,
+                      hf_calib::BdtScoreNonpromptD0,
+                      hf_calib::BdtScoreBkgD0bar,
+                      hf_calib::BdtScorePromptD0bar,
+                      hf_calib::BdtScoreNonpromptD0bar);
   } // namespace aod
 } // namespace o2
 #endif // D0CALIBTABLES_H_
diff --git a/DPG/Tasks/AOTTrack/derivedDataCreatorD0Calibration.cxx b/DPG/Tasks/AOTTrack/derivedDataCreatorD0Calibration.cxx
@@ -14,24 +14,12 @@
 ///
 /// \author Fabrizio Grosa <fabrizio.grosa@cern.ch>, CERN
 
-#include <algorithm>
-#include <array>
-#include <cmath>
-#include <map>
-#include <string>
-
-#include <TH1D.h>
-#include <TRandom3.h>
-
-#include "CommonConstants/PhysicsConstants.h"
-#include "DCAFitter/DCAFitterN.h"
-#include "Framework/AnalysisTask.h"
-#include "Framework/runDataProcessing.h"
-#include "Framework/RunningWorkflowInfo.h"
-#include "ReconstructionDataFormats/DCA.h"
-
 #include "D0CalibTables.h"
 
+#include "PWGHF/Utils/utilsAnalysis.h"
+#include "PWGHF/Utils/utilsBfieldCCDB.h"
+#include "PWGHF/Utils/utilsPid.h"
+
 #include "Common/Core/RecoDecay.h"
 #include "Common/Core/TrackSelectorPID.h"
 #include "Common/Core/trackUtilities.h"
@@ -42,9 +30,23 @@
 #include "Common/DataModel/PIDResponseTPC.h"
 #include "Common/DataModel/TrackSelectionTables.h"
 #include "CommonDataFormat/InteractionRecord.h"
+#include "Tools/ML/MlResponse.h"
 
-#include "PWGHF/Utils/utilsAnalysis.h"
-#include "PWGHF/Utils/utilsBfieldCCDB.h"
+#include <CommonConstants/PhysicsConstants.h>
+#include <DCAFitter/DCAFitterN.h>
+#include <Framework/AnalysisTask.h>
+#include <Framework/runDataProcessing.h>
+#include <Framework/RunningWorkflowInfo.h>
+#include <ReconstructionDataFormats/DCA.h>
+
+#include <TH1D.h>
+#include <TRandom3.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <map>
+#include <string>
 
 using namespace o2;
 using namespace o2::analysis;
@@ -86,15 +88,28 @@ struct DerivedDataCreatorD0Calibration {
   struct : ConfigurableGroup {
     Configurable<bool> apply{"apply", false, "flag to apply downsampling"};
     Configurable<std::string> pathCcdbWeights{"pathCcdbWeights", "", "CCDB path containing pT-differential weights"};
+    std::string prefix = "downsampling"; // NOTE(review): presumably prepended to the option names of this group, keeping its "apply" distinct from the one in cfgMl - confirm
   } cfgDownsampling;
 
+  struct : ConfigurableGroup { // configuration of the ML (BDT) inference for the D0 candidates
+    Configurable<bool> apply{"apply", false, "flag to apply ML models inference"};
+    Configurable<std::vector<double>> binsPt{"binsPt", std::vector<double>{hf_calib::vecBinsPtMl}, "pT bin limits for ML models inference"};
+    Configurable<LabeledArray<double>> thresholdMlScores{"thresholdMlScores", {hf_calib::CutsMl[0], hf_calib::NBinsPtMl, 3, hf_calib::labelsPtMl, hf_calib::labelsCutMl}, "Threshold values for Ml output scores of D0 candidates"};
+    Configurable<bool> loadMlModelsFromCCDB{"loadMlModelsFromCCDB", true, "Flag to enable or disable the loading of ML models from CCDB"};
+    Configurable<std::vector<std::string>> modelPathsCCDB{"modelPathsCCDB", std::vector<std::string>{"Users/f/fgrosa/D0Calib/BDT/Pt0_1"}, "Paths of models on CCDB"};
+    Configurable<std::vector<std::string>> onnxFileNames{"onnxFileNames", std::vector<std::string>{"ModelHandler_pT_0_1.onnx"}, "ONNX file names for each pT bin (if not from CCDB full path)"}; // NOTE(review): the default lists hold one model while binsPt defines 10 pT bins - confirm one entry per pT bin is required
+    std::string prefix = "ml";
+  } cfgMl;
+
   using TracksWCovExtraPid = soa::Join<aod::Tracks, aod::TracksCov, aod::TracksExtra, aod::TrackSelection, aod::pidTPCFullPi, aod::pidTOFFullPi, aod::pidTPCFullKa, aod::pidTOFFullKa>;
   using CollisionsWEvSel = soa::Join<aod::Collisions, aod::CentFT0Cs, aod::EvSels>;
 
   Preslice<aod::TrackAssoc> trackIndicesPerCollision = aod::track_association::collisionId;
 
   o2::vertexing::DCAFitterN<2> df; // 2-prong vertex fitter
   Service<o2::ccdb::BasicCCDBManager> ccdb;
+  o2::ccdb::CcdbApi ccdbApi;
+  o2::analysis::MlResponse<float> mlResponse;
 
   TrackSelectorPi selectorPion;
   TrackSelectorKa selectorKaon;
@@ -119,6 +134,18 @@ struct DerivedDataCreatorD0Calibration {
       histDownSampl.setObject(reinterpret_cast<TH1D*>(ccdb->getSpecific<TH1D>(cfgDownsampling.pathCcdbWeights)));
     }
 
+    if (cfgMl.apply) { // the ML response is configured and initialised only when ML inference is requested
+      std::vector<int> cutDir = {o2::cuts_ml::CutDirection::CutGreater, o2::cuts_ml::CutDirection::CutSmaller, o2::cuts_ml::CutDirection::CutSmaller}; // one direction per score, in the order of labelsCutMl (max bkg, min prompt, min non-prompt)
+      mlResponse.configure(cfgMl.binsPt, cfgMl.thresholdMlScores, cutDir, 3); // 3 = number of output scores (bkg, prompt, non-prompt)
+      if (cfgMl.loadMlModelsFromCCDB) {
+        ccdbApi.init("http://alice-ccdb.cern.ch"); // NOTE(review): the CCDB URL is hard-coded here - confirm whether it should come from a configurable
+        mlResponse.setModelPathsCCDB(cfgMl.onnxFileNames, ccdbApi, cfgMl.modelPathsCCDB, -1); // NOTE(review): -1 presumably selects the latest object - confirm against MlResponse
+      } else {
+        mlResponse.setModelPathsLocal(cfgMl.onnxFileNames);
+      }
+      mlResponse.init();
+    }
+
     df.setPropagateToPCA(true);
     df.setMaxR(200.f);
     df.setMaxDZIni(4.f);
@@ -374,16 +401,26 @@ struct DerivedDataCreatorD0Calibration {
           }
 
           float invMassD0{0.f}, invMassD0bar{0.f};
+          std::vector<float> bdtScoresD0{0.f, 1.f, 1.f}, bdtScoresD0bar{0.f, 1.f, 1.f}; // always selected a priori
           if (massHypo == D0MassHypo::D0 || massHypo == D0MassHypo::D0AndD0Bar) {
             invMassD0 = RecoDecay::m(std::array{pVecPos, pVecNeg}, std::array{o2::constants::physics::MassPiPlus, o2::constants::physics::MassKPlus});
             if (std::abs(invMassD0 - o2::constants::physics::MassD0) > cfgCandCuts.topologicalCuts->get(ptBinD0, "delta inv. mass")) {
               massHypo -= D0MassHypo::D0;
+              bdtScoresD0 = std::vector<float>{1.f, 0.f, 0.f};
+            } else if (cfgMl.apply) {
+              // apply BDT models only when enabled: mlResponse is configured only if cfgMl.apply is set, otherwise the a-priori scores are kept
+              std::vector<float> featuresCandD0 = {dcaPos.getY(), dcaNeg.getY(), chi2PCA, cosPaD0, cosPaXYD0, decLenXYD0, decLenD0, dcaPos.getY() * dcaNeg.getY(), aod::pid_tpc_tof_utils::combineNSigma<false>(trackPos.tpcNSigmaPi(), trackPos.tofNSigmaPi()), aod::pid_tpc_tof_utils::combineNSigma<false>(trackNeg.tpcNSigmaKa(), trackNeg.tofNSigmaKa()), trackPos.tpcNSigmaPi(), trackPos.tpcNSigmaKa(), aod::pid_tpc_tof_utils::combineNSigma<false>(trackPos.tpcNSigmaKa(), trackPos.tofNSigmaKa()), trackNeg.tpcNSigmaPi(), trackNeg.tpcNSigmaKa(), aod::pid_tpc_tof_utils::combineNSigma<false>(trackNeg.tpcNSigmaPi(), trackNeg.tofNSigmaPi())};
+              mlResponse.isSelectedMl(featuresCandD0, ptD0, bdtScoresD0);
             }
           }
           if (massHypo >= D0MassHypo::D0Bar) {
             invMassD0bar = RecoDecay::m(std::array{pVecNeg, pVecPos}, std::array{o2::constants::physics::MassPiPlus, o2::constants::physics::MassKPlus});
             if (std::abs(invMassD0bar - o2::constants::physics::MassD0) > cfgCandCuts.topologicalCuts->get(ptBinD0, "delta inv. mass")) {
               massHypo -= D0MassHypo::D0Bar;
+              bdtScoresD0bar = std::vector<float>{1.f, 0.f, 0.f};
+            } else if (cfgMl.apply) { // run the BDT only when enabled: mlResponse is configured only if cfgMl.apply is set, otherwise the a-priori scores are kept
+              std::vector<float> featuresCandD0bar = {dcaPos.getY(), dcaNeg.getY(), chi2PCA, cosPaD0, cosPaXYD0, decLenXYD0, decLenD0, dcaPos.getY() * dcaNeg.getY(), aod::pid_tpc_tof_utils::combineNSigma<false>(trackNeg.tpcNSigmaPi(), trackNeg.tofNSigmaPi()), aod::pid_tpc_tof_utils::combineNSigma<false>(trackPos.tpcNSigmaKa(), trackPos.tofNSigmaKa()), trackNeg.tpcNSigmaPi(), trackNeg.tpcNSigmaKa(), aod::pid_tpc_tof_utils::combineNSigma<false>(trackNeg.tpcNSigmaKa(), trackNeg.tofNSigmaKa()), trackPos.tpcNSigmaPi(), trackPos.tpcNSigmaKa(), aod::pid_tpc_tof_utils::combineNSigma<false>(trackPos.tpcNSigmaPi(), trackPos.tofNSigmaPi())};
+              mlResponse.isSelectedMl(featuresCandD0bar, ptD0, bdtScoresD0bar);
             }
           }
           if (massHypo == 0) {
@@ -426,7 +463,7 @@ struct DerivedDataCreatorD0Calibration {
           // candidate
           candTable(selectedCollisions[collision.globalIndex()], selectedTracks[trackPos.globalIndex()], selectedTracks[trackNeg.globalIndex()], massHypo, ptD0, etaD0, phiD0, invMassD0, invMassD0bar,
                     getCompressedDecayLength(decLenD0), getCompressedDecayLength(decLenXYD0), getCompressedNormDecayLength(decLenD0/errorDecayLengthD0), getCompressedNormDecayLength(decLenXYD0/errorDecayLengthXYD0),
-                    getCompressedCosPa(cosPaD0), getCompressedCosPa(cosPaXYD0), getCompressedPointingAngle(paD0), getCompressedPointingAngle(paXYD0), getCompressedChi2(chi2PCA));
+                    getCompressedCosPa(cosPaD0), getCompressedCosPa(cosPaXYD0), getCompressedPointingAngle(paD0), getCompressedPointingAngle(paXYD0), getCompressedChi2(chi2PCA), getCompressedBdtScoreBkg(bdtScoresD0[0]), getCompressedBdtScoreSgn(bdtScoresD0[1]), getCompressedBdtScoreSgn(bdtScoresD0[2]), getCompressedBdtScoreBkg(bdtScoresD0bar[0]), getCompressedBdtScoreSgn(bdtScoresD0bar[1]), getCompressedBdtScoreSgn(bdtScoresD0bar[2]));
         } // end loop over negative tracks
       } // end loop over positive tracks
     } // end loop over collisions tracks