Skip to content

Commit f4f8f43

Browse files
authored
Improve logging in case of OOM (#13786)
1 parent 24e05f9 commit f4f8f43

File tree

3 files changed

+26
-4
lines changed

3 files changed

+26
-4
lines changed

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ class TimeFrame
259259
void printCellLUTonLayer(int i);
260260
void printTrackletLUTs();
261261
void printCellLUTs();
262+
/// Print a diagnostic dump of the ROF with index `rofId` to std::cout:
/// the number of clusters on each layer, the number of seeding vertices and,
/// for every vertex, its x/y/z position and number of contributors.
void printROFInfo(const int rofId);
262263

263264
IndexTableUtils mIndexTableUtils;
264265

Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,5 +608,19 @@ void TimeFrame::printNClsPerROF()
608608
std::cout << std::endl;
609609
}
610610
}
611+
612+
void TimeFrame::printROFInfo(const int rofId)
613+
{
614+
std::cout << "ROF " << rofId << " dump:" << std::endl;
615+
for (int iLayer{0}; iLayer < mClusters.size(); ++iLayer) {
616+
std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(rofId, iLayer).size() << " clusters." << std::endl;
617+
}
618+
std::cout << "Number of seeding vertices: " << getPrimaryVertices(rofId).size() << std::endl;
619+
int iVertex{0};
620+
for (auto& v : getPrimaryVertices(rofId)) {
621+
std::cout << "\t vertex " << iVertex++ << ": x=" << v.getX() << " " << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors." << std::endl;
622+
}
623+
}
624+
611625
} // namespace its
612626
} // namespace o2

Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ namespace o2
3535
{
3636
namespace its
3737
{
38+
using o2::its::constants::GB;
3839

3940
Tracker::Tracker(o2::its::TrackerTraits* traits)
4041
{
@@ -74,25 +75,31 @@ void Tracker::clustersToTracks(std::function<void(std::string s)> logger, std::f
7475
&Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex);
7576
nTracklets += mTraits->getTFNumberOfTracklets();
7677
if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
77-
error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration));
78+
mTimeFrame->printROFInfo(iROFs);
79+
error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
80+
iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
7881
break;
7982
}
8083
float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f;
8184
if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) {
82-
error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit));
85+
error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}",
86+
trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit));
8387
break;
8488
}
8589

8690
timeCells += evaluateTask(
8791
&Tracker::computeCells, "Cell finding", [](std::string) {}, iteration);
8892
nCells += mTraits->getTFNumberOfCells();
8993
if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
90-
error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration));
94+
mTimeFrame->printROFInfo(iROFs);
95+
error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
96+
iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
9197
break;
9298
}
9399
float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f;
94100
if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) {
95-
error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit));
101+
error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}",
102+
cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit));
96103
break;
97104
}
98105

0 commit comments

Comments
 (0)