Skip to content

Commit f248969

Browse files
authored
ITS: track memory allocations + graceful of exceeding limit (#14326)
* ITS: track and hard limit memory allocations + tbb Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch> * ITS: fix single threaded Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch> --------- Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent d121ffe commit f248969

23 files changed

+1273
-847
lines changed

Detectors/ITSMFT/ITS/tracking/CMakeLists.txt

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# granted to it by virtue of its status as an Intergovernmental Organization
1010
# or submit itself to any jurisdiction.
1111

12+
#add_compile_options(-O0 -g -fPIC -fno-omit-frame-pointer)
1213
o2_add_library(ITStracking
1314
TARGETVARNAME targetName
1415
SOURCES src/ClusterLines.cxx
@@ -35,12 +36,8 @@ o2_add_library(ITStracking
3536
O2::ITSBase
3637
O2::ITSReconstruction
3738
O2::ITSMFTReconstruction
38-
O2::DataFormatsITS)
39-
40-
if (OpenMP_CXX_FOUND)
41-
target_compile_definitions(${targetName} PRIVATE WITH_OPENMP)
42-
target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX)
43-
endif()
39+
O2::DataFormatsITS
40+
PRIVATE_LINK_LIBRARIES TBB::tbb)
4441

4542
o2_add_library(ITSTrackingInterface
4643
TARGETVARNAME targetName
@@ -50,11 +47,6 @@ o2_add_library(ITSTrackingInterface
5047
O2::Framework
5148
O2::GPUTracking)
5249

53-
if (OpenMP_CXX_FOUND)
54-
target_compile_definitions(${targetName} PRIVATE WITH_OPENMP)
55-
target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX)
56-
endif()
57-
5850
o2_target_root_dictionary(ITStracking
5951
HEADERS include/ITStracking/ClusterLines.h
6052
include/ITStracking/Tracklet.h

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
1414
#define TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
1515

16+
#include "ITStracking/BoundedAllocator.h"
1617
#include "ITStracking/TimeFrame.h"
1718
#include "ITStracking/Configuration.h"
1819
#include "ITStrackingGPU/Utils.h"
@@ -62,7 +63,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
6263
void loadTrackSeedsDevice();
6364
void loadTrackSeedsChi2Device();
6465
void loadRoadsDevice();
65-
void loadTrackSeedsDevice(std::vector<CellSeed>&);
66+
void loadTrackSeedsDevice(bounded_vector<CellSeed>&);
6667
void createTrackletsBuffers();
6768
void createCellsBuffers(const int);
6869
void createCellsDevice();
@@ -72,10 +73,10 @@ class TimeFrameGPU : public TimeFrame<nLayers>
7273
void createNeighboursDevice(const unsigned int layer, std::vector<std::pair<int, int>>& neighbours);
7374
void createNeighboursLUTDevice(const int, const unsigned int);
7475
void createNeighboursDeviceArray();
75-
void createTrackITSExtDevice(std::vector<CellSeed>&);
76-
void downloadTrackITSExtDevice(std::vector<CellSeed>&);
77-
void downloadCellsNeighboursDevice(std::vector<std::vector<std::pair<int, int>>>&, const int);
78-
void downloadNeighboursLUTDevice(std::vector<int>&, const int);
76+
void createTrackITSExtDevice(bounded_vector<CellSeed>&);
77+
void downloadTrackITSExtDevice(bounded_vector<CellSeed>&);
78+
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
79+
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
7980
void downloadCellsDevice();
8081
void downloadCellsLUTDevice();
8182
void unregisterRest();
@@ -90,7 +91,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
9091
int getNClustersInRofSpan(const int, const int, const int) const;
9192
IndexTableUtils* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
9293
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
93-
std::vector<o2::its::TrackITSExt>& getTrackITSExt() { return mTrackITSExt; }
94+
auto& getTrackITSExt() { return mTrackITSExt; }
9495
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
9596
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
9697
unsigned char* getDeviceUsedClusters(const int);
@@ -199,7 +200,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
199200
bool mFirstInit = true;
200201

201202
// Temporary buffer for storing output tracks from GPU tracking
202-
std::vector<TrackITSExt> mTrackITSExt;
203+
bounded_vector<TrackITSExt> mTrackITSExt;
203204
};
204205

205206
template <int nLayers>

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,13 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
7171
gsl::span<int*> trackletsLUTsHost,
7272
const int iteration,
7373
const float NSigmaCut,
74-
std::vector<float>& phiCuts,
74+
bounded_vector<float>& phiCuts,
7575
const float resolutionPV,
7676
std::array<float, nLayers>& minR,
7777
std::array<float, nLayers>& maxR,
78-
std::vector<float>& resolutions,
78+
bounded_vector<float>& resolutions,
7979
std::vector<float>& radii,
80-
std::vector<float>& mulScatAng,
80+
bounded_vector<float>& mulScatAng,
8181
const int nBlocks,
8282
const int nThreads);
8383

@@ -104,13 +104,13 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
104104
gsl::span<int*> trackletsLUTsHost,
105105
const int iteration,
106106
const float NSigmaCut,
107-
std::vector<float>& phiCuts,
107+
bounded_vector<float>& phiCuts,
108108
const float resolutionPV,
109109
std::array<float, nLayers>& minR,
110110
std::array<float, nLayers>& maxR,
111-
std::vector<float>& resolutions,
111+
bounded_vector<float>& resolutions,
112112
std::vector<float>& radii,
113-
std::vector<float>& mulScatAng,
113+
bounded_vector<float>& mulScatAng,
114114
const int nBlocks,
115115
const int nThreads);
116116

@@ -190,7 +190,7 @@ void processNeighboursHandler(const int startLayer,
190190
std::array<int*, nLayers - 2>& neighbours,
191191
gsl::span<int*> neighboursDeviceLUTs,
192192
const TrackingFrameInfo** foundTrackingFrameInfo,
193-
std::vector<CellSeed>& seedsHost,
193+
bounded_vector<CellSeed>& seedsHost,
194194
const float bz,
195195
const float MaxChi2ClusterAttachment,
196196
const float maxChi2NDF,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ void TimeFrameGPU<nLayers>::loadRoadsDevice()
420420
}
421421

422422
template <int nLayers>
423-
void TimeFrameGPU<nLayers>::loadTrackSeedsDevice(std::vector<CellSeed>& seeds)
423+
void TimeFrameGPU<nLayers>::loadTrackSeedsDevice(bounded_vector<CellSeed>& seeds)
424424
{
425425
START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), "loading track seeds");
426426
LOGP(debug, "gpu-transfer: loading {} track seeds, for {} MB.", seeds.size(), seeds.size() * sizeof(CellSeed) / MB);
@@ -466,11 +466,10 @@ void TimeFrameGPU<nLayers>::createNeighboursDeviceArray()
466466
}
467467

468468
template <int nLayers>
469-
void TimeFrameGPU<nLayers>::createTrackITSExtDevice(std::vector<CellSeed>& seeds)
469+
void TimeFrameGPU<nLayers>::createTrackITSExtDevice(bounded_vector<CellSeed>& seeds)
470470
{
471471
START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), "reserving tracks");
472-
mTrackITSExt.clear();
473-
mTrackITSExt.resize(seeds.size());
472+
mTrackITSExt = bounded_vector<TrackITSExt>(seeds.size(), {}, this->getMemoryPool().get());
474473
LOGP(debug, "gpu-allocation: reserving {} tracks, for {} MB.", seeds.size(), seeds.size() * sizeof(o2::its::TrackITSExt) / MB);
475474
allocMemAsync(reinterpret_cast<void**>(&mTrackITSExtDevice), seeds.size() * sizeof(o2::its::TrackITSExt), mGpuStreams[0], this->getExtAllocator());
476475
GPUChkErrS(cudaMemsetAsync(mTrackITSExtDevice, 0, seeds.size() * sizeof(o2::its::TrackITSExt), mGpuStreams[0]->get()));
@@ -503,7 +502,7 @@ void TimeFrameGPU<nLayers>::downloadCellsLUTDevice()
503502
}
504503

505504
template <int nLayers>
506-
void TimeFrameGPU<nLayers>::downloadCellsNeighboursDevice(std::vector<std::vector<std::pair<int, int>>>& neighbours, const int layer)
505+
void TimeFrameGPU<nLayers>::downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>& neighbours, const int layer)
507506
{
508507
START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), fmt::format("downloading neighbours from layer {}", layer));
509508
LOGP(debug, "gpu-transfer: downloading {} neighbours, for {} MB.", neighbours[layer].size(), neighbours[layer].size() * sizeof(std::pair<int, int>) / MB);
@@ -512,7 +511,7 @@ void TimeFrameGPU<nLayers>::downloadCellsNeighboursDevice(std::vector<std::vecto
512511
}
513512

514513
template <int nLayers>
515-
void TimeFrameGPU<nLayers>::downloadNeighboursLUTDevice(std::vector<int>& lut, const int layer)
514+
void TimeFrameGPU<nLayers>::downloadNeighboursLUTDevice(bounded_vector<int>& lut, const int layer)
516515
{
517516
START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), fmt::format("downloading neighbours LUT from layer {}", layer));
518517
LOGP(debug, "gpu-transfer: downloading neighbours LUT for {} elements on layer {}, for {} MB.", lut.size(), layer, lut.size() * sizeof(int) / MB);
@@ -521,7 +520,7 @@ void TimeFrameGPU<nLayers>::downloadNeighboursLUTDevice(std::vector<int>& lut, c
521520
}
522521

523522
template <int nLayers>
524-
void TimeFrameGPU<nLayers>::downloadTrackITSExtDevice(std::vector<CellSeed>& seeds)
523+
void TimeFrameGPU<nLayers>::downloadTrackITSExtDevice(bounded_vector<CellSeed>& seeds)
525524
{
526525
START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), "downloading tracks");
527526
LOGP(debug, "gpu-transfer: downloading {} tracks, for {} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / MB);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
221221
auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
222222
for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) {
223223
const int minimumLayer{startLevel - 1};
224-
std::vector<CellSeed> trackSeeds;
224+
bounded_vector<CellSeed> trackSeeds(this->getMemoryPool().get());
225225
for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) {
226226
if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
227227
continue;

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -862,13 +862,13 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
862862
gsl::span<int*> trackletsLUTsHost,
863863
const int iteration,
864864
const float NSigmaCut,
865-
std::vector<float>& phiCuts,
865+
bounded_vector<float>& phiCuts,
866866
const float resolutionPV,
867867
std::array<float, nLayers>& minRs,
868868
std::array<float, nLayers>& maxRs,
869-
std::vector<float>& resolutions,
869+
bounded_vector<float>& resolutions,
870870
std::vector<float>& radii,
871-
std::vector<float>& mulScatAng,
871+
bounded_vector<float>& mulScatAng,
872872
const int nBlocks,
873873
const int nThreads)
874874
{
@@ -928,13 +928,13 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
928928
gsl::span<int*> trackletsLUTsHost,
929929
const int iteration,
930930
const float NSigmaCut,
931-
std::vector<float>& phiCuts,
931+
bounded_vector<float>& phiCuts,
932932
const float resolutionPV,
933933
std::array<float, nLayers>& minRs,
934934
std::array<float, nLayers>& maxRs,
935-
std::vector<float>& resolutions,
935+
bounded_vector<float>& resolutions,
936936
std::vector<float>& radii,
937-
std::vector<float>& mulScatAng,
937+
bounded_vector<float>& mulScatAng,
938938
const int nBlocks,
939939
const int nThreads)
940940
{
@@ -1139,7 +1139,7 @@ void processNeighboursHandler(const int startLayer,
11391139
std::array<int*, nLayers - 2>& neighbours,
11401140
gsl::span<int*> neighboursDeviceLUTs,
11411141
const TrackingFrameInfo** foundTrackingFrameInfo,
1142-
std::vector<CellSeed>& seedsHost,
1142+
bounded_vector<CellSeed>& seedsHost,
11431143
const float bz,
11441144
const float maxChi2ClusterAttachment,
11451145
const float maxChi2NDF,
@@ -1257,9 +1257,8 @@ void processNeighboursHandler(const int startLayer,
12571257
thrust::device_vector<CellSeed> outSeeds(updatedCellSeed.size());
12581258
auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5)));
12591259
auto s{end - outSeeds.begin()};
1260-
std::vector<CellSeed> outSeedsHost(s);
1261-
thrust::copy(outSeeds.begin(), outSeeds.begin() + s, outSeedsHost.begin());
1262-
seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end());
1260+
seedsHost.reserve(seedsHost.size() + s);
1261+
thrust::copy(outSeeds.begin(), outSeeds.begin() + s, std::back_inserter(seedsHost));
12631262
}
12641263
12651264
void trackSeedHandler(CellSeed* trackSeeds,
@@ -1316,13 +1315,13 @@ template void countTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
13161315
gsl::span<int*> trackletsLUTsHost,
13171316
const int iteration,
13181317
const float NSigmaCut,
1319-
std::vector<float>& phiCuts,
1318+
bounded_vector<float>& phiCuts,
13201319
const float resolutionPV,
13211320
std::array<float, 7>& minRs,
13221321
std::array<float, 7>& maxRs,
1323-
std::vector<float>& resolutions,
1322+
bounded_vector<float>& resolutions,
13241323
std::vector<float>& radii,
1325-
std::vector<float>& mulScatAng,
1324+
bounded_vector<float>& mulScatAng,
13261325
const int nBlocks,
13271326
const int nThreads);
13281327
@@ -1348,13 +1347,13 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
13481347
gsl::span<int*> trackletsLUTsHost,
13491348
const int iteration,
13501349
const float NSigmaCut,
1351-
std::vector<float>& phiCuts,
1350+
bounded_vector<float>& phiCuts,
13521351
const float resolutionPV,
13531352
std::array<float, 7>& minRs,
13541353
std::array<float, 7>& maxRs,
1355-
std::vector<float>& resolutions,
1354+
bounded_vector<float>& resolutions,
13561355
std::vector<float>& radii,
1357-
std::vector<float>& mulScatAng,
1356+
bounded_vector<float>& mulScatAng,
13581357
const int nBlocks,
13591358
const int nThreads);
13601359
@@ -1367,7 +1366,7 @@ template void processNeighboursHandler<7>(const int startLayer,
13671366
std::array<int*, 5>& neighbours,
13681367
gsl::span<int*> neighboursDeviceLUTs,
13691368
const TrackingFrameInfo** foundTrackingFrameInfo,
1370-
std::vector<CellSeed>& seedsHost,
1369+
bounded_vector<CellSeed>& seedsHost,
13711370
const float bz,
13721371
const float maxChi2ClusterAttachment,
13731372
const float maxChi2NDF,

0 commit comments

Comments
 (0)