2828#include " ITStracking/Constants.h"
2929#include " ITStracking/IndexTableUtils.h"
3030#include " ITStracking/MathUtils.h"
31+ #include " ITStracking/ExternalAllocator.h"
3132#include " DataFormatsITS/TrackITS.h"
3233#include " ReconstructionDataFormats/Vertex.h"
3334
3435#include " ITStrackingGPU/TrackerTraitsGPU.h"
3536#include " ITStrackingGPU/TrackingKernels.h"
3637#include " ITStrackingGPU/Utils.h"
3738
38- #include " GPUCommonHelpers.h"
39-
4039#ifndef __HIPCC__
4140#define THRUST_NAMESPACE thrust::cuda
4241#else
@@ -64,6 +63,37 @@ GPUdii() float Sq(float v)
6463namespace gpu
6564{
6665
66+ template <typename T>
67+ class TypedAllocator : public thrust ::device_allocator<T>
68+ {
69+ public:
70+ using value_type = T;
71+ using pointer = T*;
72+
73+ template <typename U>
74+ struct rebind {
75+ using other = TypedAllocator<U>;
76+ };
77+
78+ explicit TypedAllocator (ExternalAllocator* allocPtr)
79+ : mInternalAllocator(allocPtr) {}
80+
81+ T* allocate (size_t n)
82+ {
83+ return reinterpret_cast <T*>(mInternalAllocator ->allocate (n * sizeof (T)));
84+ }
85+
86+ void deallocate (T* p, size_t n)
87+ {
88+ char * raw_ptr = reinterpret_cast <char *>(p);
89+ size_t bytes = n * sizeof (T);
90+ mInternalAllocator ->deallocate (raw_ptr, bytes); // redundant as internal dealloc is no-op.
91+ }
92+
93+ private:
94+ ExternalAllocator* mInternalAllocator ;
95+ };
96+
6797GPUd () const int4 getBinsRect (const Cluster& currentCluster, const int layerIndex,
6898 const o2::its::IndexTableUtils& utils,
6999 const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
@@ -1117,7 +1147,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11171147
11181148int filterCellNeighboursHandler (gpuPair<int , int >* cellNeighbourPairs,
11191149 int * cellNeighbours,
1120- unsigned int nNeigh)
1150+ unsigned int nNeigh,
1151+ o2::its::ExternalAllocator* allocator)
11211152{
11221153 thrust::device_ptr<gpuPair<int , int >> neighVectorPairs (cellNeighbourPairs);
11231154 thrust::device_ptr<int > validNeighs (cellNeighbours);
@@ -1140,6 +1171,7 @@ void processNeighboursHandler(const int startLayer,
11401171 gsl::span<int *> neighboursDeviceLUTs,
11411172 const TrackingFrameInfo** foundTrackingFrameInfo,
11421173 bounded_vector<CellSeed>& seedsHost,
1174+ o2::its::ExternalAllocator* allocator,
11431175 const float bz,
11441176 const float maxChi2ClusterAttachment,
11451177 const float maxChi2NDF,
@@ -1148,8 +1180,10 @@ void processNeighboursHandler(const int startLayer,
11481180 const int nBlocks,
11491181 const int nThreads)
11501182{
1151- thrust::device_vector<int > foundSeedsTable (nCells[startLayer] + 1 ); // Shortcut: device_vector skips central memory management, we are relying on the contingency.
1152- // TODO: fix this.
1183+ auto allocInt = gpu::TypedAllocator<int >(allocator);
1184+ auto allocCellSeed = gpu::TypedAllocator<CellSeed>(allocator);
1185+ thrust::device_vector<int , gpu::TypedAllocator<int >> foundSeedsTable (nCells[startLayer] + 1 , 0 , allocInt); // Shortcut: device_vector skips central memory management, we are relying on the contingency.
1186+ // TODO: fix this.
11531187
11541188 gpu::processNeighboursKernel<true ><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
11551189 o2::gpu::CAMath::Min (nThreads, GPU_THREADS)>>>(
@@ -1172,8 +1206,8 @@ void processNeighboursHandler(const int startLayer,
11721206 matCorrType);
11731207 gpu::cubExclusiveScanInPlace (foundSeedsTable, nCells[startLayer] + 1 );
11741208
1175- thrust::device_vector<int > updatedCellId (foundSeedsTable.back());
1176- thrust::device_vector<CellSeed> updatedCellSeed (foundSeedsTable.back());
1209+ thrust::device_vector<int , gpu::TypedAllocator< int >> updatedCellId (foundSeedsTable.back(), 0, allocInt );
1210+ thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> updatedCellSeed (foundSeedsTable.back(), allocCellSeed );
11771211 gpu::processNeighboursKernel<false ><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
11781212 o2::gpu::CAMath::Min (nThreads, GPU_THREADS)>>>(
11791213 startLayer,
@@ -1195,13 +1229,13 @@ void processNeighboursHandler(const int startLayer,
11951229 matCorrType);
11961230
11971231 int level = startLevel;
1198- thrust::device_vector<int > lastCellId;
1199- thrust::device_vector<CellSeed> lastCellSeed;
1232+ thrust::device_vector<int , gpu::TypedAllocator< int >> lastCellId (allocInt) ;
1233+ thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> lastCellSeed (allocCellSeed) ;
12001234 for (int iLayer{startLayer - 1 }; iLayer > 0 && level > 2 ; --iLayer) {
12011235 lastCellSeed.swap (updatedCellSeed);
12021236 lastCellId.swap (updatedCellId);
1203- thrust::device_vector<CellSeed>( ).swap (updatedCellSeed);
1204- thrust::device_vector<int >( ).swap (updatedCellId);
1237+ thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>>(allocCellSeed ).swap (updatedCellSeed);
1238+ thrust::device_vector<int , gpu::TypedAllocator< int >>(allocInt ).swap (updatedCellId);
12051239 auto lastCellSeedSize{lastCellSeed.size ()};
12061240 foundSeedsTable.resize (lastCellSeedSize + 1 );
12071241 thrust::fill (foundSeedsTable.begin (), foundSeedsTable.end (), 0 );
@@ -1253,8 +1287,7 @@ void processNeighboursHandler(const int startLayer,
12531287 propagator,
12541288 matCorrType);
12551289 }
1256-
1257- thrust::device_vector<CellSeed> outSeeds (updatedCellSeed.size());
1290+ thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> outSeeds (updatedCellSeed.size(), allocCellSeed);
12581291 auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1 .e3 , maxChi2NDF * ((startLevel + 2 ) * 2 - 5)));
12591292 auto s{end - outSeeds.begin ()};
12601293 seedsHost.reserve(seedsHost.size() + s);
@@ -1367,6 +1400,7 @@ template void processNeighboursHandler<7>(const int startLayer,
13671400 gsl::span<int *> neighboursDeviceLUTs,
13681401 const TrackingFrameInfo** foundTrackingFrameInfo,
13691402 bounded_vector<CellSeed>& seedsHost,
1403+ o2::its::ExternalAllocator*,
13701404 const float bz,
13711405 const float maxChi2ClusterAttachment,
13721406 const float maxChi2NDF,
0 commit comments