@@ -92,6 +92,19 @@ void TimeFrameGPU<nLayers>::setDevicePropagator(const o2::base::PropagatorImpl<f
   mPropagatorDevice = propagator;
 }
 
+template <int nLayers>
+void TimeFrameGPU<nLayers>::loadIndexTableUtils(const int iteration)
+{
+  START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading indextable utils");
+  if (!iteration) {
+    LOGP(debug, "gpu-allocation: allocating IndexTableUtils buffer, for {} MB.", sizeof(IndexTableUtils) / MB);
+    allocMemAsync(reinterpret_cast<void**>(&mIndexTableUtilsDevice), sizeof(IndexTableUtils), nullptr, getExtAllocator());
+  }
+  LOGP(debug, "gpu-transfer: loading IndexTableUtils object, for {} MB.", sizeof(IndexTableUtils) / MB);
+  checkGPUError(cudaMemcpyAsync(mIndexTableUtilsDevice, &mIndexTableUtils, sizeof(IndexTableUtils), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
+  STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
+}
+
 template <int nLayers>
 void TimeFrameGPU<nLayers>::loadUnsortedClustersDevice(const int iteration)
 {
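The new loaders above all follow the same shape: allocate the device buffer only on the first iteration, then refresh it with an asynchronous host-to-device copy on stream 0. A minimal standalone sketch of that pattern is shown below; it uses plain CUDA runtime calls in place of the framework helpers (`allocMemAsync`, `checkGPUError`, the stream-timer macros, the external allocator), and `Utils` is a hypothetical stand-in for `IndexTableUtils`.

```cpp
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Simple error-check helper standing in for checkGPUError().
#define CHECK(call)                                                       \
  do {                                                                    \
    cudaError_t err = (call);                                             \
    if (err != cudaSuccess) {                                             \
      std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));  \
      std::exit(EXIT_FAILURE);                                            \
    }                                                                     \
  } while (0)

struct Utils { // hypothetical stand-in for IndexTableUtils
  int nPhiBins = 0;
  int nZBins = 0;
};

// Allocate once (iteration 0), then copy the host object to the device
// asynchronously on the given stream on every call.
void loadUtils(const Utils& hostUtils, Utils*& deviceUtils, cudaStream_t stream, int iteration)
{
  if (!iteration) {
    CHECK(cudaMallocAsync(reinterpret_cast<void**>(&deviceUtils), sizeof(Utils), stream)); // requires CUDA >= 11.2
  }
  CHECK(cudaMemcpyAsync(deviceUtils, &hostUtils, sizeof(Utils), cudaMemcpyHostToDevice, stream));
}
```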
@@ -128,13 +141,56 @@ void TimeFrameGPU<nLayers>::loadClustersDevice(const int iteration)
   }
 }
 
+template <int nLayers>
+void TimeFrameGPU<nLayers>::loadClustersIndexTables(const int iteration)
+{
+  if (!iteration) {
+    START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading sorted clusters");
+    for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
+      LOGP(info, "gpu-transfer: loading clusters indextable for layer {} with {} elements, for {} MB.", iLayer, mIndexTables[iLayer].size(), mIndexTables[iLayer].size() * sizeof(int) / MB);
+      allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDevice[iLayer]), mIndexTables[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
+      checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDevice[iLayer], mIndexTables[iLayer].data(), mIndexTables[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
+    }
+    allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDeviceArray), nLayers * sizeof(int*), nullptr, getExtAllocator());
+    checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDeviceArray, mClustersIndexTablesDevice.data(), nLayers * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
+    STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
+  }
+}
+
+template <int nLayers>
+void TimeFrameGPU<nLayers>::createUsedClustersDevice(const int iteration)
+{
+  if (!iteration) {
+    START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating used clusters flags");
+    for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
+      LOGP(debug, "gpu-transfer: creating {} used clusters flags on layer {}, for {} MB.", mUsedClusters[iLayer].size(), iLayer, mUsedClusters[iLayer].size() * sizeof(unsigned char) / MB);
+      allocMemAsync(reinterpret_cast<void**>(&mUsedClustersDevice[iLayer]), mUsedClusters[iLayer].size() * sizeof(unsigned char), nullptr, getExtAllocator());
+      checkGPUError(cudaMemsetAsync(mUsedClustersDevice[iLayer], 0, mUsedClusters[iLayer].size() * sizeof(unsigned char), mGpuStreams[0].get()));
+    }
+    allocMemAsync(reinterpret_cast<void**>(&mUsedClustersDeviceArray), nLayers * sizeof(unsigned char*), nullptr, getExtAllocator());
+    checkGPUError(cudaMemcpyAsync(mUsedClustersDeviceArray, mUsedClustersDevice.data(), nLayers * sizeof(unsigned char*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
+    STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
+  }
+}
+
+template <int nLayers>
+void TimeFrameGPU<nLayers>::loadUsedClustersDevice()
+{
+  START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating used clusters flags");
+  for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
+    LOGP(debug, "gpu-transfer: loading {} used clusters flags on layer {}, for {} MB.", mUsedClusters[iLayer].size(), iLayer, mUsedClusters[iLayer].size() * sizeof(unsigned char) / MB);
+    checkGPUError(cudaMemcpyAsync(mUsedClustersDevice[iLayer], mUsedClusters[iLayer].data(), mUsedClusters[iLayer].size() * sizeof(unsigned char), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
+  }
+  STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
+}
+
 template <int nLayers>
 void TimeFrameGPU<nLayers>::loadROframeClustersDevice(const int iteration)
 {
   if (!iteration) {
     START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading ROframe clusters");
     for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
-      LOGP(info, "gpu-transfer: loading {} ROframe clusters info on layer {}, for {} MB.", mROFramesClusters[iLayer].size(), iLayer, mROFramesClusters[iLayer].size() * sizeof(int) / MB);
+      LOGP(debug, "gpu-transfer: loading {} ROframe clusters info on layer {}, for {} MB.", mROFramesClusters[iLayer].size(), iLayer, mROFramesClusters[iLayer].size() * sizeof(int) / MB);
       allocMemAsync(reinterpret_cast<void**>(&mROFramesClustersDevice[iLayer]), mROFramesClusters[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
       checkGPUError(cudaMemcpyAsync(mROFramesClustersDevice[iLayer], mROFramesClusters[iLayer].data(), mROFramesClusters[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
     }
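`loadClustersIndexTables` and `createUsedClustersDevice` both use the array-of-device-pointers idiom: each layer gets its own device buffer, and the small host-side array holding those device pointers is itself uploaded so a kernel can pick a layer at run time. The sketch below illustrates the idiom with plain CUDA runtime calls; the names (`NL`, `createFlags`, `resetFlagsKernel`, `flagsPerLayer`) are illustrative and not taken from the PR, and error checking is omitted for brevity.

```cpp
#include <cuda_runtime.h>
#include <array>

constexpr int NL = 7; // assumed number of layers

// Double indirection on the device: first select the layer, then the element.
__global__ void resetFlagsKernel(unsigned char** layerFlags, int layer, int size)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < size) {
    layerFlags[layer][i] = 0;
  }
}

// Allocate one buffer per layer, zero it, then upload the array of device
// pointers itself so kernels can index it by layer.
unsigned char** createFlags(const std::array<int, NL>& sizes, cudaStream_t stream)
{
  std::array<unsigned char*, NL> flagsPerLayer{}; // host array of device pointers
  unsigned char** flagsArrayDevice = nullptr;     // device copy of that array
  for (int l = 0; l < NL; ++l) {
    cudaMallocAsync(reinterpret_cast<void**>(&flagsPerLayer[l]), sizes[l] * sizeof(unsigned char), stream);
    cudaMemsetAsync(flagsPerLayer[l], 0, sizes[l] * sizeof(unsigned char), stream);
  }
  // The copy size must be NL * sizeof(pointer), not NL * sizeof(element).
  // flagsPerLayer is pageable host memory, so the runtime stages the copy
  // before returning; in the class above the pointer array is a long-lived member.
  cudaMallocAsync(reinterpret_cast<void**>(&flagsArrayDevice), NL * sizeof(unsigned char*), stream);
  cudaMemcpyAsync(flagsArrayDevice, flagsPerLayer.data(), NL * sizeof(unsigned char*), cudaMemcpyHostToDevice, stream);
  return flagsArrayDevice;
}
```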
@@ -167,7 +223,7 @@ void TimeFrameGPU<nLayers>::loadMultiplicityCutMask(const int iteration)
 {
   if (!iteration) {
     START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading multiplicity cut mask");
-    LOGP(info, "gpu-transfer: loading multiplicity cut mask with {} elements, for {} MB.", mMultiplicityCutMask.size(), mMultiplicityCutMask.size() * sizeof(bool) / MB);
+    LOGP(debug, "gpu-transfer: loading multiplicity cut mask with {} elements, for {} MB.", mMultiplicityCutMask.size(), mMultiplicityCutMask.size() * sizeof(bool) / MB);
     allocMemAsync(reinterpret_cast<void**>(&mMultMaskDevice), mMultiplicityCutMask.size() * sizeof(uint8_t), nullptr, getExtAllocator());
     checkGPUError(cudaMemcpyAsync(mMultMaskDevice, mMultiplicityCutMask.data(), mMultiplicityCutMask.size() * sizeof(uint8_t), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
     STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
@@ -179,10 +235,10 @@ void TimeFrameGPU<nLayers>::loadVertices(const int iteration)
 {
   if (!iteration) {
     START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading seeding vertices");
-    LOGP(info, "gpu-transfer: loading {} ROframes vertices, for {} MB.", mROFramesPV.size(), mROFramesPV.size() * sizeof(int) / MB);
+    LOGP(debug, "gpu-transfer: loading {} ROframes vertices, for {} MB.", mROFramesPV.size(), mROFramesPV.size() * sizeof(int) / MB);
     allocMemAsync(reinterpret_cast<void**>(&mROFramesPVDevice), mROFramesPV.size() * sizeof(int), nullptr, getExtAllocator());
     checkGPUError(cudaMemcpyAsync(mROFramesPVDevice, mROFramesPV.data(), mROFramesPV.size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
-    LOGP(info, "gpu-transfer: loading {} seeding vertices, for {} MB.", mPrimaryVertices.size(), mPrimaryVertices.size() * sizeof(Vertex) / MB);
+    LOGP(debug, "gpu-transfer: loading {} seeding vertices, for {} MB.", mPrimaryVertices.size(), mPrimaryVertices.size() * sizeof(Vertex) / MB);
     allocMemAsync(reinterpret_cast<void**>(&mPrimaryVerticesDevice), mPrimaryVertices.size() * sizeof(Vertex), nullptr, getExtAllocator());
     checkGPUError(cudaMemcpyAsync(mPrimaryVerticesDevice, mPrimaryVertices.data(), mPrimaryVertices.size() * sizeof(Vertex), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
     STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
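For context, a plausible call order for the new loaders at the start of a tracking iteration is sketched below. The diff does not show where these methods are invoked, so the wrapper function, the `o2::its::gpu` namespace qualification, and the choice of 7 layers are assumptions; only the member function names come from the changes above.

```cpp
// Illustrative only: drives the loaders introduced in this diff in one
// possible order, relying on their internal "first iteration only" guards.
void loadTimeFrameOnDevice(o2::its::gpu::TimeFrameGPU<7>& tf, int iteration)
{
  tf.loadIndexTableUtils(iteration);       // index-table utilities, allocated once, refreshed each call
  tf.loadROframeClustersDevice(iteration); // per-layer ROframe cluster offsets, first iteration only
  tf.loadClustersIndexTables(iteration);   // per-layer cluster index tables, first iteration only
  tf.createUsedClustersDevice(iteration);  // zero-initialised used-cluster flags, first iteration only
  tf.loadMultiplicityCutMask(iteration);
  tf.loadVertices(iteration);
  // tf.loadUsedClustersDevice() can be called later to refresh the device
  // flags from the host copy when the host-side flags have changed.
}
```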