@@ -176,7 +176,7 @@ void TimeFrameGPU<nLayers>::createUsedClustersDevice(const int iteration)
176176template <int nLayers>
177177void TimeFrameGPU<nLayers>::loadUsedClustersDevice()
178178{
179- START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " creating used clusters flags" );
179+ START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " loading used clusters flags" );
180180 for (auto iLayer{0 }; iLayer < nLayers; ++iLayer) {
181181 LOGP (debug, " gpu-transfer: loading {} used clusters flags on layer {}, for {} MB." , mUsedClusters [iLayer].size (), iLayer, mClusters [iLayer].size () * sizeof (unsigned char ) / MB);
182182 checkGPUError (cudaMemcpyAsync (mUsedClustersDevice [iLayer], mUsedClusters [iLayer].data (), mUsedClusters [iLayer].size () * sizeof (unsigned char ), cudaMemcpyHostToDevice, mGpuStreams [0 ].get ()));
@@ -246,20 +246,25 @@ void TimeFrameGPU<nLayers>::loadVertices(const int iteration)
246246}
247247
// Diff hunk for TimeFrameGPU<nLayers>::createTrackletsLUTDevice.
// Lines marked '-' are the previous version, '+' the new one, and unmarked lines are shared context.
// Visible changes in the new version:
//   * the function takes a `const int iteration` argument;
//   * the stream-timer label reads "creating tracklets LUTs" instead of "creating cells LUTs";
//   * the per-layer LUT allocation and the allocation + host-to-device copy of the pointer array
//     are executed only when `iteration` is 0, while the per-layer cudaMemsetAsync to zero still
//     runs on every call;
//   * the pointer array is sized for (nLayers - 1) pointers instead of (nLayers - 2).
248248template <int nLayers>
249- void TimeFrameGPU<nLayers>::createTrackletsLUTDevice()
249+ void TimeFrameGPU<nLayers>::createTrackletsLUTDevice(const int iteration )
250250{
251- START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " creating cells LUTs" );
251+ START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " creating tracklets LUTs" );
// One LUT per layer index in [0, nLayers - 1); each holds mClusters[iLayer].size() + 1 ints.
252252 for (auto iLayer{0 }; iLayer < nLayers - 1 ; ++iLayer) {
253- LOGP (debug, " gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {} MB." , mClusters [iLayer].size () + 1 , iLayer, (mClusters [iLayer].size () + 1 ) * sizeof (int ) / MB);
254- allocMemAsync (reinterpret_cast <void **>(&mTrackletsLUTDevice [iLayer]), (mClusters [iLayer].size () + 1 ) * sizeof (int ), nullptr , getExtAllocator ());
// New version: allocate the layer's LUT only when iteration == 0.
// NOTE(review): later iterations reuse the pointer set here; this presumes mClusters[iLayer].size()
// does not grow between iterations — confirm against the caller.
253+ if (!iteration) {
254+ LOGP (debug, " gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {} MB." , mClusters [iLayer].size () + 1 , iLayer, (mClusters [iLayer].size () + 1 ) * sizeof (int ) / MB);
255+ allocMemAsync (reinterpret_cast <void **>(&mTrackletsLUTDevice [iLayer]), (mClusters [iLayer].size () + 1 ) * sizeof (int ), nullptr , getExtAllocator ());
256+ }
// Zero the LUT on every call (shared context line), enqueued on mGpuStreams[0].
255257 checkGPUError (cudaMemsetAsync (mTrackletsLUTDevice [iLayer], 0 , (mClusters [iLayer].size () + 1 ) * sizeof (int ), mGpuStreams [0 ].get ()));
256258 }
257- allocMemAsync (reinterpret_cast <void **>(&mTrackletsLUTDeviceArray ), (nLayers - 2 ) * sizeof (int *), nullptr , getExtAllocator ());
258- checkGPUError (cudaMemcpyAsync (mTrackletsLUTDeviceArray , mTrackletsLUTDevice .data (), mTrackletsLUTDevice .size () * sizeof (int *), cudaMemcpyHostToDevice, mGpuStreams [0 ].get ()));
// New version: allocate the pointer array and copy the per-layer pointers into it only when iteration == 0.
// NOTE(review): the copy length is mTrackletsLUTDevice.size() pointers while the allocation is
// (nLayers - 1) pointers; presumably the container holds nLayers - 1 entries — verify its declaration.
259+ if (!iteration) {
260+ allocMemAsync (reinterpret_cast <void **>(&mTrackletsLUTDeviceArray ), (nLayers - 1 ) * sizeof (int *), nullptr , getExtAllocator ());
261+ checkGPUError (cudaMemcpyAsync (mTrackletsLUTDeviceArray , mTrackletsLUTDevice .data (), mTrackletsLUTDevice .size () * sizeof (int *), cudaMemcpyHostToDevice, mGpuStreams [0 ].get ()));
262+ }
259263 STOP_GPU_STREAM_TIMER (mGpuStreams [0 ].get ());
260264}
261265
262- template <int nLayers> void TimeFrameGPU<nLayers>::createTrackletsBuffers()
266+ template <int nLayers>
267+ void TimeFrameGPU<nLayers>::createTrackletsBuffers()
263268{
264269 START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " creating cells buffers" );
265270 for (auto iLayer{0 }; iLayer < nLayers - 1 ; ++iLayer) {
0 commit comments