@@ -341,9 +341,9 @@ void TimeFrameGPU<nLayers>::createCellsLUTDevice()
341341{
342342 START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " creating cells LUTs" );
343343 for (auto iLayer{0 }; iLayer < nLayers - 2 ; ++iLayer) {
344- LOGP (debug, " gpu-transfer: creating cell LUT for {} elements on layer {}, for {} MB." , mTracklets [iLayer]. size () + 1 , iLayer, (mTracklets [iLayer]. size () + 1 ) * sizeof (int ) / MB);
345- allocMemAsync (reinterpret_cast <void **>(&mCellsLUTDevice [iLayer]), (mTracklets [iLayer]. size () + 1 ) * sizeof (int ), nullptr , getExtAllocator ());
346- checkGPUError (cudaMemsetAsync (mCellsLUTDevice [iLayer], 0 , (mTracklets [iLayer]. size () + 1 ) * sizeof (int ), mGpuStreams [0 ].get ()));
344+ LOGP (debug, " gpu-transfer: creating cell LUT for {} elements on layer {}, for {} MB." , mNTracklets [iLayer] + 1 , iLayer, (mNTracklets [iLayer] + 1 ) * sizeof (int ) / MB);
345+ allocMemAsync (reinterpret_cast <void **>(&mCellsLUTDevice [iLayer]), (mNTracklets [iLayer] + 1 ) * sizeof (int ), nullptr , getExtAllocator ());
346+ checkGPUError (cudaMemsetAsync (mCellsLUTDevice [iLayer], 0 , (mNTracklets [iLayer] + 1 ) * sizeof (int ), mGpuStreams [0 ].get ()));
347347 }
348348 allocMemAsync (reinterpret_cast <void **>(&mCellsLUTDeviceArray ), (nLayers - 2 ) * sizeof (int *), nullptr , getExtAllocator ());
349349 checkGPUError (cudaMemcpyAsync (mCellsLUTDeviceArray , mCellsLUTDevice .data (), mCellsLUTDevice .size () * sizeof (int *), cudaMemcpyHostToDevice, mGpuStreams [0 ].get ()));
@@ -355,7 +355,7 @@ void TimeFrameGPU<nLayers>::createCellsBuffers(const int layer)
355355{
356356 START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " creating cells buffers" );
357357 mNCells [layer] = 0 ;
358- checkGPUError (cudaMemcpyAsync (&mNCells [layer], mCellsLUTDevice [layer] + mTracklets [layer]. size () , sizeof (int ), cudaMemcpyDeviceToHost));
358+ checkGPUError (cudaMemcpyAsync (&mNCells [layer], mCellsLUTDevice [layer] + mNTracklets [layer], sizeof (int ), cudaMemcpyDeviceToHost));
359359 LOGP (debug, " gpu-transfer: creating cell buffer for {} elements on layer {}, for {} MB." , mNCells [layer], layer, mNCells [layer] * sizeof (CellSeed) / MB);
360360 allocMemAsync (reinterpret_cast <void **>(&mCellsDevice [layer]), mNCells [layer] * sizeof (CellSeed), nullptr , getExtAllocator ());
361361
@@ -446,9 +446,9 @@ void TimeFrameGPU<nLayers>::downloadCellsLUTDevice()
446446{
447447 START_GPU_STREAM_TIMER (mGpuStreams [0 ].get (), " downloading cell luts" );
448448 for (auto iLayer{0 }; iLayer < nLayers - 3 ; ++iLayer) {
449- LOGP (debug, " gpu-transfer: downloading cells lut on layer {} for {} elements" , iLayer, (mTracklets [iLayer + 1 ]. size () + 1 ));
450- mCellsLookupTable [iLayer].resize (mTracklets [iLayer + 1 ]. size () + 1 );
451- checkGPUError (cudaMemcpyAsync (mCellsLookupTable [iLayer].data (), mCellsLUTDevice [iLayer + 1 ], (mTracklets [iLayer + 1 ]. size () + 1 ) * sizeof (int ), cudaMemcpyDeviceToHost, mGpuStreams [0 ].get ()));
449+ LOGP (debug, " gpu-transfer: downloading cells lut on layer {} for {} elements" , iLayer, (mNTracklets [iLayer + 1 ] + 1 ));
450+ mCellsLookupTable [iLayer].resize (mNTracklets [iLayer + 1 ] + 1 );
451+ checkGPUError (cudaMemcpyAsync (mCellsLookupTable [iLayer].data (), mCellsLUTDevice [iLayer + 1 ], (mNTracklets [iLayer + 1 ] + 1 ) * sizeof (int ), cudaMemcpyDeviceToHost, mGpuStreams [0 ].get ()));
452452 }
453453 STOP_GPU_STREAM_TIMER (mGpuStreams [0 ].get ());
454454}
0 commit comments