@@ -147,12 +147,13 @@ void TimeFrameGPU<nLayers>::loadClustersIndexTables(const int iteration)
   if (!iteration) {
     START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading sorted clusters");
     for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
-      LOGP(info, "gpu-transfer: loading clusters indextable for layer {} with {} elements, for {} MB.", iLayer, mIndexTables[iLayer].size(), mIndexTables[iLayer].size() * sizeof(int) / MB);
+      LOGP(debug, "gpu-transfer: loading clusters indextable for layer {} with {} elements, for {} MB.", iLayer, mIndexTables[iLayer].size(), mIndexTables[iLayer].size() * sizeof(int) / MB);
       allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDevice[iLayer]), mIndexTables[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
       checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDevice[iLayer], mIndexTables[iLayer].data(), mIndexTables[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
     }
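+    // mClustersIndexTablesDeviceArray holds one device pointer per layer, so both the
+    // allocation and the transfer below are sized with sizeof(int*).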
-    allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDeviceArray), nLayers * sizeof(int), nullptr, getExtAllocator());
-    checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDeviceArray, mClustersIndexTablesDevice.data(), nLayers * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
+    allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDeviceArray), nLayers * sizeof(int*), nullptr, getExtAllocator());
+    checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDeviceArray, mClustersIndexTablesDevice.data(), nLayers * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
     STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
   }
 }
@@ -245,6 +246,35 @@ void TimeFrameGPU<nLayers>::loadVertices(const int iteration)
   }
 }
 
+template <int nLayers>
+void TimeFrameGPU<nLayers>::createTrackletsLUTDevice()
+{
+  START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating tracklets LUTs");
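+  // One extra LUT entry per layer: the slot past the last cluster is reserved for the
+  // layer's total tracklet count (read back in createTrackletsBuffers below).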
+  for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
+    LOGP(debug, "gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {} MB.", mClusters[iLayer].size() + 1, iLayer, (mClusters[iLayer].size() + 1) * sizeof(int) / MB);
+    allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), (mClusters[iLayer].size() + 1) * sizeof(int), nullptr, getExtAllocator());
+    checkGPUError(cudaMemsetAsync(mTrackletsLUTDevice[iLayer], 0, (mClusters[iLayer].size() + 1) * sizeof(int), mGpuStreams[0].get()));
+  }
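+  // Mirror the per-layer LUT pointers into one device-side array so device code can pick a layer's LUT by index.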
+  allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), mTrackletsLUTDevice.size() * sizeof(int*), nullptr, getExtAllocator());
+  checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), mTrackletsLUTDevice.size() * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
+  STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
+}
+
+// template<int nLayers> void TimeFrameGPU<nLayers>::createTrackletsBuffers()
+// {
+// START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating tracklets buffers");
+// for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
+// mNTracklets[iLayer] = 0;
+// checkGPUError(cudaMemcpyAsync(&mNTracklets[iLayer], mTrackletsLUTDevice[iLayer] + mClusters[iLayer].size(), sizeof(int), cudaMemcpyDeviceToHost));
+// LOGP(debug, "gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {} MB.", mNTracklets[iLayer], iLayer, mNTracklets[iLayer] * sizeof(Tracklet) / MB);
+// allocMemAsync(reinterpret_cast<void**>(&mTrackletsDevice[iLayer]), mNTracklets[iLayer] * sizeof(Tracklet), nullptr, getExtAllocator());
+// }
+// STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
+// }
+
 template <int nLayers>
 void TimeFrameGPU<nLayers>::loadTrackletsDevice()
 {
@@ -267,11 +297,13 @@ void TimeFrameGPU<nLayers>::loadTrackletsLUTDevice()
   START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading tracklets");
   for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
     LOGP(debug, "gpu-transfer: loading tracklets LUT for {} elements on layer {}, for {} MB", mTrackletsLookupTable[iLayer].size(), iLayer, mTrackletsLookupTable[iLayer].size() * sizeof(int) / MB);
-    allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), mTrackletsLookupTable[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
+    // allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), mTrackletsLookupTable[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
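+    // Device LUT memory is now allocated up front in createTrackletsLUTDevice(); here the
+    // host tables are only pinned with cudaHostRegister and copied into it asynchronously.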
     checkGPUError(cudaHostRegister(mTrackletsLookupTable[iLayer].data(), mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaHostRegisterPortable));
     checkGPUError(cudaMemcpyAsync(mTrackletsLUTDevice[iLayer], mTrackletsLookupTable[iLayer].data(), mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
   }
-  allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
+  // allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
   checkGPUError(cudaHostRegister(mTrackletsLUTDevice.data(), (nLayers - 2) * sizeof(int*), cudaHostRegisterPortable));
   checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), (nLayers - 2) * sizeof(int*), cudaMemcpyHostToDevice));
   STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());