@@ -265,57 +265,58 @@ void TimeFrameGPU<NLayers>::loadVertices(const int iteration)
265265{
266266 if (!iteration) {
267267 GPUTimer timer (" loading seeding vertices" );
268- // GPULog("gpu-transfer: loading {} ROframes vertices, for {:.2f} MB.", this->mROFramesPV.size(), this->mROFramesPV.size() * sizeof(int) / constants::MB);
269- // allocMem(reinterpret_cast<void**>(&mROFramesPVDevice), this->mROFramesPV.size() * sizeof(int), this->hasFrameworkAllocator());
270- // GPUChkErrS(cudaMemcpy(mROFramesPVDevice, this->mROFramesPV.data(), this->mROFramesPV.size() * sizeof(int), cudaMemcpyHostToDevice));
271268 GPULog (" gpu-transfer: loading {} seeding vertices, for {:.2f} MB." , this ->mPrimaryVertices .size (), this ->mPrimaryVertices .size () * sizeof (Vertex) / constants::MB);
272269 allocMem (reinterpret_cast <void **>(&mPrimaryVerticesDevice ), this ->mPrimaryVertices .size () * sizeof (Vertex), this ->hasFrameworkAllocator ());
273270 GPUChkErrS (cudaMemcpy (mPrimaryVerticesDevice , this ->mPrimaryVertices .data (), this ->mPrimaryVertices .size () * sizeof (Vertex), cudaMemcpyHostToDevice));
274271 }
275272}
276273
277274template <int NLayers>
278- void TimeFrameGPU<NLayers>::loadROFOverlapTable()
275+ void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration )
279276{
280- GPUTimer timer (" initialising device view of ROFOverlapTable" );
281- const auto & hostTable = this ->getROFOverlapTable ();
282- const auto & hostView = this ->getROFOverlapTableView ();
283- using TableEntry = ROFOverlapTable<NLayers>::TableEntry;
284- using TableIndex = ROFOverlapTable<NLayers>::TableIndex;
285- using LayerTiming = o2::its::LayerTiming;
286- TableEntry* d_flatTable{nullptr };
287- TableIndex* d_indices{nullptr };
288- LayerTiming* d_layers{nullptr };
289- size_t flatTableSize = hostTable.getFlatTableSize ();
290- allocMem (reinterpret_cast <void **>(&d_flatTable), flatTableSize * sizeof (TableEntry), this ->hasFrameworkAllocator ());
291- GPUChkErrS (cudaMemcpy (d_flatTable, hostView.mFlatTable , flatTableSize * sizeof (TableEntry), cudaMemcpyHostToDevice));
292- allocMem (reinterpret_cast <void **>(&d_indices), hostTable.getIndicesSize () * sizeof (TableIndex), this ->hasFrameworkAllocator ());
293- GPUChkErrS (cudaMemcpy (d_indices, hostView.mIndices , hostTable.getIndicesSize () * sizeof (TableIndex), cudaMemcpyHostToDevice));
294- allocMem (reinterpret_cast <void **>(&d_layers), NLayers * sizeof (LayerTiming), this ->hasFrameworkAllocator ());
295- GPUChkErrS (cudaMemcpy (d_layers, hostView.mLayers , NLayers * sizeof (LayerTiming), cudaMemcpyHostToDevice));
296- mDeviceROFOverlapTableView = hostTable.getDeviceView (d_flatTable, d_indices, d_layers);
277+ if (!iteration) {
278+ GPUTimer timer (" initialising device view of ROFOverlapTable" );
279+ const auto & hostTable = this ->getROFOverlapTable ();
280+ const auto & hostView = this ->getROFOverlapTableView ();
281+ using TableEntry = ROFOverlapTable<NLayers>::TableEntry;
282+ using TableIndex = ROFOverlapTable<NLayers>::TableIndex;
283+ using LayerTiming = o2::its::LayerTiming;
284+ TableEntry* d_flatTable{nullptr };
285+ TableIndex* d_indices{nullptr };
286+ LayerTiming* d_layers{nullptr };
287+ size_t flatTableSize = hostTable.getFlatTableSize ();
288+ allocMem (reinterpret_cast <void **>(&d_flatTable), flatTableSize * sizeof (TableEntry), this ->hasFrameworkAllocator ());
289+ GPUChkErrS (cudaMemcpy (d_flatTable, hostView.mFlatTable , flatTableSize * sizeof (TableEntry), cudaMemcpyHostToDevice));
290+ allocMem (reinterpret_cast <void **>(&d_indices), hostTable.getIndicesSize () * sizeof (TableIndex), this ->hasFrameworkAllocator ());
291+ GPUChkErrS (cudaMemcpy (d_indices, hostView.mIndices , hostTable.getIndicesSize () * sizeof (TableIndex), cudaMemcpyHostToDevice));
292+ allocMem (reinterpret_cast <void **>(&d_layers), NLayers * sizeof (LayerTiming), this ->hasFrameworkAllocator ());
293+ GPUChkErrS (cudaMemcpy (d_layers, hostView.mLayers , NLayers * sizeof (LayerTiming), cudaMemcpyHostToDevice));
294+ mDeviceROFOverlapTableView = hostTable.getDeviceView (d_flatTable, d_indices, d_layers);
295+ }
297296}
298297
299298template <int NLayers>
300- void TimeFrameGPU<NLayers>::loadROFVertexLookupTable()
299+ void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration )
301300{
302- GPUTimer timer (" initialising device view of ROFVertexLookupTable" );
303- const auto & hostTable = this ->getROFVertexLookupTable ();
304- const auto & hostView = this ->getROFVertexLookupTableView ();
305- using TableEntry = ROFVertexLookupTable<NLayers>::TableEntry;
306- using TableIndex = ROFVertexLookupTable<NLayers>::TableIndex;
307- using LayerTiming = o2::its::LayerTiming;
308- TableEntry* d_flatTable{nullptr };
309- TableIndex* d_indices{nullptr };
310- LayerTiming* d_layers{nullptr };
311- size_t flatTableSize = hostTable.getFlatTableSize ();
312- allocMem (reinterpret_cast <void **>(&d_flatTable), flatTableSize * sizeof (TableEntry), this ->hasFrameworkAllocator ());
313- GPUChkErrS (cudaMemcpy (d_flatTable, hostView.mFlatTable , flatTableSize * sizeof (TableEntry), cudaMemcpyHostToDevice));
314- allocMem (reinterpret_cast <void **>(&d_indices), hostTable.getIndicesSize () * sizeof (TableIndex), this ->hasFrameworkAllocator ());
315- GPUChkErrS (cudaMemcpy (d_indices, hostView.mIndices , hostTable.getIndicesSize () * sizeof (TableIndex), cudaMemcpyHostToDevice));
316- allocMem (reinterpret_cast <void **>(&d_layers), NLayers * sizeof (LayerTiming), this ->hasFrameworkAllocator ());
317- GPUChkErrS (cudaMemcpy (d_layers, hostView.mLayers , NLayers * sizeof (LayerTiming), cudaMemcpyHostToDevice));
318- mDeviceROFVertexLookupTableView = hostTable.getDeviceView (d_flatTable, d_indices, d_layers);
301+ if (!iteration) {
302+ GPUTimer timer (" initialising device view of ROFVertexLookupTable" );
303+ const auto & hostTable = this ->getROFVertexLookupTable ();
304+ const auto & hostView = this ->getROFVertexLookupTableView ();
305+ using TableEntry = ROFVertexLookupTable<NLayers>::TableEntry;
306+ using TableIndex = ROFVertexLookupTable<NLayers>::TableIndex;
307+ using LayerTiming = o2::its::LayerTiming;
308+ TableEntry* d_flatTable{nullptr };
309+ TableIndex* d_indices{nullptr };
310+ LayerTiming* d_layers{nullptr };
311+ size_t flatTableSize = hostTable.getFlatTableSize ();
312+ allocMem (reinterpret_cast <void **>(&d_flatTable), flatTableSize * sizeof (TableEntry), this ->hasFrameworkAllocator ());
313+ GPUChkErrS (cudaMemcpy (d_flatTable, hostView.mFlatTable , flatTableSize * sizeof (TableEntry), cudaMemcpyHostToDevice));
314+ allocMem (reinterpret_cast <void **>(&d_indices), hostTable.getIndicesSize () * sizeof (TableIndex), this ->hasFrameworkAllocator ());
315+ GPUChkErrS (cudaMemcpy (d_indices, hostView.mIndices , hostTable.getIndicesSize () * sizeof (TableIndex), cudaMemcpyHostToDevice));
316+ allocMem (reinterpret_cast <void **>(&d_layers), NLayers * sizeof (LayerTiming), this ->hasFrameworkAllocator ());
317+ GPUChkErrS (cudaMemcpy (d_layers, hostView.mLayers , NLayers * sizeof (LayerTiming), cudaMemcpyHostToDevice));
318+ mDeviceROFVertexLookupTableView = hostTable.getDeviceView (d_flatTable, d_indices, d_layers);
319+ }
319320}
320321
321322template <int NLayers>
@@ -373,6 +374,7 @@ void TimeFrameGPU<NLayers>::createTrackletsBuffers(const int layer)
373374 mGpuStreams [layer].sync (); // ensure number of tracklets is correct
374375 GPULog (" gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {:.2f} MB." , mNTracklets [layer], layer, mNTracklets [layer] * sizeof (Tracklet) / constants::MB);
375376 allocMemAsync (reinterpret_cast <void **>(&mTrackletsDevice [layer]), mNTracklets [layer] * sizeof (Tracklet), mGpuStreams [layer], this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
377+ GPUChkErrS (cudaMemsetAsync (mTrackletsDevice [layer], 0 , mNTracklets [layer] * sizeof (Tracklet), mGpuStreams [layer].get ()));
376378 GPUChkErrS (cudaMemcpyAsync (&mTrackletsDeviceArray [layer], &mTrackletsDevice [layer], sizeof (Tracklet*), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
377379}
378380
@@ -468,6 +470,7 @@ void TimeFrameGPU<NLayers>::createCellsBuffers(const int layer)
468470 mGpuStreams [layer].sync (); // ensure number of cells is correct
469471 GPULog (" gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB." , mNCells [layer], layer, mNCells [layer] * sizeof (CellSeedN) / constants::MB);
470472 allocMemAsync (reinterpret_cast <void **>(&mCellsDevice [layer]), mNCells [layer] * sizeof (CellSeedN), mGpuStreams [layer], this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
473+ GPUChkErrS (cudaMemsetAsync (mCellsDevice [layer], 0 , mNCells [layer] * sizeof (CellSeedN), mGpuStreams [layer].get ()));
471474 GPUChkErrS (cudaMemcpyAsync (&mCellsDeviceArray [layer], &mCellsDevice [layer], sizeof (CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
472475}
473476
@@ -637,11 +640,10 @@ void TimeFrameGPU<NLayers>::popMemoryStack(const int iteration)
637640template <int NLayers>
638641void TimeFrameGPU<NLayers>::initialise(const int iteration,
639642 const TrackingParameters& trkParam,
640- const int maxLayers,
641- IndexTableUtilsN* utils)
643+ const int maxLayers)
642644{
643645 mGpuStreams .resize (NLayers);
644- o2::its::TimeFrame<NLayers>::initialise (iteration, trkParam, maxLayers);
646+ o2::its::TimeFrame<NLayers>::initialise (iteration, trkParam, maxLayers, false );
645647}
646648
647649template <int NLayers>
0 commit comments