@@ -532,13 +532,45 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
532532 GPUChkErrS (cudaMemset (mTrackITSExtDevice , 0 , mNTracks * sizeof (o2::its::TrackITSExt)));
533533}
534534
535+ template <int NLayers>
536+ void TimeFrameGPU<NLayers>::loadTrackExtensionStartStatesDevice()
537+ {
538+ GPUTimer timer (" loading track extension start states" );
539+ GPULog (" gpu-transfer: loading {} track extension start states, for {:.2f} MB." , this ->mTracks .size (), this ->mTracks .size () * sizeof (o2::its::TrackExtensionStartState<NLayers>) / constants::MB);
540+ mTrackExtensionStartStatesDevice = nullptr ;
541+ mTrackExtensionStartStates = bounded_vector<TrackExtensionStartState<NLayers>>(this ->mTracks .size (), {}, this ->getMemoryPool ().get ());
542+ if (this ->mTracks .empty ()) {
543+ return ;
544+ }
545+ for (size_t iTrack{0 }; iTrack < this ->mTracks .size (); ++iTrack) {
546+ const auto & track = this ->mTracks [iTrack];
547+ auto & state = mTrackExtensionStartStates [iTrack];
548+ state.paramIn = track.getParamIn ();
549+ state.paramOut = track.getParamOut ();
550+ state.time = track.getTimeStamp ();
551+ state.chi2 = track.getChi2 ();
552+ state.nClusters = track.getNClusters ();
553+ state.firstClusterLayer = static_cast <int >(track.getFirstClusterLayer ());
554+ state.lastClusterLayer = static_cast <int >(track.getLastClusterLayer ());
555+ for (int iLayer{0 }; iLayer < NLayers; ++iLayer) {
556+ state.clusters [iLayer] = track.getClusterIndex (iLayer);
557+ }
558+ }
559+ allocMem (reinterpret_cast <void **>(&mTrackExtensionStartStatesDevice ), mTrackExtensionStartStates .size () * sizeof (o2::its::TrackExtensionStartState<NLayers>), this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
560+ GPUChkErrS (cudaMemcpy (mTrackExtensionStartStatesDevice , mTrackExtensionStartStates .data (), mTrackExtensionStartStates .size () * sizeof (o2::its::TrackExtensionStartState<NLayers>), cudaMemcpyHostToDevice));
561+ }
562+
535563template <int NLayers>
536564void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nTracks)
537565{
538566 GPUTimer timer (" reserving track extension candidates" );
539567 const size_t nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
540568 GPULog (" gpu-allocation: reserving {} track extension candidates, for {:.2f} MB." , nCandidates, nCandidates * sizeof (o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
541569 mTrackExtensionCandidates = bounded_vector<TrackExtensionCandidate<NLayers>>(nCandidates, {}, this ->getMemoryPool ().get ());
570+ mTrackExtensionCandidatesDevice = nullptr ;
571+ if (mTrackExtensionCandidates .empty ()) {
572+ return ;
573+ }
542574 allocMem (reinterpret_cast <void **>(&mTrackExtensionCandidatesDevice ), nCandidates * sizeof (o2::its::TrackExtensionCandidate<NLayers>), this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
543575}
544576
@@ -593,6 +625,9 @@ void TimeFrameGPU<NLayers>::downloadTrackExtensionCandidatesDevice()
593625{
594626 GPUTimer timer (" downloading track extension candidates" );
595627 GPULog (" gpu-transfer: downloading {} track extension candidates, for {:.2f} MB." , mTrackExtensionCandidates .size (), mTrackExtensionCandidates .size () * sizeof (o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
628+ if (mTrackExtensionCandidates .empty ()) {
629+ return ;
630+ }
596631 GPUChkErrS (cudaMemcpy (mTrackExtensionCandidates .data (), mTrackExtensionCandidatesDevice , mTrackExtensionCandidates .size () * sizeof (o2::its::TrackExtensionCandidate<NLayers>), cudaMemcpyDeviceToHost));
597632}
598633
0 commit comments