@@ -181,6 +181,11 @@ struct equal_tracklets {
181181 GPUhd () bool operator ()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex == b.secondClusterIndex ; }
182182};
183183
184+ template <typename T1, typename T2>
185+ struct sort_by_second {
186+ GPUhd () bool operator ()(const gpuPair<T1, T2>& a, const gpuPair<T1, T2>& b) const { return a.second < b.second ; }
187+ };
188+
184189template <typename T1, typename T2>
185190struct pair_to_first {
186191 GPUhd () int operator ()(const gpuPair<T1, T2>& a) const
@@ -1110,32 +1115,18 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11101115 GPUChkErrS (cudaDeviceSynchronize());
11111116}
11121117
1113- int filterCellNeighboursHandler (std::vector<int >& neighHost, // TODO: eventually remove this!
1114- gpuPair<int , int >* cellNeighbourPairs,
1118+ int filterCellNeighboursHandler (gpuPair<int , int >* cellNeighbourPairs,
11151119 int * cellNeighbours,
11161120 unsigned int nNeigh)
11171121{
11181122 thrust::device_ptr<gpuPair<int , int >> neighVectorPairs (cellNeighbourPairs);
11191123 thrust::device_ptr<int > validNeighs (cellNeighbours);
1120- thrust::device_vector<int > keys (nNeigh); // TODO: externally allocate.
1121- thrust::device_vector<int > vals (nNeigh); // TODO: externally allocate.
1122- thrust::copy (thrust::make_transform_iterator (neighVectorPairs, gpu::pair_to_second<int , int >()),
1123- thrust::make_transform_iterator (neighVectorPairs + nNeigh, gpu::pair_to_second<int , int >()),
1124- keys.begin ());
1125- thrust::sequence (vals.begin (), vals.end ());
1126- thrust::sort_by_key (keys.begin (), keys.end (), vals.begin ());
1127- thrust::device_vector<gpuPair<int , int >> sortedNeigh (nNeigh);
1128- thrust::copy (thrust::make_permutation_iterator (neighVectorPairs, vals.begin ()),
1129- thrust::make_permutation_iterator (neighVectorPairs, vals.end ()),
1130- sortedNeigh.begin ());
1131- GPUChkErrS (cudaDeviceSynchronize ());
1132- auto trimmedBegin = thrust::find_if (sortedNeigh.begin (), sortedNeigh.end (), gpu::is_valid_pair<int , int >()); // trim leading -1s
1133- auto trimmedSize = sortedNeigh.end () - trimmedBegin;
1134- neighHost.resize (trimmedSize);
1135- thrust::transform (trimmedBegin, sortedNeigh.end (), validNeighs, gpu::pair_to_first<int , int >());
1136- GPUChkErrS (cudaMemcpy (neighHost.data (), cellNeighbours, trimmedSize * sizeof (int ), cudaMemcpyDeviceToHost));
1124+ auto updatedEnd = thrust::remove_if (neighVectorPairs, neighVectorPairs + nNeigh, gpu::is_invalid_pair<int , int >());
1125+ size_t newSize = updatedEnd - neighVectorPairs;
1126+ thrust::stable_sort (neighVectorPairs, neighVectorPairs + newSize, gpu::sort_by_second<int , int >());
1127+ thrust::transform (neighVectorPairs, neighVectorPairs + newSize, validNeighs, gpu::pair_to_first<int , int >());
11371128
1138- return trimmedSize ;
1129+ return newSize ;
11391130}
11401131
11411132template <int nLayers>
@@ -1267,7 +1258,7 @@ void processNeighboursHandler(const int startLayer,
12671258 auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1 .e3 , maxChi2NDF * ((startLevel + 2 ) * 2 - 5)));
12681259 auto s{end - outSeeds.begin ()};
12691260 std::vector<CellSeed> outSeedsHost (s);
1270- thrust::copy (updatedCellSeed .begin(), updatedCellSeed .begin() + s, outSeedsHost.begin());
1261+ thrust::copy (outSeeds .begin(), outSeeds .begin() + s, outSeedsHost.begin());
12711262 seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end());
12721263}
12731264
0 commit comments