Skip to content

Commit 9a7776a

Browse files
mconcasf3sch
authored andcommitted
Make ITS GPU tracking deterministic again
1 parent d4bc577 commit 9a7776a

File tree

3 files changed

+14
-28
lines changed

3 files changed

+14
-28
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
176176
const int nBlocks,
177177
const int nThreads);
178178

179-
int filterCellNeighboursHandler(std::vector<int>&,
180-
gpuPair<int, int>*,
179+
int filterCellNeighboursHandler(gpuPair<int, int>*,
181180
int*,
182181
unsigned int);
183182

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,7 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighbours(const int iteration)
200200
conf.nBlocks,
201201
conf.nThreads);
202202

203-
filterCellNeighboursHandler(mTimeFrameGPU->getCellsNeighbours()[iLayer],
204-
mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
203+
filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
205204
mTimeFrameGPU->getDeviceNeighbours(iLayer),
206205
nNeigh);
207206
}
@@ -220,9 +219,6 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
220219
if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
221220
continue;
222221
}
223-
std::vector<int> lastCellId, updatedCellId;
224-
std::vector<CellSeed> lastCellSeed, updatedCellSeed;
225-
226222
processNeighboursHandler<nLayers>(startLayer,
227223
startLevel,
228224
mTimeFrameGPU->getDeviceArrayCells(),

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,11 @@ struct equal_tracklets {
181181
GPUhd() bool operator()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex == b.secondClusterIndex; }
182182
};
183183

184+
template <typename T1, typename T2>
185+
struct sort_by_second {
186+
GPUhd() bool operator()(const gpuPair<T1, T2>& a, const gpuPair<T1, T2>& b) const { return a.second < b.second; }
187+
};
188+
184189
template <typename T1, typename T2>
185190
struct pair_to_first {
186191
GPUhd() int operator()(const gpuPair<T1, T2>& a) const
@@ -1110,32 +1115,18 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11101115
GPUChkErrS(cudaDeviceSynchronize());
11111116
}
11121117
1113-
int filterCellNeighboursHandler(std::vector<int>& neighHost, // TODO: eventually remove this!
1114-
gpuPair<int, int>* cellNeighbourPairs,
1118+
int filterCellNeighboursHandler(gpuPair<int, int>* cellNeighbourPairs,
11151119
int* cellNeighbours,
11161120
unsigned int nNeigh)
11171121
{
11181122
thrust::device_ptr<gpuPair<int, int>> neighVectorPairs(cellNeighbourPairs);
11191123
thrust::device_ptr<int> validNeighs(cellNeighbours);
1120-
thrust::device_vector<int> keys(nNeigh); // TODO: externally allocate.
1121-
thrust::device_vector<int> vals(nNeigh); // TODO: externally allocate.
1122-
thrust::copy(thrust::make_transform_iterator(neighVectorPairs, gpu::pair_to_second<int, int>()),
1123-
thrust::make_transform_iterator(neighVectorPairs + nNeigh, gpu::pair_to_second<int, int>()),
1124-
keys.begin());
1125-
thrust::sequence(vals.begin(), vals.end());
1126-
thrust::sort_by_key(keys.begin(), keys.end(), vals.begin());
1127-
thrust::device_vector<gpuPair<int, int>> sortedNeigh(nNeigh);
1128-
thrust::copy(thrust::make_permutation_iterator(neighVectorPairs, vals.begin()),
1129-
thrust::make_permutation_iterator(neighVectorPairs, vals.end()),
1130-
sortedNeigh.begin());
1131-
GPUChkErrS(cudaDeviceSynchronize());
1132-
auto trimmedBegin = thrust::find_if(sortedNeigh.begin(), sortedNeigh.end(), gpu::is_valid_pair<int, int>()); // trim leading -1s
1133-
auto trimmedSize = sortedNeigh.end() - trimmedBegin;
1134-
neighHost.resize(trimmedSize);
1135-
thrust::transform(trimmedBegin, sortedNeigh.end(), validNeighs, gpu::pair_to_first<int, int>());
1136-
GPUChkErrS(cudaMemcpy(neighHost.data(), cellNeighbours, trimmedSize * sizeof(int), cudaMemcpyDeviceToHost));
1124+
auto updatedEnd = thrust::remove_if(neighVectorPairs, neighVectorPairs + nNeigh, gpu::is_invalid_pair<int, int>());
1125+
size_t newSize = updatedEnd - neighVectorPairs;
1126+
thrust::stable_sort(neighVectorPairs, neighVectorPairs + newSize, gpu::sort_by_second<int, int>());
1127+
thrust::transform(neighVectorPairs, neighVectorPairs + newSize, validNeighs, gpu::pair_to_first<int, int>());
11371128
1138-
return trimmedSize;
1129+
return newSize;
11391130
}
11401131
11411132
template <int nLayers>
@@ -1267,7 +1258,7 @@ void processNeighboursHandler(const int startLayer,
12671258
auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5)));
12681259
auto s{end - outSeeds.begin()};
12691260
std::vector<CellSeed> outSeedsHost(s);
1270-
thrust::copy(updatedCellSeed.begin(), updatedCellSeed.begin() + s, outSeedsHost.begin());
1261+
thrust::copy(outSeeds.begin(), outSeeds.begin() + s, outSeedsHost.begin());
12711262
seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end());
12721263
}
12731264

0 commit comments

Comments
 (0)