Skip to content

Commit e54e3f7

Browse files
committed
Fix second iteration
1 parent 80d88a3 commit e54e3f7

File tree

4 files changed

+44
-16
lines changed

4 files changed

+44
-16
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -195,8 +195,10 @@ void processNeighboursHandler(const int startLayer,
195195
const float MaxChi2ClusterAttachment,
196196
const o2::base::Propagator* propagator,
197197
const o2::base::PropagatorF::MatCorrType matCorrType,
198-
std::vector<int>& updatedCellIdHost, // temporary host vectors
199-
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vectors
198+
const std::vector<int>& lastCellIdHost, // temporary host vector
199+
const std::vector<CellSeed>& lastCellSeedHost, // temporary host vector
200+
std::vector<int>& updatedCellIdHost, // temporary host vector
201+
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vector
200202
const int nBlocks,
201203
const int nThreads);
202204

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -298,6 +298,8 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
298298
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
299299
mTimeFrameGPU->getDevicePropagator(),
300300
mCorrType,
301+
lastCellId, // temporary host vector
302+
lastCellSeed, // temporary host vector
301303
updatedCellId, // temporary host vectors
302304
updatedCellSeed, // temporary host vectors
303305
conf.nBlocks,
@@ -309,7 +311,25 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
309311
lastCellId.swap(updatedCellId);
310312
std::vector<CellSeed>().swap(updatedCellSeed); /// tame the memory peaks
311313
updatedCellId.clear();
312-
processNeighbours(iLayer, --level, lastCellSeed, lastCellId, updatedCellSeed, updatedCellId);
314+
processNeighboursHandler<nLayers>(iLayer,
315+
--level,
316+
mTimeFrameGPU->getDeviceArrayCells(),
317+
mTimeFrameGPU->getDeviceCells()[iLayer],
318+
mTimeFrameGPU->getNCells()[iLayer],
319+
mTimeFrameGPU->getDeviceArrayUsedClusters(),
320+
mTimeFrameGPU->getDeviceNeighbours(iLayer - 1),
321+
mTimeFrameGPU->getDeviceNeighboursLUTs(),
322+
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
323+
mBz,
324+
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
325+
mTimeFrameGPU->getDevicePropagator(),
326+
mCorrType,
327+
lastCellId, // temporary host vector
328+
lastCellSeed, // temporary host vector
329+
updatedCellId, // temporary host vectors
330+
updatedCellSeed, // temporary host vectors
331+
conf.nBlocks,
332+
conf.nThreads);
313333
}
314334
for (auto& seed : updatedCellSeed) {
315335
if (seed.getQ2Pt() > 1.e3 || seed.getChi2() > mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5)) {

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1181,18 +1181,22 @@ void processNeighboursHandler(const int startLayer,
11811181
const float MaxChi2ClusterAttachment,
11821182
const o2::base::Propagator* propagator,
11831183
const o2::base::PropagatorF::MatCorrType matCorrType,
1184-
std::vector<int>& updatedCellIdHost, // temporary host vectors
1185-
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vectors
1184+
const std::vector<int>& lastCellIdHost, // temporary host vector
1185+
const std::vector<CellSeed>& lastCellSeedHost, // temporary host vector
1186+
std::vector<int>& updatedCellIdHost, // temporary host vector
1187+
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vector
11861188
const int nBlocks,
11871189
const int nThreads)
11881190
{
11891191
thrust::device_vector<int> foundSeedsTable(nCurrentCells + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. TODO: fix this.
1192+
thrust::device_vector<int> lastCellIds(lastCellIdHost);
1193+
thrust::device_vector<CellSeed> lastCellSeed(lastCellSeedHost);
11901194
gpu::processNeighboursKernel<true><<<nBlocks, nThreads>>>(startLayer,
11911195
startLevel,
11921196
allCellSeeds,
1193-
currentCellSeeds,
1194-
nullptr, // currentCellIds,
1195-
nCurrentCells,
1197+
lastCellIdHost.empty() ? currentCellSeeds : thrust::raw_pointer_cast(&lastCellSeed[0]), // lastCellSeeds
1198+
lastCellIdHost.empty() ? nullptr : thrust::raw_pointer_cast(&lastCellIds[0]), // lastCellIds,
1199+
lastCellIdHost.empty() ? nCurrentCells : lastCellSeedHost.size(),
11961200
nullptr, // updatedCellSeeds,
11971201
nullptr, // updatedCellsIds,
11981202
thrust::raw_pointer_cast(&foundSeedsTable[0]), // auxiliary only in GPU code to compute the number of cells per iteration
@@ -1220,15 +1224,15 @@ void processNeighboursHandler(const int startLayer,
12201224
nCurrentCells + 1, // num_items
12211225
0));
12221226
1223-
thrust::device_vector<int> updatedCellIds(foundSeedsTable.back()), lastCellIds(foundSeedsTable.back());
1224-
thrust::device_vector<CellSeed> updatedCellSeeds(foundSeedsTable.back()), lastCellSeeds(foundSeedsTable.back());
1227+
thrust::device_vector<int> updatedCellIds(foundSeedsTable.back()) /*, lastCellIds(foundSeedsTable.back())*/;
1228+
thrust::device_vector<CellSeed> updatedCellSeeds(foundSeedsTable.back()) /*, lastCellSeeds(foundSeedsTable.back())*/;
12251229
12261230
gpu::processNeighboursKernel<false><<<nBlocks, nThreads>>>(startLayer,
12271231
startLevel,
12281232
allCellSeeds,
1229-
currentCellSeeds,
1230-
nullptr, // currentCellIds,
1231-
nCurrentCells,
1233+
lastCellIdHost.empty() ? currentCellSeeds : thrust::raw_pointer_cast(&lastCellSeed[0]), // lastCellSeeds
1234+
lastCellIdHost.empty() ? nullptr : thrust::raw_pointer_cast(&lastCellIds[0]), // lastCellIds,
1235+
lastCellIdHost.empty() ? nCurrentCells : lastCellSeedHost.size(),
12321236
thrust::raw_pointer_cast(&updatedCellSeeds[0]), // updatedCellSeeds
12331237
thrust::raw_pointer_cast(&updatedCellIds[0]), // updatedCellsIds
12341238
thrust::raw_pointer_cast(&foundSeedsTable[0]), // auxiliary only in GPU code to compute the number of cells per iteration
@@ -1419,8 +1423,10 @@ template void processNeighboursHandler<7>(const int startLayer,
14191423
const float MaxChi2ClusterAttachment,
14201424
const o2::base::Propagator* propagator,
14211425
const o2::base::PropagatorF::MatCorrType matCorrType,
1422-
std::vector<int>& updatedCellIdHost, // temporary host vectors
1423-
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vectors
1426+
const std::vector<int>& lastCellIdHost, // temporary host vector
1427+
const std::vector<CellSeed>& lastCellSeedHost, // temporary host vector
1428+
std::vector<int>& updatedCellIdHost, // temporary host vector
1429+
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vector
14241430
const int nBlocks,
14251431
const int nThreads);
14261432
} // namespace o2::its

Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,10 @@
1515

1616
#ifndef TRACKINGITSU_INCLUDE_CACELL_H_
1717
#define TRACKINGITSU_INCLUDE_CACELL_H_
18-
#include <iostream>
1918
#ifndef GPUCA_GPUCODE_DEVICE
2019
#include <array>
2120
#include <vector>
21+
#include <iostream>
2222
#endif
2323

2424
#include "GPUCommonDef.h"

0 commit comments

Comments
 (0)