Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ struct FastMultEst {

static uint32_t getCurrentRandomSeed();
int selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel);
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel);

void fillNClPerLayer(const gsl::span<const o2::itsmft::CompClusterExt>& clusters);
float process(const std::array<int, NLayers> ncl)
Expand Down
2 changes: 1 addition & 1 deletion Detectors/ITSMFT/ITS/reconstruction/src/FastMultEst.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ float FastMultEst::processNoiseImposed(const std::array<int, NLayers> ncl)
}

int FastMultEst::selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel)
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel)
{
int nrof = rofs.size(), nsel = 0;
const auto& multEstConf = FastMultEstConfig::Instance(); // parameters for mult estimation and cuts
Expand Down
63 changes: 46 additions & 17 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,19 @@ class TimeFrameGPU : public TimeFrame
void initialise(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
void initDevice(IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
void initDeviceSAFitting();
void loadIndexTableUtils(const int);
void loadTrackingFrameInfoDevice(const int);
void loadUnsortedClustersDevice(const int);
void loadClustersDevice(const int);
void loadClustersIndexTables(const int iteration);
void createUsedClustersDevice(const int);
void loadUsedClustersDevice();
void loadROframeClustersDevice(const int);
void loadMultiplicityCutMask(const int);
void loadVertices(const int);

///
void createTrackletsLUTDevice(const int);
void loadTrackletsDevice();
void loadTrackletsLUTDevice();
void loadCellsDevice();
Expand All @@ -62,6 +72,7 @@ class TimeFrameGPU : public TimeFrame
void loadTrackSeedsChi2Device();
void loadRoadsDevice();
void loadTrackSeedsDevice(std::vector<CellSeed>&);
void createTrackletsBuffers();
void createCellsBuffers(const int);
void createCellsDevice();
void createCellsLUTDevice();
Expand Down Expand Up @@ -93,7 +104,7 @@ class TimeFrameGPU : public TimeFrame
std::vector<std::vector<o2::MCCompLabel>>& getLabelsInChunks() { return mLabelsInChunks; }
int getNAllocatedROFs() const { return mNrof; } // Allocated means maximum nROF for each chunk while populated is the number of loaded ones.
StaticTrackingParameters<nLayers>* getDeviceTrackingParameters() { return mTrackingParamsDevice; }
Vertex* getDeviceVertices() { return mVerticesDevice; }
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
unsigned char* getDeviceUsedClusters(const int);
const o2::base::Propagator* getChainPropagator();
Expand All @@ -107,26 +118,32 @@ class TimeFrameGPU : public TimeFrame
const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
const Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
const int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
std::vector<unsigned int> getClusterSizes();
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }

void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;

// Host-specific getters
gsl::span<int> getHostNTracklets(const int chunkId);
gsl::span<int> getHostNCells(const int chunkId);
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
gsl::span<int, nLayers - 2> getNCells() { return mNCells; }

// Host-available device getters
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
gsl::span<Tracklet*> getDeviceTracklet() { return mTrackletsDevice; }
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
gsl::span<int, nLayers - 2> getNCellsDevice() { return mNCells; }

private:
void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations
Expand All @@ -136,31 +153,37 @@ class TimeFrameGPU : public TimeFrame
StaticTrackingParameters<nLayers> mStaticTrackingParams;

// Host-available device buffer sizes
std::array<int, nLayers - 1> mNTracklets;
std::array<int, nLayers - 2> mNCells;

// Device pointers
StaticTrackingParameters<nLayers>* mTrackingParamsDevice;
IndexTableUtils* mIndexTableUtilsDevice;
std::array<int*, nLayers> mROFramesClustersDevice;
std::array<unsigned char*, nLayers> mUsedClustersDevice;
Vertex* mVerticesDevice;
int* mROFramesPVDevice;

// Hybrid pref
uint8_t* mMultMaskDevice;
Vertex* mPrimaryVerticesDevice;
int* mROFramesPVDevice;
std::array<Cluster*, nLayers> mClustersDevice;
std::array<Cluster*, nLayers> mUnsortedClustersDevice;
std::array<int*, nLayers> mClustersIndexTablesDevice;
std::array<unsigned char*, nLayers> mUsedClustersDevice;
std::array<int*, nLayers> mROFramesClustersDevice;
const Cluster** mClustersDeviceArray;
const Cluster** mUnsortedClustersDeviceArray;
const int** mClustersIndexTablesDeviceArray;
const unsigned char** mUsedClustersDeviceArray;
const int** mROFrameClustersDeviceArray;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
const Tracklet** mTrackletsDeviceArray;
const int** mTrackletsLUTDeviceArray;
std::array<int*, nLayers - 2> mTrackletsLUTDevice;
Tracklet** mTrackletsDeviceArray;
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
std::array<int*, nLayers - 2> mCellsLUTDevice;
std::array<int*, nLayers - 3> mNeighboursLUTDevice;

int** mCellsLUTDeviceArray;
int** mNeighboursCellDeviceArray;
int** mNeighboursCellLUTDeviceArray;
int** mTrackletsLUTDeviceArray;
std::array<CellSeed*, nLayers - 2> mCellsDevice;
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
CellSeed* mTrackSeedsDevice;
Expand All @@ -186,10 +209,6 @@ class TimeFrameGPU : public TimeFrame
std::vector<std::vector<int>> mNVerticesInChunks;
std::vector<std::vector<o2::MCCompLabel>> mLabelsInChunks;

// Host memory used only in GPU tracking
std::vector<int> mHostNTracklets;
std::vector<int> mHostNCells;

// Temporary buffer for storing output tracks from GPU tracking
std::vector<TrackITSExt> mTrackITSExt;
};
Expand All @@ -215,6 +234,16 @@ inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, co
{
return static_cast<int>(mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < mROFramesClusters.size() ? rofIdstart + rofSpanSize : mROFramesClusters.size() - 1] - mROFramesClusters[layerId][rofIdstart]);
}

// Returns, for every element of mUnsortedClusters (in order), the number of
// clusters it holds, narrowed to unsigned int.
// NOTE(review): presumably one entry per detector layer -- confirm against TimeFrame.
template <int nLayers>
inline std::vector<unsigned int> TimeFrameGPU<nLayers>::getClusterSizes()
{
  std::vector<unsigned int> clusterCounts;
  clusterCounts.reserve(mUnsortedClusters.size());
  for (const auto& clusterContainer : mUnsortedClusters) {
    clusterCounts.push_back(static_cast<unsigned int>(clusterContainer.size()));
  }
  return clusterCounts;
}

} // namespace gpu
} // namespace its
} // namespace o2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,74 @@ GPUg() void fitTrackSeedsKernel(
#endif
} // namespace gpu

// Declaration only: the definition is not part of this header section.
// Templated on the number of layers (defaults to 7).
// NOTE(review): judging by the name, this is the counting pass that fills the
// tracklet look-up tables (trackletsLUTs / trackletsLUTsHost) for the ROFs
// between startROF and endROF -- confirm against the definition.
// NOTE(review): nBlocks / nThreads look like the kernel launch configuration -- confirm.
// nClusters is taken by value, so every call copies the vector; the
// std::vector<float> arguments are taken by non-const reference.
template <int nLayers = 7>
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
const uint8_t* multMask,
const int startROF,
const int endROF,
const int maxROF,
const int deltaROF,
const int vertexId,
const Vertex* vertices,
const int* rofPV,
const int nVertices,
const Cluster** clusters,
std::vector<unsigned int> nClusters,
const int** ROFClusters,
const unsigned char** usedClusters,
const int** clustersIndexTables,
int** trackletsLUTs,
gsl::span<int*> trackletsLUTsHost,
const int iteration,
const float NSigmaCut,
std::vector<float>& phiCuts,
const float resolutionPV,
std::vector<float>& minR,
std::vector<float>& maxR,
std::vector<float>& resolutions,
std::vector<float>& radii,
std::vector<float>& mulScatAng,
const int nBlocks,
const int nThreads);

// Declaration only: the definition is not part of this header section.
// Templated on the number of layers (defaults to 7).
// Takes the same arguments as countTrackletsInROFsHandler plus the tracklet
// outputs: `tracklets`, `spanTracklets` and `nTracklets`.
// NOTE(review): judging by the name, this is the pass that actually writes the
// tracklets once the look-up tables have been filled -- confirm against the definition.
// NOTE(review): nBlocks / nThreads look like the kernel launch configuration -- confirm.
// nClusters is taken by value, so every call copies the vector; the
// std::vector<float> arguments are taken by non-const reference.
template <int nLayers = 7>
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
const uint8_t* multMask,
const int startROF,
const int endROF,
const int maxROF,
const int deltaROF,
const int vertexId,
const Vertex* vertices,
const int* rofPV,
const int nVertices,
const Cluster** clusters,
std::vector<unsigned int> nClusters,
const int** ROFClusters,
const unsigned char** usedClusters,
const int** clustersIndexTables,
Tracklet** tracklets,
gsl::span<Tracklet*> spanTracklets,
gsl::span<int> nTracklets,
int** trackletsLUTs,
gsl::span<int*> trackletsLUTsHost,
const int iteration,
const float NSigmaCut,
std::vector<float>& phiCuts,
const float resolutionPV,
std::vector<float>& minR,
std::vector<float>& maxR,
std::vector<float>& resolutions,
std::vector<float>& radii,
std::vector<float>& mulScatAng,
const int nBlocks,
const int nThreads);

void countCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
const int** trackletsLUT,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
CellSeed* cells,
Expand All @@ -70,8 +133,8 @@ void countCellsHandler(const Cluster** sortedClusters,
void computeCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
const int** trackletsLUT,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
CellSeed* cells,
Expand Down
43 changes: 43 additions & 0 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,49 @@ struct gpuPair {

namespace gpu
{
// Poor man's implementation of a span-like struct. It is very limited.
// Non-owning view over `_size` elements of type T starting at `_data`.
// None of the accessors performs bounds or null checks.
template <typename T>
struct gpuSpan {
  using value_type = T;
  using ptr = T*;
  using ref = T&;

  // Empty view: null data pointer, zero elements.
  GPUd() gpuSpan() : _data{nullptr}, _size{0} {}
  // View over `dim` elements beginning at `data`.
  GPUd() gpuSpan(ptr data, unsigned int dim) : _data{data}, _size{dim} {}

  // Element access (unchecked).
  GPUd() ref operator[](unsigned int idx) const { return *(_data + idx); }
  GPUd() ref front() const { return *_data; }
  GPUd() ref back() const { return _data[_size - 1]; }

  // Size queries.
  GPUd() unsigned int size() const { return _size; }
  GPUd() bool empty() const { return size() == 0; }

  // Iteration bounds: [begin(), end()).
  GPUd() ptr begin() const { return _data; }
  GPUd() ptr end() const { return begin() + size(); }

 protected:
  ptr _data;
  unsigned int _size;
};

// Read-only specialization of gpuSpan: a non-owning view over `_size` elements
// of type `const T` starting at `_data`. Accessors are unchecked.
template <typename T>
struct gpuSpan<const T> {
  using value_type = T;
  using ptr = const T*;
  using ref = const T&;

  // Empty view: null data pointer, zero elements.
  GPUd() gpuSpan() : _data(nullptr), _size(0) {}
  // View over `dim` elements beginning at `data`.
  GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
  // Converting constructor from the mutable view. gpuSpan<T> is a distinct class
  // from gpuSpan<const T>, so its protected members are not accessible here;
  // read its state through the public accessors instead.
  GPUd() gpuSpan(const gpuSpan<T>& other) : _data(other.begin()), _size(other.size()) {}
  GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
  GPUd() unsigned int size() const { return _size; }
  GPUd() bool empty() const { return _size == 0; }
  GPUd() ref front() const { return _data[0]; }
  GPUd() ref back() const { return _data[_size - 1]; }
  GPUd() ptr begin() const { return _data; }
  GPUd() ptr end() const { return _data + _size; }

 protected:
  ptr _data;
  unsigned int _size;
};

enum class Task {
Tracker = 0,
Expand Down
Loading
Loading