|
 #include <string>
 #include <tuple>

+#include "ITStracking/MathUtils.h"
+#include "ITStracking/ExternalAllocator.h"
+
 #include "GPUCommonDef.h"
 #include "GPUCommonHelpers.h"
 #include "GPUCommonLogger.h"
+#include "GPUCommonDefAPI.h"

+#ifdef GPUCA_GPUCODE
+#include <thrust/device_ptr.h>
 #ifndef __HIPCC__
 #define THRUST_NAMESPACE thrust::cuda
 #else
 #define THRUST_NAMESPACE thrust::hip
 #endif
+#endif

 #ifdef ITS_GPU_LOG
 #define GPULog(...) LOGP(info, __VA_ARGS__)
 #else
 #define GPULog(...)
 #endif

 namespace o2::its
 {
+// FWD declarations
+template <int>
+class IndexTableUtils;
+class Tracklet;

 template <typename T1, typename T2>
 using gpuPair = std::pair<T1, T2>;
@@ -282,6 +293,184 @@ class GPUTimer
 }
 };
 #endif
+
+#ifdef GPUCA_GPUCODE
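+// Minimal C++ Allocator facade over the framework's ExternalAllocator: it lets
+// Thrust containers and algorithms obtain their device storage from externally
+// managed memory instead of calling cudaMalloc/hipMalloc directly.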
+template <typename T>
+struct TypedAllocator {
+  using value_type = T;
+  using pointer = thrust::device_ptr<T>;
+  using const_pointer = thrust::device_ptr<const T>;
+  using size_type = std::size_t;
+  using difference_type = std::ptrdiff_t;
+
+  TypedAllocator() noexcept : mInternalAllocator(nullptr) {}
+  explicit TypedAllocator(ExternalAllocator* a) noexcept : mInternalAllocator(a) {}
+
+  template <typename U>
+  TypedAllocator(const TypedAllocator<U>& o) noexcept : mInternalAllocator(o.mInternalAllocator)
+  {
+  }
+
+  pointer allocate(size_type n)
+  {
+    // Requires a non-null ExternalAllocator: a default-constructed TypedAllocator must not allocate.
+    void* raw = mInternalAllocator->allocate(n * sizeof(T));
+    return thrust::device_pointer_cast(static_cast<T*>(raw));
+  }
+
+  void deallocate(pointer p, size_type n) noexcept
+  {
+    if (!p) {
+      return;
+    }
+    void* raw = thrust::raw_pointer_cast(p);
+    mInternalAllocator->deallocate(static_cast<char*>(raw), n * sizeof(T));
+  }
+
+  bool operator==(TypedAllocator const& o) const noexcept
+  {
+    return mInternalAllocator == o.mInternalAllocator;
+  }
+  bool operator!=(TypedAllocator const& o) const noexcept
+  {
+    return !(*this == o);
+  }
+
+ private:
+  // Befriend all instantiations so the converting constructor can read o.mInternalAllocator.
+  template <typename U>
+  friend struct TypedAllocator;
+
+  ExternalAllocator* mInternalAllocator;
+};
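+// Usage sketch (hypothetical, assuming a Thrust release with the
+// allocator-extended vector constructors):
+//   TypedAllocator<int> alloc{&externalAllocator};
+//   thrust::device_vector<int, TypedAllocator<int>> buf(1024, 0, alloc);
+// All of buf's device storage is then served by `externalAllocator`.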
+
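+// Compute the index-table search window on `layerIndex` for the z interval
+// [min(z1,z2) - maxdeltaz, max(z1,z2) + maxdeltaz] and the phi interval
+// currentCluster.phi +/- maxdeltaphi (full circle when maxdeltaphi > pi).
+// Returns {zBinMin, phiBinMin, zBinMax, phiBinMax}, or a zero-initialised
+// rectangle when the window falls outside the layer acceptance.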
+template <int nLayers>
+GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
+                                const o2::its::IndexTableUtils<nLayers>* utils,
+                                const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
+{
+  const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
+  const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi;
+  const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
+  const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi;
+
+  if (zRangeMax < -utils->getLayerZ(layerIndex) ||
+      zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
+    return {};
+  }
+
+  return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
+              utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
+              o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
+              utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
+}
+
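+// Span of the primary vertices attached to read-out frame `rof`.
+// `roframesPV` holds the per-ROF start offsets into `vertices`;
+// a masked ROF yields an empty span.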
+GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
+                                                  const int* roframesPV,
+                                                  const int nROF,
+                                                  const uint8_t* mask,
+                                                  const Vertex* vertices)
+{
+  const int start_pv_id = roframesPV[rof];
+  const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
+  const size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // empty span if the ROF is excluded
+  return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
+}
+
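+// Same as above, but for the inclusive ROF interval [romin, romax] and
+// without any masking.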
+GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
+                                                  const int romax,
+                                                  const int* roframesPV,
+                                                  const int nROF,
+                                                  const Vertex* vertices)
+{
+  const int start_pv_id = roframesPV[romin];
+  const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
+  return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
+}
+
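+// Span of the clusters of `layer` belonging to read-out frame `rof`;
+// empty when `rof` lies outside [0, totROFs).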
+GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
+                                                   const int totROFs,
+                                                   const int layer,
+                                                   const int** roframesClus,
+                                                   const Cluster** clusters)
+{
+  if (rof < 0 || rof >= totROFs) {
+    return gpuSpan<const Cluster>();
+  }
+  const int start_clus_id{roframesClus[layer][rof]};
+  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
+  const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
+  return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
+}
+
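+// Span of the tracklets of the given `mode` associated with the layer-1
+// clusters of read-out frame `rof` (tracklet offsets follow the layer-1
+// cluster table).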
+GPUdii() gpuSpan<const Tracklet> getTrackletsPerCluster(const int rof,
+                                                        const int totROFs,
+                                                        const int mode,
+                                                        const int** roframesClus,
+                                                        const Tracklet** tracklets)
+{
+  if (rof < 0 || rof >= totROFs) {
+    return gpuSpan<const Tracklet>();
+  }
+  const int start_clus_id{roframesClus[1][rof]};
+  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
+  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
+  return gpuSpan<const Tracklet>(&(tracklets[mode][start_clus_id]), delta);
+}
+
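+// Mutable span of the per-cluster tracklet counters for the layer-1 clusters
+// of read-out frame `rof`.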
+GPUdii() gpuSpan<int> getNTrackletsPerCluster(const int rof,
+                                              const int totROFs,
+                                              const int mode,
+                                              const int** roframesClus,
+                                              int** ntracklets)
+{
+  if (rof < 0 || rof >= totROFs) {
+    return gpuSpan<int>();
+  }
+  const int start_clus_id{roframesClus[1][rof]};
+  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
+  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
+  return gpuSpan<int>(&(ntracklets[mode][start_clus_id]), delta);
+}
+
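+// Read-only overload of getNTrackletsPerCluster.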
+GPUdii() gpuSpan<const int> getNTrackletsPerCluster(const int rof,
+                                                    const int totROFs,
+                                                    const int mode,
+                                                    const int** roframesClus,
+                                                    const int** ntracklets)
+{
+  if (rof < 0 || rof >= totROFs) {
+    return gpuSpan<const int>();
+  }
+  const int start_clus_id{roframesClus[1][rof]};
+  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
+  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
+  return gpuSpan<const int>(&(ntracklets[mode][start_clus_id]), delta);
+}
+
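+// Mutable span of the per-cluster line counters for the layer-1 clusters of
+// read-out frame `rof`.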
+GPUdii() gpuSpan<int> getNLinesPerCluster(const int rof,
+                                          const int totROFs,
+                                          const int** roframesClus,
+                                          int* nlines)
+{
+  if (rof < 0 || rof >= totROFs) {
+    return gpuSpan<int>();
+  }
+  const int start_clus_id{roframesClus[1][rof]};
+  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
+  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
+  return gpuSpan<int>(&(nlines[start_clus_id]), delta);
+}
+
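+// Read-only overload of getNLinesPerCluster.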
+GPUdii() gpuSpan<const int> getNLinesPerCluster(const int rof,
+                                                const int totROFs,
+                                                const int** roframesClus,
+                                                const int* nlines)
+{
+  if (rof < 0 || rof >= totROFs) {
+    return gpuSpan<const int>();
+  }
+  const int start_clus_id{roframesClus[1][rof]};
+  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
+  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
+  return gpuSpan<const int>(&(nlines[start_clus_id]), delta);
+}
+#endif
 } // namespace gpu
 } // namespace o2::its

|
|