|
9 | 9 | // granted to it by virtue of its status as an Intergovernmental Organization |
10 | 10 | // or submit itself to any jurisdiction. |
11 | 11 |
|
12 | | -/// \file GPUDefParametersDefault.h |
| 12 | +/// \file GPUDefParametersDefaults.h |
13 | 13 | /// \author David Rohr |
14 | 14 |
|
15 | | -// This files contains compile-time constants affecting the GPU performance. |
16 | | -// Many of these constants are GPU-architecture specific. |
17 | | -// This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc. |
18 | | -// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h |
| 15 | +// This file contains compile-time constants affecting the GPU performance. |
19 | 16 |
|
20 | | -#ifndef GPUDEFPARAMETERSDEFAULT_H |
21 | | -#define GPUDEFPARAMETERSDEFAULT_H |
| 17 | +#ifndef GPUDEFPARAMETERSDEFAULTS_H |
| 18 | +#define GPUDEFPARAMETERSDEFAULTS_H |
22 | 19 | // clang-format off |
23 | 20 |
|
24 | | -#include "GPUCommonDef.h" |
25 | | -#include "GPUDefMacros.h" |
26 | | - |
27 | 21 | // Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds) |
28 | 22 |
|
29 | 23 | // GPU Run Configuration |
30 | 24 | #ifdef GPUCA_GPUCODE |
31 | 25 | #if defined(GPUCA_GPUTYPE_MI2xx) |
32 | 26 | #define GPUCA_WARP_SIZE 64 |
33 | | - #define GPUCA_THREAD_COUNT 256 |
| 27 | + #define GPUCA_THREAD_COUNT_DEFAULT 256 |
34 | 28 | #define GPUCA_LB_GPUTPCCreateTrackingData 256 |
35 | 29 | #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 |
36 | 30 | #define GPUCA_LB_GPUTPCStartHitsFinder 1024 |
|
93 | 87 | #define GPUCA_COMP_GATHER_MODE 3 |
94 | 88 | #elif defined(GPUCA_GPUTYPE_VEGA) |
95 | 89 | #define GPUCA_WARP_SIZE 64 |
96 | | - #define GPUCA_THREAD_COUNT 256 |
| 90 | + #define GPUCA_THREAD_COUNT_DEFAULT 256 |
97 | 91 | #define GPUCA_LB_GPUTPCCreateTrackingData 128 |
98 | 92 | #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 |
99 | 93 | #define GPUCA_LB_GPUTPCStartHitsFinder 1024 |
|
156 | 150 | #define GPUCA_COMP_GATHER_MODE 3 |
157 | 151 | #elif defined(GPUCA_GPUTYPE_AMPERE) |
158 | 152 | #define GPUCA_WARP_SIZE 32 |
159 | | - #define GPUCA_THREAD_COUNT 512 |
| 153 | + #define GPUCA_THREAD_COUNT_DEFAULT 512 |
160 | 154 | #define GPUCA_LB_GPUTPCCreateTrackingData 384 |
161 | 155 | #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 |
162 | 156 | #define GPUCA_LB_GPUTPCStartHitsFinder 512 |
|
219 | 213 | #define GPUCA_COMP_GATHER_MODE 3 |
220 | 214 | #elif defined(GPUCA_GPUTYPE_TURING) |
221 | 215 | #define GPUCA_WARP_SIZE 32 |
222 | | - #define GPUCA_THREAD_COUNT 512 |
| 216 | + #define GPUCA_THREAD_COUNT_DEFAULT 512 |
223 | 217 | #define GPUCA_LB_GPUTPCCreateTrackingData 256 |
224 | 218 | #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 |
225 | 219 | #define GPUCA_LB_GPUTPCStartHitsFinder 512 |
|
281 | 275 |
|
282 | 276 | #ifdef GPUCA_GPUCODE |
283 | 277 | // Default settings for GPU, if not already set for selected GPU type |
284 | | - #ifndef GPUCA_THREAD_COUNT |
285 | | - #define GPUCA_THREAD_COUNT 256 |
| 278 | + #ifndef GPUCA_THREAD_COUNT_DEFAULT |
| 279 | + #define GPUCA_THREAD_COUNT_DEFAULT 256 |
286 | 280 | #endif |
287 | 281 | #ifndef GPUCA_LB_GPUTPCCreateTrackingData |
288 | 282 | #define GPUCA_LB_GPUTPCCreateTrackingData 256 |
|
486 | 480 | #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256 |
487 | 481 | #endif |
488 | 482 | #ifndef GPUCA_LB_GPUMemClean16 |
489 | | - #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT, 1 |
| 483 | + #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT_DEFAULT, 1 |
490 | 484 | #endif |
491 | 485 | #ifndef GPUCA_LB_GPUitoa |
492 | | - #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1 |
| 486 | + #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT_DEFAULT, 1 |
493 | 487 | #endif |
494 | | - #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) |
495 | | - |
496 | 489 | // These kernel launch-bounds are derived from one of the constants set above |
497 | 489 | #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression |
498 | 490 | #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression |
|
516 | 508 | #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER |
517 | 509 | #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER |
518 | 510 | #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER |
519 | | -#else |
520 | | - #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. |
521 | | -#endif |
522 | | - |
523 | | -#define GPUCA_GET_WARP_COUNT(...) (GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) |
524 | | - |
525 | | -#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! |
526 | | - |
527 | | -#if defined(__CUDACC__) || defined(__HIPCC__) |
528 | | - #define GPUCA_SPECIALIZE_THRUST_SORTS |
529 | | -#endif |
530 | | - |
531 | | -#ifndef GPUCA_NEIGHBORSFINDER_REGS |
532 | | - #define GPUCA_NEIGHBORSFINDER_REGS NONE, 0 |
533 | | -#endif |
534 | | -#ifdef GPUCA_GPUCODE |
535 | | - #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP |
536 | | - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 |
537 | | - #endif |
538 | | - #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE |
539 | | - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 |
540 | | - #endif |
541 | | - #ifndef GPUCA_ALTERNATE_BORDER_SORT |
542 | | - #define GPUCA_ALTERNATE_BORDER_SORT 0 |
543 | | - #endif |
544 | | - #ifndef GPUCA_SORT_BEFORE_FIT |
545 | | - #define GPUCA_SORT_BEFORE_FIT 0 |
546 | | - #endif |
547 | | - #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION |
548 | | - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 |
549 | | - #endif |
550 | | - #ifndef GPUCA_COMP_GATHER_KERNEL |
551 | | - #define GPUCA_COMP_GATHER_KERNEL 0 |
552 | | - #endif |
553 | | - #ifndef GPUCA_COMP_GATHER_MODE |
554 | | - #define GPUCA_COMP_GATHER_MODE 2 |
555 | | - #endif |
556 | | -#else |
557 | | - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 |
558 | | - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 |
559 | | - #define GPUCA_ALTERNATE_BORDER_SORT 0 |
560 | | - #define GPUCA_SORT_BEFORE_FIT 0 |
561 | | - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 |
562 | | - #define GPUCA_THREAD_COUNT_FINDER 1 |
563 | | - #define GPUCA_COMP_GATHER_KERNEL 0 |
564 | | - #define GPUCA_COMP_GATHER_MODE 0 |
565 | | -#endif |
566 | | -#ifndef GPUCA_DEDX_STORAGE_TYPE |
567 | | - #define GPUCA_DEDX_STORAGE_TYPE float |
568 | | -#endif |
569 | | -#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE |
570 | | - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float |
571 | | -#endif |
572 | | -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) |
573 | | -#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) |
574 | | - |
575 | | -#ifndef GPUCA_WARP_SIZE |
576 | | - #ifdef GPUCA_GPUCODE |
577 | | - #define GPUCA_WARP_SIZE 32 |
578 | | - #else |
579 | | - #define GPUCA_WARP_SIZE 1 |
580 | | - #endif |
581 | | -#endif |
582 | | - |
583 | | -#define GPUCA_MAX_THREADS 1024 |
584 | | -#define GPUCA_MAX_STREAMS 36 |
585 | | - |
586 | | -#define GPUCA_SORT_STARTHITS_GPU // Sort the start hits when running on GPU |
587 | | -#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid |
588 | | -#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers |
589 | | -#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks |
590 | | - |
591 | | -// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling |
592 | | - |
593 | | -// Default maximum numbers |
594 | | -#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters |
595 | | -#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets |
596 | | -#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit |
597 | | -#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device |
598 | | -#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host |
599 | | -#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread |
600 | | -#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread |
601 | | - |
602 | | -// #define GPUCA_KERNEL_DEBUGGER_OUTPUT |
603 | | - |
604 | | -// Some assertions to make sure the parameters are not invalid |
605 | | -#if defined(GPUCA_GPUCODE) |
606 | | - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); |
607 | | - static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); |
608 | | - static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); |
609 | | -#endif |
610 | | - |
611 | | -// Derived parameters |
612 | | -#ifdef GPUCA_USE_TEXTURES |
613 | | - #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache |
614 | | -#endif |
615 | | -#if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE) |
616 | | - #define GPUCA_SORT_STARTHITS |
617 | 511 | #endif |
618 | 512 |
|
619 | 513 | // clang-format on |
620 | | -#endif // GPUDEFPARAMETERSDEFAULT_H |
| 514 | +#endif // GPUDEFPARAMETERSDEFAULTS_H |
0 commit comments