Skip to content

Commit 013e151

Browse files
committed
Split GPUDefParameters file into constants, and RTC-dynamic parameters with a defaults file, and a wrapper!
1 parent 0f9450e commit 013e151

File tree

9 files changed

+182
-132
lines changed

9 files changed

+182
-132
lines changed

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "GPUReconstructionIncludes.h"
1717
#include "GPUReconstructionThreading.h"
1818
#include "GPUChain.h"
19-
#include "GPUDefParameters.h"
19+
#include "GPUDefParametersRuntime.h"
2020
#include "GPUTPCClusterData.h"
2121
#include "GPUTPCSectorOutCluster.h"
2222
#include "GPUTPCGMMergedTrack.h"

GPU/GPUTracking/CMakeLists.txt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,9 @@ set(HDRS_INSTALL
129129
DataTypes/GPUTriggerOutputs.h
130130
Debug/GPUROOTDump.h
131131
Definitions/GPUDefConstantsAndSettings.h
132-
Definitions/GPUDefParametersDefault.h
132+
Definitions/GPUDefParametersWrapper.h
133+
Definitions/GPUDefParametersConstants.h
134+
Definitions/GPUDefParametersDefaults.h
133135
Definitions/GPUDef.h
134136
Definitions/GPUDefMacros.h
135137
Definitions/GPULogging.h
@@ -234,7 +236,7 @@ set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h
234236
Base/GPUReconstructionKernelIncludes.template.h
235237
Base/GPUReconstructionIncludesDeviceAll.template.h
236238
cmake/GPUNoFastMathKernels.template.h
237-
Definitions/GPUDefParameters.template.h
239+
Definitions/GPUDefParametersRuntime.template.h
238240
Definitions/GPUDefParametersLoad.template.inc)
239241
set(GENERATED_HEADERS_LIST "")
240242

@@ -258,7 +260,7 @@ add_custom_command(
258260
)
259261
list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h)
260262

261-
set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h)
263+
set(HDRS_INSTALL ${HDRS_INSTALL} ${GENERATED_HEADERS_LIST})
262264
include(kernels.cmake)
263265

264266
# Optional sources depending on optional dependencies

GPU/GPUTracking/Definitions/GPUDef.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "GPUCommonDef.h"
2020
#include "GPUDefConstantsAndSettings.h"
21-
#include "GPUDefParametersDefault.h"
21+
#include "GPUDefParametersWrapper.h"
2222
#include "GPUCommonRtypes.h"
2323

2424
// Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file GPUDefParametersConstants.h
13+
/// \author David Rohr
14+
15+
// This file contains compile-time constants, independent from the backend
16+
17+
#ifndef GPUDEFPARAMETERSCONSTANTS_H
18+
#define GPUDEFPARAMETERSCONSTANTS_H
19+
// clang-format off
20+
21+
#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!
22+
23+
#if defined(__CUDACC__) || defined(__HIPCC__)
24+
#define GPUCA_SPECIALIZE_THRUST_SORTS
25+
#endif
26+
27+
#define GPUCA_MAX_THREADS 1024
28+
#define GPUCA_MAX_STREAMS 36
29+
30+
#if defined(GPUCA_GPUCODE)
31+
#define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU
32+
#endif
33+
34+
#define GPUCA_ROWALIGNMENT 16 // Alignment of Row Hits and Grid
35+
#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers
36+
#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks
37+
38+
// Default maximum numbers
39+
#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters
40+
#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets
41+
#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit
42+
#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device
43+
#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host
44+
#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread
45+
#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Heap size on the GPU (NOTE(review): 1025 is likely a typo for 1024 - confirm)
46+
47+
#ifdef GPUCA_GPUCODE
48+
#ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
49+
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
50+
#endif
51+
#ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
52+
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
53+
#endif
54+
#ifndef GPUCA_ALTERNATE_BORDER_SORT
55+
#define GPUCA_ALTERNATE_BORDER_SORT 0
56+
#endif
57+
#ifndef GPUCA_SORT_BEFORE_FIT
58+
#define GPUCA_SORT_BEFORE_FIT 0
59+
#endif
60+
#ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION
61+
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
62+
#endif
63+
#ifndef GPUCA_COMP_GATHER_KERNEL
64+
#define GPUCA_COMP_GATHER_KERNEL 0
65+
#endif
66+
#ifndef GPUCA_COMP_GATHER_MODE
67+
#define GPUCA_COMP_GATHER_MODE 2
68+
#endif
69+
#else
70+
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
71+
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0
72+
#define GPUCA_ALTERNATE_BORDER_SORT 0
73+
#define GPUCA_SORT_BEFORE_FIT 0
74+
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
75+
#define GPUCA_THREAD_COUNT_FINDER 1
76+
#define GPUCA_COMP_GATHER_KERNEL 0
77+
#define GPUCA_COMP_GATHER_MODE 0
78+
#endif
79+
#ifndef GPUCA_DEDX_STORAGE_TYPE
80+
#define GPUCA_DEDX_STORAGE_TYPE float
81+
#endif
82+
#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
83+
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
84+
#endif
85+
86+
// clang-format on
87+
#endif // GPUDEFPARAMETERSCONSTANTS_H

GPU/GPUTracking/Definitions/GPUDefParametersDefault.h renamed to GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h

Lines changed: 13 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,22 @@
99
// granted to it by virtue of its status as an Intergovernmental Organization
1010
// or submit itself to any jurisdiction.
1111

12-
/// \file GPUDefParametersDefault.h
12+
/// \file GPUDefParametersDefaults.h
1313
/// \author David Rohr
1414

15-
// This files contains compile-time constants affecting the GPU performance.
16-
// Many of these constants are GPU-architecture specific.
17-
// This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc.
18-
// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h
15+
// This file contains compile-time constants affecting the GPU performance.
1916

20-
#ifndef GPUDEFPARAMETERSDEFAULT_H
21-
#define GPUDEFPARAMETERSDEFAULT_H
17+
#ifndef GPUDEFPARAMETERSDEFAULTS_H
18+
#define GPUDEFPARAMETERSDEFAULTS_H
2219
// clang-format off
2320

24-
#include "GPUCommonDef.h"
25-
#include "GPUDefMacros.h"
26-
2721
// Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds)
2822

2923
// GPU Run Configuration
3024
#ifdef GPUCA_GPUCODE
3125
#if defined(GPUCA_GPUTYPE_MI2xx)
3226
#define GPUCA_WARP_SIZE 64
33-
#define GPUCA_THREAD_COUNT 256
27+
#define GPUCA_THREAD_COUNT_DEFAULT 256
3428
#define GPUCA_LB_GPUTPCCreateTrackingData 256
3529
#define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
3630
#define GPUCA_LB_GPUTPCStartHitsFinder 1024
@@ -93,7 +87,7 @@
9387
#define GPUCA_COMP_GATHER_MODE 3
9488
#elif defined(GPUCA_GPUTYPE_VEGA)
9589
#define GPUCA_WARP_SIZE 64
96-
#define GPUCA_THREAD_COUNT 256
90+
#define GPUCA_THREAD_COUNT_DEFAULT 256
9791
#define GPUCA_LB_GPUTPCCreateTrackingData 128
9892
#define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2
9993
#define GPUCA_LB_GPUTPCStartHitsFinder 1024
@@ -156,7 +150,7 @@
156150
#define GPUCA_COMP_GATHER_MODE 3
157151
#elif defined(GPUCA_GPUTYPE_AMPERE)
158152
#define GPUCA_WARP_SIZE 32
159-
#define GPUCA_THREAD_COUNT 512
153+
#define GPUCA_THREAD_COUNT_DEFAULT 512
160154
#define GPUCA_LB_GPUTPCCreateTrackingData 384
161155
#define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
162156
#define GPUCA_LB_GPUTPCStartHitsFinder 512
@@ -219,7 +213,7 @@
219213
#define GPUCA_COMP_GATHER_MODE 3
220214
#elif defined(GPUCA_GPUTYPE_TURING)
221215
#define GPUCA_WARP_SIZE 32
222-
#define GPUCA_THREAD_COUNT 512
216+
#define GPUCA_THREAD_COUNT_DEFAULT 512
223217
#define GPUCA_LB_GPUTPCCreateTrackingData 256
224218
#define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
225219
#define GPUCA_LB_GPUTPCStartHitsFinder 512
@@ -281,8 +275,8 @@
281275

282276
#ifdef GPUCA_GPUCODE
283277
// Default settings for GPU, if not already set for selected GPU type
284-
#ifndef GPUCA_THREAD_COUNT
285-
#define GPUCA_THREAD_COUNT 256
278+
#ifndef GPUCA_THREAD_COUNT_DEFAULT
279+
#define GPUCA_THREAD_COUNT_DEFAULT 256
286280
#endif
287281
#ifndef GPUCA_LB_GPUTPCCreateTrackingData
288282
#define GPUCA_LB_GPUTPCCreateTrackingData 256
@@ -486,13 +480,11 @@
486480
#define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256
487481
#endif
488482
#ifndef GPUCA_LB_GPUMemClean16
489-
#define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT, 1
483+
#define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT_DEFAULT, 1
490484
#endif
491485
#ifndef GPUCA_LB_GPUitoa
492-
#define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1
486+
#define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT_DEFAULT, 1
493487
#endif
494-
#define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__)
495-
496488
// These kernel launch-bounds are derived from one of the constants set above
497489
#define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression
498490
#define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression
@@ -516,105 +508,7 @@
516508
#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER
517509
#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER
518510
#define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER
519-
#else
520-
#define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block.
521-
#endif
522-
523-
#define GPUCA_GET_WARP_COUNT(...) (GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE)
524-
525-
#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!
526-
527-
#if defined(__CUDACC__) || defined(__HIPCC__)
528-
#define GPUCA_SPECIALIZE_THRUST_SORTS
529-
#endif
530-
531-
#ifndef GPUCA_NEIGHBORSFINDER_REGS
532-
#define GPUCA_NEIGHBORSFINDER_REGS NONE, 0
533-
#endif
534-
#ifdef GPUCA_GPUCODE
535-
#ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
536-
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
537-
#endif
538-
#ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
539-
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
540-
#endif
541-
#ifndef GPUCA_ALTERNATE_BORDER_SORT
542-
#define GPUCA_ALTERNATE_BORDER_SORT 0
543-
#endif
544-
#ifndef GPUCA_SORT_BEFORE_FIT
545-
#define GPUCA_SORT_BEFORE_FIT 0
546-
#endif
547-
#ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION
548-
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
549-
#endif
550-
#ifndef GPUCA_COMP_GATHER_KERNEL
551-
#define GPUCA_COMP_GATHER_KERNEL 0
552-
#endif
553-
#ifndef GPUCA_COMP_GATHER_MODE
554-
#define GPUCA_COMP_GATHER_MODE 2
555-
#endif
556-
#else
557-
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
558-
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0
559-
#define GPUCA_ALTERNATE_BORDER_SORT 0
560-
#define GPUCA_SORT_BEFORE_FIT 0
561-
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
562-
#define GPUCA_THREAD_COUNT_FINDER 1
563-
#define GPUCA_COMP_GATHER_KERNEL 0
564-
#define GPUCA_COMP_GATHER_MODE 0
565-
#endif
566-
#ifndef GPUCA_DEDX_STORAGE_TYPE
567-
#define GPUCA_DEDX_STORAGE_TYPE float
568-
#endif
569-
#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
570-
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
571-
#endif
572-
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)
573-
#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE)
574-
575-
#ifndef GPUCA_WARP_SIZE
576-
#ifdef GPUCA_GPUCODE
577-
#define GPUCA_WARP_SIZE 32
578-
#else
579-
#define GPUCA_WARP_SIZE 1
580-
#endif
581-
#endif
582-
583-
#define GPUCA_MAX_THREADS 1024
584-
#define GPUCA_MAX_STREAMS 36
585-
586-
#define GPUCA_SORT_STARTHITS_GPU // Sort the start hits when running on GPU
587-
#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid
588-
#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers
589-
#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks
590-
591-
// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling
592-
593-
// Default maximum numbers
594-
#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters
595-
#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets
596-
#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit
597-
#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device
598-
#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host
599-
#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread
600-
#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread
601-
602-
// #define GPUCA_KERNEL_DEBUGGER_OUTPUT
603-
604-
// Some assertions to make sure the parameters are not invalid
605-
#if defined(GPUCA_GPUCODE)
606-
static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP");
607-
static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE");
608-
static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE");
609-
#endif
610-
611-
// Derived parameters
612-
#ifdef GPUCA_USE_TEXTURES
613-
#define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache
614-
#endif
615-
#if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE)
616-
#define GPUCA_SORT_STARTHITS
617511
#endif
618512

619513
// clang-format on
620-
#endif // GPUDEFPARAMETERSDEFAULT_H
514+
#endif // GPUDEFPARAMETERSDEFAULTS_H

GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#define GPUCA_M_LB_EMPTY_1(...) __VA_ARGS__
1919
#define GPUCA_M_LB_EMPTY0(...) GPUCA_M_CAT(GPUCA_M_LB_EMPTY_, __VA_OPT__(1))(__VA_ARGS__)
2020

21-
#include "GPUDefParameters.h"
21+
#include "GPUDefParametersRuntime.h"
2222
#include "GPUDefMacros.h"
2323
#include <sstream>
2424
#include <string>

GPU/GPUTracking/Definitions/GPUDefParameters.template.h renamed to GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
// granted to it by virtue of its status as an Intergovernmental Organization
1010
// or submit itself to any jurisdiction.
1111

12-
/// \file GPUDefParameters.h
12+
/// \file GPUDefParametersRuntime.h
1313
/// \author David Rohr
1414

15-
#ifndef GPUDEFPARAMETERS_H
16-
#define GPUDEFPARAMETERS_H
15+
#ifndef GPUDEFPARAMETERSRUNTIME_H
16+
#define GPUDEFPARAMETERSRUNTIME_H
1717

1818
namespace o2::gpu
1919
{
@@ -24,4 +24,4 @@ struct GPUDefParameters { // clang-format off
2424
}; // clang-format on
2525
} // namespace o2::gpu
2626

27-
#endif
27+
#endif // GPUDEFPARAMETERSRUNTIME_H

0 commit comments

Comments
 (0)