Skip to content

Commit ed323ec

Browse files
committed
Adjust for comments
1 parent 24bf104 commit ed323ec

File tree

12 files changed

+288
-287
lines changed

12 files changed

+288
-287
lines changed

Common/ML/include/ML/3rdparty/GPUORTFloat16.h

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#endif
1717

1818
#include "GPUCommonDef.h"
19+
#include "GPUCommonMath.h"
1920

2021
namespace o2
2122
{
@@ -530,11 +531,14 @@ template <class Derived>
530531
GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
531532
{
532533
uint16_t result;
533-
if (std::isnan(v)) {
534+
if (o2::gpu::CAMath::IsNaN(v)) {
534535
result = kPositiveQNaNBits;
535536
} else {
536537
auto get_msb_half = [](float fl) {
537538
uint16_t result;
539+
#ifdef GPUCA_GPUCODE
540+
result = 0;
541+
#else
538542
#ifdef __cpp_if_constexpr
539543
if constexpr (detail::endian::native == detail::endian::little)
540544
#else
@@ -557,6 +561,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
557561
U32 += (upper_bits & 1) + kRoundToNearest;
558562
result = get_msb_half(F32);
559563
}
564+
#endif
560565
return result;
561566
}
562567

@@ -567,6 +572,9 @@ GPUd() inline float BFloat16Impl<Derived>::ToFloatImpl() const noexcept
567572
return std::numeric_limits<float>::quiet_NaN();
568573
}
569574
float result;
575+
#ifdef GPUCA_GPUCODE
576+
result = 0; // Fixme: implement memcpy
577+
#else
570578
char* const first = reinterpret_cast<char*>(&result);
571579
char* const second = first + sizeof(uint16_t);
572580
#ifdef __cpp_if_constexpr
@@ -581,6 +589,7 @@ GPUd() inline float BFloat16Impl<Derived>::ToFloatImpl() const noexcept
581589
std::memcpy(first, &val, sizeof(uint16_t));
582590
std::memset(second, 0, sizeof(uint16_t));
583591
}
592+
#endif
584593
return result;
585594
}
586595

@@ -872,5 +881,4 @@ static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match");
872881
} // namespace OrtDataType
873882

874883
} // namespace o2
875-
876884
#endif

GPU/GPUTracking/CMakeLists.txt

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -276,15 +276,11 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
276276
O2::GPUCommon
277277
O2::ReconstructionDataFormats
278278
O2::TPCFastTransformation
279+
O2::ML
279280
PRIVATE_LINK_LIBRARIES O2::DataFormatsTPC
280281
SOURCES ${SRCS_DATATYPES})
281-
if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone")
282-
add_compile_definitions(GPUCA_HAS_ONNX=1)
283-
target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX)
284-
target_link_libraries(${targetName} PUBLIC O2::ML)
285-
else()
286-
target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2)
287-
endif()
282+
add_compile_definitions(GPUCA_HAS_ONNX=1)
283+
target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX)
288284

289285
o2_target_root_dictionary(GPUDataTypes
290286
HEADERS ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL}
@@ -350,7 +346,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
350346
LABELS its COMPILE_ONLY)
351347

352348
add_subdirectory(Interface)
353-
354349
endif()
355350

356351
# Main CMake part for Standalone
@@ -422,4 +417,4 @@ endif()
422417

423418
if(${GPUCA_NO_FAST_MATH})
424419
target_compile_definitions(${targetName} PUBLIC GPUCA_NO_FAST_MATH)
425-
endif()
420+
endif()

GPU/GPUTracking/Definitions/GPUDefGPUParameters.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,6 @@
8181
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
8282
#define GPUCA_LB_GPUTPCCFDeconvolution 512
8383
#define GPUCA_LB_GPUTPCCFClusterizer 448
84-
#define GPUCA_LB_GPUTPCNNClusterizerKernels 448
8584
#define GPUCA_LB_COMPRESSION_GATHER 1024
8685
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
8786
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -148,7 +147,6 @@
148147
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
149148
#define GPUCA_LB_GPUTPCCFDeconvolution 512
150149
#define GPUCA_LB_GPUTPCCFClusterizer 512
151-
#define GPUCA_LB_GPUTPCNNClusterizerKernels 512
152150
#define GPUCA_LB_COMPRESSION_GATHER 1024
153151
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
154152
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -215,7 +213,6 @@
215213
#define GPUCA_LB_GPUTPCCFNoiseSuppression 448
216214
#define GPUCA_LB_GPUTPCCFDeconvolution 384
217215
#define GPUCA_LB_GPUTPCCFClusterizer 448
218-
#define GPUCA_LB_GPUTPCNNClusterizerKernels 448
219216
#define GPUCA_LB_COMPRESSION_GATHER 1024
220217
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
221218
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -858,7 +858,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
858858
mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) {
859859
uint32_t iSector = iSectorBase + lane;
860860
GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
861+
#ifdef GPUCA_HAS_ONNX
861862
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
863+
#endif
862864
GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
863865

864866
if (doGPU) {
@@ -929,18 +931,18 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
929931

930932
auto stop0 = std::chrono::high_resolution_clock::now();
931933
auto start1 = std::chrono::high_resolution_clock::now();
932-
nnApplication.inferenceNetworkClass(clustererNN, iSize, evalDtype, batchStart);
934+
nnApplication.inferenceNetwork(clustererNN.model_class, clustererNN, iSize, clusterer.modelProbabilities);
933935
if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) {
934936
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, iSector, evalDtype, 0, batchStart); // Assigning class labels
935937
} else {
936938
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, iSector, evalDtype, 0, batchStart); // Assigning class labels
937939
}
938940

939941
if (!clustererNN.nnClusterizerUseCfRegression) {
940-
nnApplication.inferenceNetworkReg1(clustererNN, iSize, evalDtype, batchStart);
942+
nnApplication.inferenceNetwork(clustererNN.model_reg_1, clustererNN, iSize, clusterer.outputDataReg1);
941943
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, iSector, evalDtype, 0, batchStart); // Running the NN for regression class 1
942944
if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) {
943-
nnApplication.inferenceNetworkReg2(clustererNN, iSize, evalDtype, batchStart);
945+
nnApplication.inferenceNetwork(clustererNN.model_reg_2, clustererNN, iSize, clusterer.outputDataReg2);
944946
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, iSector, evalDtype, 0, batchStart); // Running the NN for regression class 2
945947
}
946948
}
@@ -1168,4 +1170,4 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11681170

11691171
#endif
11701172
return 0;
1171-
}
1173+
}

0 commit comments

Comments
 (0)