Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions GPU/Common/GPUCommonDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
#define GPUCA_RTC_SPECIAL_CODE(...)
#endif

// Fallback when nothing has defined GPUCA_RTC_CONSTEXPR earlier: it expands to
// nothing, so `if GPUCA_RTC_CONSTEXPR (cond)` is an ordinary runtime `if`.
#if !defined(GPUCA_RTC_CONSTEXPR)
#define GPUCA_RTC_CONSTEXPR
#endif

#ifndef GPUCA_DETERMINISTIC_CODE
#ifdef GPUCA_DETERMINISTIC_MODE
#define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path
Expand Down
5 changes: 5 additions & 0 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
{
std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") +
std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") +
#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang
std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") +
#else
std::string("#define GPUCA_RTC_CONSTEXPR\n") +
#endif
GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr);
if (filename == "") {
filename = "/tmp/o2cagpu_rtc_";
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
// Keep some preprocessor calls unprocessed
#define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__)
#define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__)
#define GPUCA_RTC_CONSTEXPR GPUCA_RTC_CONSTEXPR

// GPUReconstructionCUDAIncludesSystem.h prepended by CMake without preprocessor running
#include "GPUReconstructionCUDADef.h"
Expand Down
5 changes: 5 additions & 0 deletions GPU/GPUTracking/Base/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,8 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe
if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}")
add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED)
endif()

# Define GPUCA_HIP_WORKAROUND_CONSTEXPR only when compiling the HIP RTC generator source.
# NOTE(review): in the CUDA counterpart (GPUReconstructionCUDAGenRTC.cxx) this macro forces the
# generated RTC preamble to define GPUCA_RTC_CONSTEXPR as empty, regardless of rtc.optConstexpr;
# presumably the HIP source is hipified from that file and behaves the same - confirm.
set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx"
TARGET_DIRECTORY O2::GPUTrackingHIP
PROPERTIES
COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR")
5 changes: 3 additions & 2 deletions GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ set(MODULE GPUTracking)

if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2})
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2")
else()
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
endif()
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
endif()
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}")

Expand Down
4 changes: 3 additions & 1 deletion GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem)
computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks);
if (mRec->GetParam().dodEdxEnabled) {
computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks);
computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks);
if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks);
}
}
computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters);
if (mRec->GetParam().par.earlyTpcTransform) {
Expand Down
17 changes: 12 additions & 5 deletions GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ GPUdii() void GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::output>(int32_t nBlocks
const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec);
TrackTPC* outputTracks = merger.OutputTracksTPCO2();
uint32_t* clusRefs = merger.OutputClusRefsTPCO2();
const auto& param = merger.Param();

GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2();
uint2* GPUrestrict() tmpData = merger.ClusRefTmp();
Expand All @@ -130,9 +131,15 @@ GPUdii() void GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::output>(int32_t nBlocks

oTrack.setChi2(tracks[i].GetParam().GetChi2());
auto& outerPar = tracks[i].OuterParam();
if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled) {
oTrack.setdEdx(tracksdEdx[i]);
oTrack.setdEdxAlt(tracksdEdxAlt[i]);
if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) {
if (param.dodEdxEnabled) {
oTrack.setdEdx(tracksdEdx[i]);
if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
oTrack.setdEdxAlt(tracksdEdxAlt[i]);
} else {
oTrack.setdEdxAlt(tracksdEdx[i]);
}
}
}

auto snpOut = outerPar.P[2];
Expand All @@ -148,9 +155,9 @@ GPUdii() void GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::output>(int32_t nBlocks
outerPar.C[6], outerPar.C[7], outerPar.C[8], outerPar.C[9], outerPar.C[10], outerPar.C[11],
outerPar.C[12], outerPar.C[13], outerPar.C[14]}));

if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.enablePID) {
if (param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.enablePID) {
PIDResponse pidResponse{};
auto pid = pidResponse.getMostProbablePID(oTrack, merger.Param().rec.tpc.PID_EKrangeMin, merger.Param().rec.tpc.PID_EKrangeMax, merger.Param().rec.tpc.PID_EPrangeMin, merger.Param().rec.tpc.PID_EPrangeMax, merger.Param().rec.tpc.PID_EDrangeMin, merger.Param().rec.tpc.PID_EDrangeMax, merger.Param().rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma);
auto pid = pidResponse.getMostProbablePID(oTrack, param.rec.tpc.PID_EKrangeMin, param.rec.tpc.PID_EKrangeMax, param.rec.tpc.PID_EPrangeMin, param.rec.tpc.PID_EPrangeMax, param.rec.tpc.PID_EDrangeMin, param.rec.tpc.PID_EDrangeMax, param.rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma);
auto pidRemap = merger.Param().rec.tpc.PID_remap[pid];
if (pidRemap >= 0) {
pid = pidRemap;
Expand Down
70 changes: 40 additions & 30 deletions GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
continue;
}
} else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) {
bool dodEdx = param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg;
dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx);
if (dodEdx) {
dEdx.fillSubThreshold(lastRow - wayDirection);
dEdxAlt.fillSubThreshold(lastRow - wayDirection);
if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) {
bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg;
dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx);
if (dodEdx) {
dEdx.fillSubThreshold(lastRow - wayDirection);
if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
dEdxAlt.fillSubThreshold(lastRow - wayDirection);
}
}
}
}

Expand Down Expand Up @@ -367,31 +371,35 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
CADEBUG(printf("Reinit linearization\n"));
prop.SetTrack(this, prop.GetAlpha());
}
if (param.par.dodEdx && param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters
bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0;
if (acc || accAlt) {
float qtot = 0, qmax = 0, pad = 0, relTime = 0;
const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1;
for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) {
if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) {
qtot += clustersXYZ[ihit].amp;
} else {
const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num];
qtot += cl.qTot;
qmax = CAMath::Max<float>(qmax, cl.qMax);
pad += cl.getPad();
relTime += cl.getTime();
if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) {
if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters
bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0;
if (acc || accAlt) {
float qtot = 0, qmax = 0, pad = 0, relTime = 0;
const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1;
for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) {
if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) {
qtot += clustersXYZ[ihit].amp;
} else {
const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num];
qtot += cl.qTot;
qmax = CAMath::Max<float>(qmax, cl.qMax);
pad += cl.getPad();
relTime += cl.getTime();
}
}
qtot /= clusterCount; // TODO: Weighted Average
pad /= clusterCount;
relTime /= clusterCount;
relTime = relTime - CAMath::Round(relTime);
if (acc) {
dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime);
}
if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
if (accAlt) {
dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime);
}
}
}
qtot /= clusterCount; // TODO: Weighted Average
pad /= clusterCount;
relTime /= clusterCount;
relTime = relTime - CAMath::Round(relTime);
if (acc) {
dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime);
}
if (accAlt) {
dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime);
}
}
}
Expand Down Expand Up @@ -428,7 +436,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_

if (param.par.dodEdx && param.dodEdxEnabled) {
dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param);
dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param);
if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param);
}
}
Alpha = prop.GetAlpha();
MoveToReference(prop, param, Alpha);
Expand Down