Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Common/MathUtils/include/MathUtils/detail/basicMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,11 @@ GPUdi() int nint(double x)
// Explicit specialization of finite() for double arguments.
// Without __FAST_MATH__ the result is std::isfinite(x).
// NOTE(review): when __FAST_MATH__ is defined this reports false for every
// input, i.e. every value is treated as "not finite" — confirm that this
// (rather than returning true) is what callers expect in fast-math builds.
template <>
GPUdi() bool finite(double x)
{
#ifndef __FAST_MATH__
  return std::isfinite(x);
#else
  return false;
#endif
}
template <>
GPUdi() double log(double x)
Expand Down
22 changes: 15 additions & 7 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,12 @@ int32_t GPUReconstruction::Init()
if (InitDevice()) {
return 1;
}
mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize;
mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize;
if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize;
mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize;
} else {
mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr;
}
if (InitPhasePermanentMemory()) {
return 1;
}
Expand Down Expand Up @@ -860,14 +864,18 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs)
FreeRegisteredMemory(i);
}
}
mHostMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mHostMemoryPermanent);
mDeviceMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mDeviceMemoryPermanent);
mUnmanagedChunks.clear();
mVolatileMemoryStart = nullptr;
mNonPersistentMemoryStack.clear();
mNonPersistentIndividualAllocations.clear();
mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize);
mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize);
mVolatileMemoryStart = nullptr;
if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
mHostMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mHostMemoryPermanent);
mDeviceMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mDeviceMemoryPermanent);
mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize);
mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize);
} else {
mHostMemoryPool = mDeviceMemoryPool = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr;
}
}

void GPUReconstruction::UpdateMaxMemoryUsed()
Expand Down
5 changes: 5 additions & 0 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,12 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16,
// Forwards a kernel launch to runKernelBackendInternal<T, I, Args...>:
// std::apply unpacks args.v into individual arguments, which are passed on
// together with args.s.
//
// The clang-only -Wunused-lambda-capture warning is silenced just for the
// lambda below; the diagnostic state is saved before and restored after it so
// the suppression does not leak into the rest of the translation unit.
template <class T, int32_t I, typename... Args>
void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
{
#pragma GCC diagnostic push
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not always captured below
#endif
  std::apply([this, &args](auto&... vals) { runKernelBackendInternal<T, I, Args...>(args.s, vals...); }, args.v);
  // Restore the diagnostic state saved by the push above (the original pushed a second time here).
#pragma GCC diagnostic pop
}

template <class T, int32_t I>
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap;
occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128));
mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap);
mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap);
}

int32_t streamMap[NSECTORS];
Expand Down
11 changes: 6 additions & 5 deletions GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1706,20 +1706,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
nHits = nFilteredHits;
}

uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits);
const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits);
if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) {
raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters);
CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters);
continue;
}

GPUTPCGMMergedTrackHit* cl = mClusters + iOutTrackFirstCluster;
GPUTPCGMMergedTrackHitXYZ* clXYZ = mClustersXYZ + iOutTrackFirstCluster;
GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster;

for (int32_t i = 0; i < nHits; i++) {
uint8_t state;
if (Param().par.earlyTpcTransform) {
const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()];
GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster;
clXYZ[i].x = c.x;
clXYZ[i].y = c.y;
clXYZ[i].z = c.z;
Expand Down Expand Up @@ -1760,7 +1760,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
mergedTrack.SetCSide(p2.CSide());

GPUTPCGMBorderTrack b;
const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : GPUTPCGeometry::Row2X(cl[0].row);
const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row);
if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) {
p1.X() = toX;
p1.Y() = b.Par()[0];
Expand Down Expand Up @@ -1791,12 +1791,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
if (Param().rec.tpc.mergeCE) {
bool CEside;
if (Param().par.earlyTpcTransform) {
const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster;
CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z);
} else {
auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear;
CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime();
}
MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], &clXYZ[CEside ? (nHits - 1) : 0], iOutputTrack);
MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack);
}
} // itr
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1091,7 +1091,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr
GPUTPCGMTrackParam t = track.Param();
float Alpha = track.Alpha();
CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt());
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam());
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam());
CADEBUG(printf("Finished Fit Track %d\n", iTrk));
CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits)));

Expand Down
18 changes: 14 additions & 4 deletions GPU/GPUTracking/Standalone/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)

if(GPUCA_BUILD_DEBUG)
set(CMAKE_CXX_FLAGS "-O0 -ggdb")
if (GPUCA_BUILD_DEBUG_SANITIZE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang
endif()
set(CMAKE_BUILD_TYPE DEBUG)
else()
set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector")
Expand All @@ -67,7 +64,13 @@ else()
set(CMAKE_BUILD_TYPE RELEASE)
add_definitions(-DNDEBUG)
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings")
# Sanitizer build: append the address and undefined-behaviour sanitizer flags
# (with the vptr check switched off) to the C++ compiler flags.
if (GPUCA_BUILD_DEBUG_SANITIZE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr")
# If the compiler path matches "clang++", additionally pass -shared-libasan.
if(CMAKE_CXX_COMPILER MATCHES "clang\\+\\+")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan")
endif()
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined")

# Find mandatory packages
Expand Down Expand Up @@ -247,6 +250,13 @@ if(GPUCA_CONFIG_ROOT)
endif()
target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation)

# Sanitizer build with a compiler whose path matches "clang++": ask the
# compiler for the location of libclang_rt.asan-x86_64.so, reduce that to an
# absolute directory, and add the directory as an rpath link option on target ca.
# NOTE(review): the library name hard-codes x86_64, and the result of
# execute_process is not checked — confirm behaviour on other architectures or
# when the compiler cannot locate the file.
if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+")
execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" DIRECTORY)
get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" ABSOLUTE)
target_link_options(ca PRIVATE "-Wl,-rpath,${CLANG_ASAN_SO_PATH}")
endif()

# Installation
install(TARGETS ca TPCFastTransformation standalone_support)
install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}")
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/display/GPUDisplay.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ class GPUDisplay : public GPUDisplayInterface
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-security"
#pragma GCC diagnostic ignored "-Wformat-truncation"
snprintf(mInfoText2, 1024, args...);
#pragma GCC diagnostic pop
GPUInfo("%s", mInfoText2);
Expand Down