Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Detectors/TPC/qc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ o2_add_library(TPCQC
src/SACs.cxx
src/IDCsVsSACs.cxx
src/TrackClusters.cxx
src/GPUErrorQA.cxx
PUBLIC_LINK_LIBRARIES O2::TPCBase
O2::DataFormatsTPC
O2::GPUO2Interface
Expand All @@ -36,7 +37,8 @@ o2_target_root_dictionary(TPCQC
include/TPCQC/DCSPTemperature.h
include/TPCQC/SACs.h
include/TPCQC/IDCsVsSACs.h
include/TPCQC/TrackClusters.h)
include/TPCQC/TrackClusters.h
include/TPCQC/GPUErrorQA.h)

o2_add_test(PID
COMPONENT_NAME tpc
Expand Down
69 changes: 69 additions & 0 deletions Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

///
/// @file GPUErrorQA.h
/// @author Anton Riedel, anton.riedel@cern.ch
///

#ifndef AliceO2_TPC_QC_GPUERRORQA_H
#define AliceO2_TPC_QC_GPUERRORQA_H

#include <memory>
#include <string>
#include <vector>
#include <unordered_map>

// root includes

// o2 includes
// #include "DataFormatsTPC/Defs.h"

class TH1;
namespace o2::tpc::qc
{

/// @brief TPC QC task for errors from GPU reconstruction
///
/// This class is used to retrieve and visualize GPU errors
/// according to corresponding error code and location.
///
/// All histograms are kept in a map keyed by histogram name. The only
/// histogram created by initializeHistograms() is "ErrorCounter".
/// initializeHistograms() has to be called before processErrors().
///
/// origin: TPC
/// @author Anton Riedel, anton.riedel@cern.ch
class GPUErrorQA
{
public:
/// \brief Constructor.
GPUErrorQA() = default;

/// Process GPU errors reported by the reconstruction workflow.
/// For every record the "ErrorCounter" bin indexed by the error code is incremented.
/// \param errors error records of four 32-bit words each; word 0 is the error code,
///        words 1-3 are not used by this class
void processErrors(std::vector<std::array<uint32_t, 4>> errors);

/// Initialize all histograms.
/// Creates the "ErrorCounter" histogram and labels its bins with the GPU error
/// names ("NO_DEF" for codes without a name).
void initializeHistograms();

/// Reset all histograms (calls Reset() on every histogram in the map)
void resetHistograms();

/// Return the histograms
/// \return read-only reference to the map from histogram name to histogram
const std::unordered_map<std::string, std::unique_ptr<TH1>>& getMapHist() const { return mMapHist; }

/// Dump results to a file.
/// All histograms are written as one TObjArray stored under the key "GPUErrorQA_Hists".
/// \param filename path of the ROOT file, opened in "recreate" mode
void dumpToFile(std::string filename);

private:
/// histograms owned by this object, keyed by histogram name
std::unordered_map<std::string, std::unique_ptr<TH1>> mMapHist;

// ROOT dictionary declaration, class version 2
ClassDefNV(GPUErrorQA, 2);
};
} // namespace o2::tpc::qc

#endif // AliceO2_TPC_QC_GPUERRORQA_H
80 changes: 80 additions & 0 deletions Detectors/TPC/qc/src/GPUErrorQA.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2019-2025 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

#define _USE_MATH_DEFINES

// root includes
#include "TFile.h"
#include "TH1I.h"

// o2 includes
#include "TPCQC/GPUErrorQA.h"
#include "GPUErrors.h"

ClassImp(o2::tpc::qc::GPUErrorQA);

using namespace o2::tpc::qc;

//______________________________________________________________________________
/// Create the "ErrorCounter" histogram with one bin per GPU error code.
///
/// Bin index i (1-based) represents error code i, which is the convention used
/// by the bin-index based filling in processErrors(). Every bin is labelled
/// with the name of its error code, or "NO_DEF" if the code has no name.
void GPUErrorQA::initializeHistograms()
{
  TH1::AddDirectory(false);

  auto const& errorNames = o2::gpu::GPUErrors::getErrorNames();

  // largest known error code; never below 1 so the histogram has at least one bin
  int maxErrorCode = 1;
  for (const auto& [key, _] : errorNames) {
    const int code = static_cast<int>(key);
    if (code > maxErrorCode) {
      maxErrorCode = code;
    }
  }

  // 1D histogram counting all reported errors.
  // The axis spans [0.5, maxErrorCode + 0.5] so that bin i is centred on error
  // code i; bin index, axis value and bin label then all refer to the same code.
  auto& counter = mMapHist["ErrorCounter"];
  counter = std::make_unique<TH1I>("ErrorCounter", "ErrorCounter", maxErrorCode, 0.5, maxErrorCode + 0.5);
  auto* xAxis = counter->GetXaxis();
  xAxis->SetTitle("Error Codes");
  counter->GetYaxis()->SetTitle("Entries");

  // for convenience, label each bin with the error name
  for (int bin = 1; bin <= maxErrorCode; ++bin) {
    const auto it = errorNames.find(static_cast<uint32_t>(bin));
    xAxis->SetBinLabel(bin, it != errorNames.end() ? it->second : "NO_DEF");
  }
}
//______________________________________________________________________________
/// Clear the content of every histogram stored in the map.
void GPUErrorQA::resetHistograms()
{
  for (auto it = mMapHist.begin(); it != mMapHist.end(); ++it) {
    it->second->Reset();
  }
}
//______________________________________________________________________________
void GPUErrorQA::processErrors(std::vector<std::array<uint32_t, 4>> errors)
{
for (const auto& error : errors) {
uint32_t errorCode = error[0];
mMapHist["ErrorCounter"]->AddBinContent(errorCode);
}
}

//______________________________________________________________________________
/// Write all histograms to a ROOT file.
///
/// The file is opened in "recreate" mode and the histograms are stored as one
/// TObjArray under the single key "GPUErrorQA_Hists". If the file cannot be
/// opened nothing is written.
/// \param filename path of the output file
void GPUErrorQA::dumpToFile(const std::string filename)
{
  auto f = std::unique_ptr<TFile>(TFile::Open(filename.c_str(), "recreate"));
  // Without this check a failed open would make Write() target whatever
  // directory happens to be current instead of the requested file.
  if (!f || f->IsZombie()) {
    return;
  }

  // non-owning collection: the histograms stay owned by mMapHist
  TObjArray arr;
  arr.SetName("GPUErrorQA_Hists");
  for (const auto& entry : mMapHist) {
    arr.Add(entry.second.get());
  }
  arr.Write(arr.GetName(), TObject::kSingleKey);
  f->Close();
}
1 change: 1 addition & 0 deletions Detectors/TPC/qc/src/TPCQCLinkDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#pragma link C++ class o2::tpc::qc::SACs + ;
#pragma link C++ class o2::tpc::qc::IDCsVsSACs + ;
#pragma link C++ class o2::tpc::qc::TrackClusters + ;
#pragma link C++ class o2::tpc::qc::GPUErrorQA + ;
#pragma link C++ function o2::tpc::qc::helpers::makeLogBinning + ;
#pragma link C++ function o2::tpc::qc::helpers::setStyleHistogram1D + ;
#pragma link C++ function o2::tpc::qc::helpers::setStyleHistogram2D + ;
Expand Down
9 changes: 7 additions & 2 deletions GPU/GPUTracking/Global/GPUErrors.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,20 @@ void GPUErrors::clear()
memset(mErrors, 0, GPUCA_MAX_ERRORS * sizeof(*mErrors));
}

static std::unordered_map<uint32_t, const char*> errorNames = {
// Returns the table that maps numeric GPU error codes to their names.
// The table is a function-local static, so it is built once on the first call.
// Its entries are generated by expanding GPUCA_ERROR_CODE over GPUErrorCodes.h;
// GPUCA_M_STR presumably stringifies the error identifier (defined elsewhere - confirm).
const std::unordered_map<uint32_t, const char*>& GPUErrors::getErrorNames()
{
static std::unordered_map<uint32_t, const char*> errorNames = {
#define GPUCA_ERROR_CODE(num, name, ...) {num, GPUCA_M_STR(name)},
#include "GPUErrorCodes.h"
#undef GPUCA_ERROR_CODE
};
};
return errorNames;
}

bool GPUErrors::printErrors(bool silent, uint64_t mask)
{
bool retVal = 0;
const auto& errorNames = getErrorNames();
for (uint32_t i = 0; i < std::min(*mErrors, GPUCA_MAX_ERRORS); i++) {
uint32_t errorCode = mErrors[4 * i + 1];
const auto& it = errorNames.find(errorCode);
Expand Down
6 changes: 6 additions & 0 deletions GPU/GPUTracking/Global/GPUErrors.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#define GPUERRORS_H

#include "GPUCommonDef.h"
#ifndef GPUCA_GPUCODE
#include <unordered_map>
#endif

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still, you need to protect unordered_map with #ifndef GPUCA_GPUCODE_DEVICE.
We do not include any std headers in GPU kernel code.
And I think you don't need GPUDefMacros.h, do you?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. I also fixed up the includes.

namespace o2::gpu
{
Expand All @@ -34,6 +37,9 @@ class GPUErrors
void setMemory(GPUglobalref() uint32_t* m) { mErrors = m; }
void clear();
bool printErrors(bool silent = false, uint64_t mask = 0);
#ifndef GPUCA_GPUCODE
static const std::unordered_map<uint32_t, const char*>& getErrorNames();
#endif
uint32_t getNErrors() const;
const uint32_t* getErrorPtr() const;
static uint32_t getMaxErrors();
Expand Down