|
65 | 65 | #include <sys/types.h> |
66 | 66 | #include <sys/stat.h> |
67 | 67 | #include <fcntl.h> |
| 68 | +#include <chrono> |
68 | 69 | #include "GPUReconstructionConvert.h" |
69 | 70 | #include "DetectorsRaw/RDHUtils.h" |
70 | 71 | #include <TStopwatch.h> |
@@ -519,16 +520,25 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config |
519 | 520 | using outputBufferType = std::pair<std::optional<std::reference_wrapper<O2CharVectorOutputType>>, char*>; |
520 | 521 | std::vector<outputBufferType> outputBuffers(GPUInterfaceOutputs::count(), {std::nullopt, nullptr}); |
521 | 522 |
|
522 | | - auto setOutputAllocator = [&specconfig, &outputBuffers, &outputRegions, &processAttributes, &pc, verbosity](bool condition, GPUOutputControl& region, auto&& outputSpec, size_t offset = 0) { |
| 523 | + auto setOutputAllocator = [&specconfig, &outputBuffers, &outputRegions, &processAttributes, &pc, verbosity](const char* name, bool condition, GPUOutputControl& region, auto&& outputSpec, size_t offset = 0) { |
523 | 524 | if (condition) { |
524 | 525 | auto& buffer = outputBuffers[outputRegions.getIndex(region)]; |
525 | 526 | if (processAttributes->allocateOutputOnTheFly) { |
526 | | - region.allocator = [&buffer, &pc, outputSpec = std::move(outputSpec), verbosity, offset](size_t size) -> void* { |
| 527 | + region.allocator = [name, &buffer, &pc, outputSpec = std::move(outputSpec), debug = processAttributes->config->configProcessing.debugLevel, verbosity, offset](size_t size) -> void* { |
527 | 528 | size += offset; |
528 | 529 | if (verbosity) { |
529 | 530 | LOG(INFO) << "ALLOCATING " << size << " bytes for " << std::get<DataOrigin>(outputSpec).template as<std::string>() << "/" << std::get<DataDescription>(outputSpec).template as<std::string>() << "/" << std::get<2>(outputSpec); |
530 | 531 | } |
| 532 | + std::chrono::time_point<std::chrono::high_resolution_clock> start, end; |
| 533 | + if (debug) { |
| 534 | + start = std::chrono::high_resolution_clock::now(); |
| 535 | + } |
531 | 536 | buffer.first.emplace(pc.outputs().make<std::vector<char>>(std::make_from_tuple<Output>(outputSpec), size)); |
| 537 | + if (debug) { |
| 538 | + end = std::chrono::high_resolution_clock::now(); |
| 539 | + std::chrono::duration<double> elapsed_seconds = end - start; |
| 540 | + LOG(INFO) << "Allocation time for " << name << " (" << size << "bytes)" << ": " << elapsed_seconds.count() << "s"; |
| 541 | + } |
532 | 542 | return (buffer.second = buffer.first->get().data()) + offset; |
533 | 543 | }; |
534 | 544 | } else { |
@@ -568,12 +578,12 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config |
568 | 578 | downSizeBuffer(buffer, span.size() * sizeof(*span.data())); |
569 | 579 | }; |
570 | 580 |
|
571 | | - setOutputAllocator(specconfig.outputCompClustersFlat, outputRegions.compressedClusters, std::make_tuple(gDataOriginTPC, (DataDescription) "COMPCLUSTERSFLAT", 0)); |
572 | | - setOutputAllocator(processAttributes->clusterOutputIds.size() > 0, outputRegions.clustersNative, std::make_tuple(gDataOriginTPC, specconfig.sendClustersPerSector ? (DataDescription) "CLUSTERNATIVETMP" : (DataDescription) "CLUSTERNATIVE", NSectors, Lifetime::Timeframe, clusterOutputSectorHeader), sizeof(ClusterCountIndex)); |
573 | | - setOutputAllocator(specconfig.outputSharedClusterMap, outputRegions.sharedClusterMap, std::make_tuple(gDataOriginTPC, (DataDescription) "CLSHAREDMAP", 0)); |
574 | | - setOutputAllocator(specconfig.outputTracks, outputRegions.tpcTracksO2, std::make_tuple(gDataOriginTPC, (DataDescription) "TRACKS", 0)); |
575 | | - setOutputAllocator(specconfig.outputTracks, outputRegions.tpcTracksO2ClusRefs, std::make_tuple(gDataOriginTPC, (DataDescription) "CLUSREFS", 0)); |
576 | | - setOutputAllocator(specconfig.outputTracks && specconfig.processMC, outputRegions.tpcTracksO2Labels, std::make_tuple(gDataOriginTPC, (DataDescription) "TRACKSMCLBL", 0)); |
| 581 | + setOutputAllocator("COMPCLUSTERSFLAT", specconfig.outputCompClustersFlat, outputRegions.compressedClusters, std::make_tuple(gDataOriginTPC, (DataDescription) "COMPCLUSTERSFLAT", 0)); |
| 582 | + setOutputAllocator("CLUSTERNATIVE", processAttributes->clusterOutputIds.size() > 0, outputRegions.clustersNative, std::make_tuple(gDataOriginTPC, specconfig.sendClustersPerSector ? (DataDescription) "CLUSTERNATIVETMP" : (DataDescription) "CLUSTERNATIVE", NSectors, Lifetime::Timeframe, clusterOutputSectorHeader), sizeof(ClusterCountIndex)); |
| 583 | + setOutputAllocator("CLSHAREDMAP", specconfig.outputSharedClusterMap, outputRegions.sharedClusterMap, std::make_tuple(gDataOriginTPC, (DataDescription) "CLSHAREDMAP", 0)); |
| 584 | + setOutputAllocator("TRACKS", specconfig.outputTracks, outputRegions.tpcTracksO2, std::make_tuple(gDataOriginTPC, (DataDescription) "TRACKS", 0)); |
| 585 | + setOutputAllocator("CLUSREFS", specconfig.outputTracks, outputRegions.tpcTracksO2ClusRefs, std::make_tuple(gDataOriginTPC, (DataDescription) "CLUSREFS", 0)); |
| 586 | + setOutputAllocator("TRACKSMCLBL", specconfig.outputTracks && specconfig.processMC, outputRegions.tpcTracksO2Labels, std::make_tuple(gDataOriginTPC, (DataDescription) "TRACKSMCLBL", 0)); |
577 | 587 | if (specconfig.processMC && specconfig.caClusterer) { |
578 | 588 | outputRegions.clusterLabels.allocator = [&clustersMCBuffer](size_t size) -> void* { return &clustersMCBuffer; }; |
579 | 589 | } |
|
0 commit comments