Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ find_package(absl REQUIRED)

# libdatadog_profiling
include(Findlibdatadog)
# watcher_sample_types.hpp includes <datadog/common.h>, so all targets need this.
include_directories(${Datadog_INCLUDE_DIR})

# Event Parser
add_subdirectory(src/event_parser)
Expand Down
21 changes: 15 additions & 6 deletions cmake/Findlibdatadog.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,24 @@

# libdatadog : common profiler imported libraries https://github.com/DataDog/libdatadog/releases
set(TAG_LIBDATADOG
"v26.0.0"
"v29.0.0"
CACHE STRING "libdatadog github tag")

set(Datadog_ROOT ${VENDOR_PATH}/libdatadog-${TAG_LIBDATADOG})
# Override with a local build by passing -DDatadog_LOCAL_ROOT=/path/to/libdatadog
set(Datadog_LOCAL_ROOT
""
CACHE PATH "Path to a local libdatadog build (skips GitHub download)")

message(STATUS "${CMAKE_SOURCE_DIR}/tools/fetch_libddprof.sh ${TAG_LIBDATADOG} ${LIBDATADOG_ROOT}")
execute_process(
COMMAND "${CMAKE_SOURCE_DIR}/tools/fetch_libdatadog.sh" ${TAG_LIBDATADOG} ${Datadog_ROOT}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
# Select the libdatadog tree to build against and publish it as Datadog_ROOT.
#   - Datadog_LOCAL_ROOT set: use that directory as-is; the fetch script is not
#     run in this branch.
#   - otherwise: use ${VENDOR_PATH}/libdatadog-${TAG_LIBDATADOG} and run
#     tools/fetch_libdatadog.sh <tag> <destination> at configure time. A failing
#     script aborts configuration (COMMAND_ERROR_IS_FATAL ANY).
if(Datadog_LOCAL_ROOT)
  # Reject a mistyped override immediately instead of letting a non-existent
  # path propagate into Datadog_ROOT.
  if(NOT IS_DIRECTORY "${Datadog_LOCAL_ROOT}")
    message(FATAL_ERROR "Datadog_LOCAL_ROOT is not a directory: ${Datadog_LOCAL_ROOT}")
  endif()
  message(STATUS "Using local libdatadog override: ${Datadog_LOCAL_ROOT}")
  set(Datadog_ROOT "${Datadog_LOCAL_ROOT}")
else()
  set(Datadog_ROOT "${VENDOR_PATH}/libdatadog-${TAG_LIBDATADOG}")
  message(STATUS "${CMAKE_SOURCE_DIR}/tools/fetch_libdatadog.sh ${TAG_LIBDATADOG} ${Datadog_ROOT}")
  # Arguments are quoted so each one reaches the script as exactly one
  # argument, even if a value contains ';'.
  execute_process(
    COMMAND "${CMAKE_SOURCE_DIR}/tools/fetch_libdatadog.sh" "${TAG_LIBDATADOG}" "${Datadog_ROOT}"
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    COMMAND_ERROR_IS_FATAL ANY)
endif()

set(DataDog_DIR "${Datadog_ROOT}/cmake")

Expand Down
91 changes: 32 additions & 59 deletions include/perf_watcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

#include "ddprof_defs.hpp"
#include "event_config.hpp"
#include <string>
#include "watcher_sample_types.hpp"

#include <cstdint>
#include <linux/perf_event.h>
#include <string>

namespace ddprof {

Expand Down Expand Up @@ -39,6 +40,10 @@ struct PerfWatcher {
uint64_t sample_type; // perf sample type: specifies values included in sample
unsigned long config; // specifies which perf event is requested
double value_scale;
union {
int64_t sample_period;
uint64_t sample_frequency;
};
std::string desc;

// tracepoint configuration
Expand All @@ -51,20 +56,16 @@ struct PerfWatcher {
int type; // perf event type (software / hardware / tracepoint / ... or custom
// for non-perf events)

union {
int64_t sample_period;
uint64_t sample_frequency;
};
int sample_type_id; // index into the sample types defined in this header

EventConfValueSource value_source; // how to normalize the sample value
EventAggregationMode aggregation_mode;

// perf_event_open configs
struct PerfWatcherOptions options;

WatcherSampleTypes sample_type_info; // pprof types for each aggregation mode

PProfIndices pprof_indices[kNbEventAggregationModes]; // std and live

EventConfValueSource value_source; // how to normalize the sample value
EventAggregationMode aggregation_mode;

uint8_t regno;
uint8_t raw_off;
uint8_t raw_sz;
Expand All @@ -76,27 +77,6 @@ struct PerfWatcher {
bool instrument_self; // do my own perf_event_open, etc
};

// The Datadog backend only understands pre-configured event types. Those
// types are defined here, and then referenced in the watcher
// The last column is a dependent type which is always aggregated as a count
// whenever the main type is aggregated.
// type, pprof, unit, live-pprof, sample_type,
// a, b, c, d, e,
#define PROFILE_TYPE_TABLE(X) \
X(NOCOUNT, "nocount", nocount, "undef", NOCOUNT) \
X(TRACEPOINT, "tracepoint", events, "undef", NOCOUNT) \
X(CPU_NANOS, "cpu-time", nanoseconds, "undef", CPU_SAMPLE) \
X(CPU_SAMPLE, "cpu-samples", count, "undef", NOCOUNT) \
X(ALLOC_SAMPLE, "alloc-samples", count, "inuse-objects", NOCOUNT) \
X(ALLOC_SPACE, "alloc-space", bytes, "inuse-space", ALLOC_SAMPLE)

// defines enum of profile types
#define X_ENUM(a, b, c, d, e) DDPROF_PWT_##a,
enum DDPROF_SAMPLE_TYPES : uint8_t {
PROFILE_TYPE_TABLE(X_ENUM) DDPROF_PWT_LENGTH,
};
#undef X_ENUM

// Define our own event type on top of perf event types.
// kDDPROF_TYPE_CUSTOM is the value used in the "perf event type" column of
// EVENT_CONFIG_TABLE for events that are not perf events (the sALLOC row).
// NOTE(review): the +100 offset presumably keeps the value clear of any type
// the kernel may add after PERF_TYPE_MAX — confirm. The result has to fit in
// the uint8_t underlying type.
enum DDProfTypeId : uint8_t { kDDPROF_TYPE_CUSTOM = PERF_TYPE_MAX + 100 };

Expand Down Expand Up @@ -128,29 +108,29 @@ enum DDProfCustomCountId : uint8_t {
// events are marked as tracepoint unless they represent a well-known profiling
// type!
// clang-format off
// short desc perf event type perf event count type period/freq profile sample type addtl. configs
// short desc perf event type perf event count type period/freq sample types addtl. configs
// cppcheck-suppress preprocessorErrorDirective
#define EVENT_CONFIG_TABLE(X) \
X(hCPU, "CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hREF, "Ref. CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hINST, "Instr. Count", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hCREF, "Cache Ref.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hCMISS, "Cache Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hBRANCH, "Branche Instr.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hBMISS, "Branch Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hBUS, "Bus Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hBSTF, "Bus Stalls(F)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hBSTB, "Bus Stalls(B)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sCPU, "CPU Time", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, 99, DDPROF_PWT_CPU_NANOS, IS_FREQ_TRY_KERNEL) \
X(sPF, "Page Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, 1, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sCS, "Con. Switch", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, 1, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sMig, "CPU Migrations", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sPFMAJ, "Major Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, 99, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sPFMIN, "Minor Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, 99, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sALGN, "Align. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sEMU, "Emu. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sDUM, "Dummy", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY, 1, DDPROF_PWT_NOCOUNT, {}) \
X(sALLOC, "Allocations", kDDPROF_TYPE_CUSTOM, kDDPROF_COUNT_ALLOCATIONS, 524288, DDPROF_PWT_ALLOC_SPACE, SKIP_FRAMES)
X(hCPU, "CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 99, k_stype_tracepoint, IS_FREQ) \
X(hREF, "Ref. CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 1000, k_stype_tracepoint, IS_FREQ) \
X(hINST, "Instr. Count", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 1000, k_stype_tracepoint, IS_FREQ) \
X(hCREF, "Cache Ref.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, 999, k_stype_tracepoint, {}) \
X(hCMISS, "Cache Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, 999, k_stype_tracepoint, {}) \
X(hBRANCH, "Branche Instr.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 999, k_stype_tracepoint, {}) \
X(hBMISS, "Branch Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, 999, k_stype_tracepoint, {}) \
X(hBUS, "Bus Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, 1000, k_stype_tracepoint, IS_FREQ) \
X(hBSTF, "Bus Stalls(F)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, 1000, k_stype_tracepoint, IS_FREQ) \
X(hBSTB, "Bus Stalls(B)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, 1000, k_stype_tracepoint, IS_FREQ) \
X(sCPU, "CPU Time", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, 99, k_stype_cpu, IS_FREQ_TRY_KERNEL) \
X(sPF, "Page Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, 1, k_stype_tracepoint, USE_KERNEL) \
X(sCS, "Con. Switch", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, 1, k_stype_tracepoint, USE_KERNEL) \
X(sMig, "CPU Migrations", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, 99, k_stype_tracepoint, IS_FREQ) \
X(sPFMAJ, "Major Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, 99, k_stype_tracepoint, USE_KERNEL) \
X(sPFMIN, "Minor Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, 99, k_stype_tracepoint, USE_KERNEL) \
X(sALGN, "Align. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, 99, k_stype_tracepoint, IS_FREQ) \
X(sEMU, "Emu. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, 99, k_stype_tracepoint, IS_FREQ) \
X(sDUM, "Dummy", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY, 1, k_stype_dummy, {}) \
X(sALLOC, "Allocations", kDDPROF_TYPE_CUSTOM, kDDPROF_COUNT_ALLOCATIONS, 524288, k_stype_alloc, SKIP_FRAMES)

// clang-format on

Expand All @@ -165,16 +145,9 @@ enum DDPROF_EVENT_NAMES : int8_t {
const PerfWatcher *ewatcher_from_idx(int idx);
const PerfWatcher *ewatcher_from_str(const char *str);
const PerfWatcher *tracepoint_default_watcher();
bool watcher_has_countable_sample_type(const PerfWatcher *watcher);
bool watcher_has_tracepoint(const PerfWatcher *watcher);
int watcher_to_count_sample_type_id(const PerfWatcher *watcher);
const char *event_type_name_from_idx(int idx);

// Helper functions for sample types
const char *sample_type_name_from_idx(int idx, EventAggregationModePos pos);
const char *sample_type_unit_from_idx(int idx);
int sample_type_id_to_count_sample_type_id(int idx);

// Helper functions, mostly for tests
uint64_t perf_event_default_sample_type();
void log_watcher(const PerfWatcher *w, int idx);
Expand Down
54 changes: 54 additions & 0 deletions include/watcher_sample_types.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0. This product includes software
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
// Datadog, Inc.

#pragma once

// Maps a watcher's event to pprof sample types for each aggregation mode.
// k_stype_none signals "no sample/count type for this aggregation mode".
// Fields are uint32_t (not the enum) to allow k_stype_none = UINT32_MAX,
// which lies outside the valid enum range.

#include "event_config.hpp"

#include <cstdint>
#include <datadog/common.h>

namespace ddprof {

// Table of pprof sample types attached to a watcher, with one slot per
// aggregation mode (kNbEventAggregationModes slots in each array).
// A slot holds either a sample-type value or k_stype_none. The fields are
// plain uint32_t so that the sentinel can be stored alongside real values.
// Field order matters: the k_stype_* constants use positional brace
// initialization (sample_types first, count_types second).
struct WatcherSampleTypes {
// Main value type for each mode, ordered [kSumPos, kLiveSumPos].
uint32_t sample_types[kNbEventAggregationModes]; // [kSumPos, kLiveSumPos]
// Count type accompanying the main value; presumably indexed the same way as
// sample_types — confirm against the code that consumes it.
uint32_t count_types[kNbEventAggregationModes]; // companion counts
};

// Sentinel: slot is unused for this aggregation mode.
// Compared against by is_pprof_active() and used to fill empty slots in the
// k_stype_* tables.
inline constexpr uint32_t k_stype_none = UINT32_MAX;

// Tracepoints: one event = one sample, no count companion, no live mode.
// clang-format off
Comment thread
r1viollet marked this conversation as resolved.
// Sample types for tracepoint-style watchers.
// Initializer layout: the first brace fills sample_types and the second fills
// count_types; within each brace the entries are [kSumPos, kLiveSumPos].
// Only the sum-mode sample type is set; every other slot is k_stype_none.
inline constexpr WatcherSampleTypes k_stype_tracepoint = {
{DDOG_PROF_SAMPLE_TYPE_TRACEPOINT, k_stype_none},
{k_stype_none, k_stype_none}};

// Initializer layout for the tables below: the first brace fills sample_types
// and the second fills count_types; within each brace the entries are
// [kSumPos, kLiveSumPos].

// CPU: nanoseconds in sum mode only — no live profile for CPU.
// The sum-mode count companion is CPU_SAMPLES; both live slots are unused.
inline constexpr WatcherSampleTypes k_stype_cpu = {
{DDOG_PROF_SAMPLE_TYPE_CPU_TIME, k_stype_none},
{DDOG_PROF_SAMPLE_TYPE_CPU_SAMPLES, k_stype_none}};

// Allocation: bytes allocated / live bytes, with object-count companions.
// This is the only table here that fills the live slots
// (INUSE_SPACE / INUSE_OBJECTS).
inline constexpr WatcherSampleTypes k_stype_alloc = {
{DDOG_PROF_SAMPLE_TYPE_ALLOC_SPACE, DDOG_PROF_SAMPLE_TYPE_INUSE_SPACE},
{DDOG_PROF_SAMPLE_TYPE_ALLOC_SAMPLES, DDOG_PROF_SAMPLE_TYPE_INUSE_OBJECTS}};

// Dummy: watcher does not contribute to pprof (e.g., sDUM).
// Every slot is k_stype_none, so is_pprof_active() returns false for it.
inline constexpr WatcherSampleTypes k_stype_dummy = {
{k_stype_none, k_stype_none},
{k_stype_none, k_stype_none}};
// clang-format on

// Tells whether a sample-type table has a value type in its first slot.
// Only sample_types[0] is inspected (the kSumPos slot, per the field comment
// on WatcherSampleTypes); the live slot and the count companions are ignored.
// Returns false when that slot holds the k_stype_none sentinel.
constexpr bool is_pprof_active(const WatcherSampleTypes &st) {
  const uint32_t first_slot = st.sample_types[0];
  return !(first_slot == k_stype_none);
}

} // namespace ddprof
11 changes: 8 additions & 3 deletions src/exporter/ddprof_exporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ DDRes create_pprof_file(ddog_Timespec start, const char *dbg_pprof_prefix,
strftime(time_start, std::size(time_start), "%Y%m%dT%H%M%SZ", tm_start);

char filename[PATH_MAX];
snprintf(filename, std::size(filename), "%s%s.pprof.lz4", dbg_pprof_prefix,
snprintf(filename, std::size(filename), "%s%s.pprof.zst", dbg_pprof_prefix,
time_start);
LG_NTC("[EXPORTER] Writing pprof to file %s", filename);
constexpr int read_write_user_only = 0600;
Expand Down Expand Up @@ -271,12 +271,17 @@ DDRes ddprof_exporter_new(const UserTags *user_tags, DDProfExporter *exporter) {
fill_stable_tags(user_tags, exporter, tags_exporter);

ddog_CharSlice const base_url = to_CharSlice(exporter->_url);
// ddprof is an out-of-process profiler and does not fork during export,
// so the system DNS resolver (/etc/resolv.conf) is safe and preferred.
constexpr bool k_use_system_resolver = true;
ddog_prof_Endpoint endpoint;
if (exporter->_agent) {
endpoint = ddog_prof_Endpoint_agent(base_url, k_timeout_ms);
endpoint =
ddog_prof_Endpoint_agent(base_url, k_timeout_ms, k_use_system_resolver);
} else {
ddog_CharSlice const api_key = to_CharSlice(exporter->_input.api_key);
endpoint = ddog_prof_Endpoint_agentless(base_url, api_key, k_timeout_ms);
endpoint = ddog_prof_Endpoint_agentless(base_url, api_key, k_timeout_ms,
k_use_system_resolver);
}

ddog_prof_ProfileExporter_Result res_exporter = ddog_prof_Exporter_new(
Expand Down
Loading