Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions include/ddprof_process.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
#include "ddres_def.hpp"
#include "dwfl_wrapper.hpp"
#include "logger.hpp"
#include "native_language.hpp"

// libelf forward declaration
extern "C" {
struct Elf;
}

#include <limits>
#include <memory>
Expand Down Expand Up @@ -40,6 +46,19 @@ class Process {

[[nodiscard]] std::string_view get_or_insert_thread_name(pid_t tid);

// Cached native language of the process' main executable.
// Returns NativeLanguage::kUnknown until detect_language_once() or
// detect_language_once_from_proc() has stored a result (the stored result
// may itself be kUnknown when detection is inconclusive).
[[nodiscard]] NativeLanguage get_language() const { return _language; }

// Detect (only once) the language using an already-opened Elf* (typically
// libdwfl's main-module Elf*). No-op on subsequent calls.
// Returns true if detection was attempted on this call.
bool detect_language_once(::Elf *main_exe_elf);

// Fallback path: detect by opening /proc/<pid>/exe ourselves. Use only when
// no Elf* is available yet.
bool detect_language_once_from_proc(std::string_view path_to_proc);

[[nodiscard]] DwflWrapper *get_or_insert_dwfl();
[[nodiscard]] DwflWrapper *get_dwfl();
[[nodiscard]] const DwflWrapper *get_dwfl() const;
Expand All @@ -57,6 +76,8 @@ class Process {
pid_t _pid;
CGroupId_t _cgroup_ns;
uint64_t _sample_counter{};
NativeLanguage _language{NativeLanguage::kUnknown};
bool _language_detected{false};
};

class ProcessHdr {
Expand All @@ -66,6 +87,7 @@ class ProcessHdr {
void flag_visited(pid_t pid);
Process &get(pid_t pid);
const ContainerId &get_container_id(pid_t pid);

void clear(pid_t pid) { _process_map.erase(pid); }

std::vector<pid_t> get_unvisited() const;
Expand Down
49 changes: 49 additions & 0 deletions include/native_language.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0. This product includes software
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
// Datadog, Inc.

#pragma once

#include <cstdint>
#include <string_view>
#include <sys/types.h>

// libelf forward declaration: callers that pass an Elf* must include
// <libelf.h>.
extern "C" {
struct Elf;
}

namespace ddprof {

// Heuristic native-language family for a process' main executable.
// Only meant to refine the "native" language tag for cases where a single
// language clearly dominates a process. Mixed-language binaries fall back to
// kUnknown (caller is expected to report "native" in that case).
enum class NativeLanguage : uint8_t {
  // Fallback when nothing conclusive was found -> reported as "native".
  kUnknown = 0,
  kGo = 1,
  kRust = 2,
  kCpp = 3,
};

// Returns a stable label string for a detected language.
// kUnknown maps to "native" (the existing default tag).
std::string_view to_string(NativeLanguage lang);

// Detect the native language of an already-opened ELF object.
// Intentionally heuristic and cheap:
// * Go -> `.go.buildinfo` / `.gopclntab` ELF section
// * Rust -> `.note.rustc` section or rustc-mangled symbols in
// .dynsym / .symtab (bounded scan)
// * Cpp -> any `_Z`-mangled symbol (Itanium ABI) not matching Rust
// Never reads DWARF.
// Returns kUnknown on null input or unrecognised ELF.
NativeLanguage detect_native_language(::Elf *elf);

// Convenience wrapper: open `/proc/<pid>/exe` ourselves. Prefer the Elf*
// overload above when the caller already has a handle (e.g. via libdwfl).
NativeLanguage detect_native_language(pid_t pid, std::string_view path_to_proc);

} // namespace ddprof
1 change: 1 addition & 0 deletions include/pprof/ddprof_pprof.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct DDProfPProf {
ddog_prof_StringId2 thread_id{};
ddog_prof_StringId2 thread_name{};
ddog_prof_StringId2 tracepoint_type{};
ddog_prof_StringId2 process_language{};
};

/* single profile gathering several value types */
Expand Down
4 changes: 4 additions & 0 deletions include/unwind_output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,15 @@ struct UnwindOutput {
container_id = k_container_id_unknown;
exe_name = {};
thread_name = {};
language = {};
}
std::vector<FunLoc> locs;
std::string_view container_id;
std::string_view exe_name;
std::string_view thread_name;
// Heuristic native language of the process' main executable
// ("go"/"rust"/"cpp"). Empty -> caller falls back to "native".
std::string_view language;
int pid;
int tid;
friend auto operator<=>(const UnwindOutput &, const UnwindOutput &) = default;
Expand Down
18 changes: 18 additions & 0 deletions src/ddprof_process.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,24 @@ const ContainerId &ProcessHdr::get_container_id(pid_t pid) {
return p.get_container_id(_path_to_proc);
}

// Runs language detection on `main_exe_elf` the first time it is called with
// a non-null handle and caches the outcome. Returns true only when detection
// actually ran during this call; a null handle leaves the state untouched so
// a later call can still perform the detection.
bool Process::detect_language_once(::Elf *main_exe_elf) {
  const bool should_detect = !_language_detected && main_exe_elf != nullptr;
  if (should_detect) {
    _language = detect_native_language(main_exe_elf);
    _language_detected = true;
  }
  return should_detect;
}

// Fallback detection that goes through the (pid, path_to_proc) overload of
// detect_native_language(). Only the first call does any work; the result is
// cached even when it is kUnknown. Returns true when detection ran during
// this call.
bool Process::detect_language_once_from_proc(std::string_view path_to_proc) {
  const bool first_attempt = !_language_detected;
  if (first_attempt) {
    _language = detect_native_language(_pid, path_to_proc);
    _language_detected = true;
  }
  return first_attempt;
}

void ProcessHdr::flag_visited(pid_t pid) { _visited_pid.insert(pid); }

Process &ProcessHdr::get(pid_t pid) {
Expand Down
203 changes: 203 additions & 0 deletions src/native_language.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0. This product includes software
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
// Datadog, Inc.

#include "native_language.hpp"

#include "logger.hpp"
#include "unique_fd.hpp"

#include <absl/strings/str_cat.h>
#include <cstring>
#include <fcntl.h>
#include <gelf.h>
#include <libelf.h>
#include <string>
#include <string_view>

namespace ddprof {

namespace {

constexpr size_t k_max_symbols_scanned = 4096;

// Heuristic check for rustc-mangled symbol names.
//  * v0 mangling: the name begins with "_R" and has at least one more char.
//  * legacy mangling: the name ends with "17h" + 16 lowercase hex digits + "E"
//    (the hash component rustc appends to Itanium-style names).
// Only the tail is inspected for the legacy scheme; the Itanium prefix is not
// validated because the tail alone is distinctive enough for a heuristic.
bool looks_like_rust_symbol(std::string_view name) {
  constexpr std::string_view k_v0_prefix = "_R";
  if (name.size() > k_v0_prefix.size() &&
      name.substr(0, k_v0_prefix.size()) == k_v0_prefix) {
    return true;
  }

  constexpr std::string_view k_hash_marker = "17h";
  constexpr size_t k_hash_digits = 16;
  // marker + hash digits + closing 'E'
  constexpr size_t k_tail_len = k_hash_marker.size() + k_hash_digits + 1;
  if (name.size() < k_tail_len) {
    return false;
  }

  const std::string_view tail = name.substr(name.size() - k_tail_len);
  if (tail.back() != 'E' ||
      tail.substr(0, k_hash_marker.size()) != k_hash_marker) {
    return false;
  }

  // Everything between the marker and the final 'E' must be lowercase hex.
  const std::string_view hash = tail.substr(k_hash_marker.size(), k_hash_digits);
  return hash.find_first_not_of("0123456789abcdef") == std::string_view::npos;
}

// True when `name` carries the Itanium C++ ABI "_Z" prefix followed by at
// least one more character. Legacy rustc names share that prefix, so callers
// are expected to test looks_like_rust_symbol() first.
bool looks_like_cpp_symbol(std::string_view name) {
  if (name.size() <= 2) {
    return false;
  }
  return name.compare(0, 2, "_Z") == 0;
}

// Walks the symbol table held in `scn` looking for a language signal.
//  * The first rustc-looking name sets `out` to kRust and returns true.
//  * Otherwise, if any "_Z" name was seen, `out` becomes kCpp and the
//    function returns true.
//  * With no signal (or unreadable section data / zero entry size) `out` is
//    left untouched and false is returned.
// The walk stops once k_max_symbols_scanned *named* symbols have been
// examined; unreadable entries and empty names do not count towards the cap.
bool scan_symtab(Elf *elf, Elf_Scn *scn, GElf_Shdr const &shdr,
                 NativeLanguage &out) {
  Elf_Data *const sym_data = elf_getdata(scn, nullptr);
  if (sym_data == nullptr) {
    return false;
  }
  if (shdr.sh_entsize == 0) {
    return false;
  }

  const size_t entry_count = shdr.sh_size / shdr.sh_entsize;
  size_t named_seen = 0;
  bool found_cpp = false;

  for (size_t idx = 0; idx < entry_count; ++idx) {
    if (named_seen >= k_max_symbols_scanned) {
      break;
    }
    GElf_Sym entry;
    if (gelf_getsym(sym_data, static_cast<int>(idx), &entry) != nullptr) {
      // Names live in the string table referenced by the section's sh_link.
      const char *cname = elf_strptr(elf, shdr.sh_link, entry.st_name);
      if (cname != nullptr && cname[0] != '\0') {
        ++named_seen;
        const std::string_view symbol{cname};
        if (looks_like_rust_symbol(symbol)) {
          // A single Rust symbol is decisive, no need to look further.
          out = NativeLanguage::kRust;
          return true;
        }
        found_cpp = found_cpp || looks_like_cpp_symbol(symbol);
      }
    }
  }

  if (!found_cpp) {
    return false;
  }
  out = NativeLanguage::kCpp;
  return true;
}

// Core detection routine shared by both public entry points.
// Step 1 walks the section headers: a Go or rustc marker section decides the
// result immediately, and the symbol-table sections are remembered on the way.
// Step 2 runs the symbol heuristics, trying .symtab before .dynsym.
// Returns kUnknown when the section-name string table index cannot be
// obtained or when no signal is found.
NativeLanguage detect_from_elf_impl(Elf *elf) {
  size_t names_index = 0;
  if (elf_getshdrstrndx(elf, &names_index) != 0) {
    return NativeLanguage::kUnknown;
  }

  Elf_Scn *symtab_scn = nullptr;
  GElf_Shdr symtab_shdr{};
  Elf_Scn *dynsym_scn = nullptr;
  GElf_Shdr dynsym_shdr{};

  for (Elf_Scn *cur = elf_nextscn(elf, nullptr); cur != nullptr;
       cur = elf_nextscn(elf, cur)) {
    GElf_Shdr hdr;
    if (gelf_getshdr(cur, &hdr) == nullptr) {
      continue;
    }
    const char *raw_name = elf_strptr(elf, names_index, hdr.sh_name);
    if (raw_name == nullptr) {
      continue;
    }
    const std::string_view section_name{raw_name};

    // Section-name probes: cheapest signals, checked before anything else.
    const bool is_go_marker =
        section_name == ".go.buildinfo" || section_name == ".gopclntab";
    if (is_go_marker) {
      return NativeLanguage::kGo;
    }
    if (section_name == ".note.rustc") {
      return NativeLanguage::kRust;
    }

    // Keep track of the symbol tables for the second step.
    switch (hdr.sh_type) {
    case SHT_SYMTAB:
      symtab_scn = cur;
      symtab_shdr = hdr;
      break;
    case SHT_DYNSYM:
      dynsym_scn = cur;
      dynsym_shdr = hdr;
      break;
    default:
      break;
    }
  }

  // .symtab is richer, so it goes first; .dynsym covers stripped binaries.
  NativeLanguage detected = NativeLanguage::kUnknown;
  const bool symtab_hit =
      symtab_scn != nullptr &&
      scan_symtab(elf, symtab_scn, symtab_shdr, detected);
  if (symtab_hit) {
    return detected;
  }
  const bool dynsym_hit =
      dynsym_scn != nullptr &&
      scan_symtab(elf, dynsym_scn, dynsym_shdr, detected);
  return dynsym_hit ? detected : NativeLanguage::kUnknown;
}

} // namespace

// Detects the language family of an already-opened ELF handle.
// Returns kUnknown, without logging, for a null handle or for a handle whose
// kind is not ELF_K_ELF. The handle is not closed here.
NativeLanguage detect_native_language(::Elf *elf) {
  NativeLanguage detected = NativeLanguage::kUnknown;
  if (elf != nullptr) {
    // libelf requires elf_version() to have been called before other calls;
    // calling it again here is harmless and covers callers that skipped it.
    elf_version(EV_CURRENT);
    if (elf_kind(elf) == ELF_K_ELF) {
      detected = detect_from_elf_impl(elf);
      LG_DBG("[NATIVE-LANG] -> %s", std::string(to_string(detected)).c_str());
    }
  }
  return detected;
}

// Maps a NativeLanguage to its label. Every value other than kGo, kRust and
// kCpp (kUnknown included) yields "native".
std::string_view to_string(NativeLanguage lang) {
  if (lang == NativeLanguage::kGo) {
    return "go";
  }
  if (lang == NativeLanguage::kRust) {
    return "rust";
  }
  if (lang == NativeLanguage::kCpp) {
    return "cpp";
  }
  return "native";
}

// Detects the language family by opening `<path_to_proc>/proc/<pid>/exe`.
// Returns kUnknown when the file cannot be opened, when libelf cannot create
// a handle for it, or when the handle's kind is not ELF_K_ELF. The libelf
// handle is released before returning.
NativeLanguage detect_native_language(pid_t pid,
                                      std::string_view path_to_proc) {
  // libelf requires elf_version() to have been called first; repeating the
  // call is harmless and covers contexts where it has not happened yet.
  elf_version(EV_CURRENT);

  const std::string exe_path =
      absl::StrCat(path_to_proc, "/proc/", pid, "/exe");
  const UniqueFd fd{::open(exe_path.c_str(), O_RDONLY | O_CLOEXEC)};
  if (!fd) {
    return NativeLanguage::kUnknown;
  }

  Elf *const handle = elf_begin(fd.get(), ELF_C_READ_MMAP, nullptr);
  if (handle == nullptr) {
    return NativeLanguage::kUnknown;
  }

  const NativeLanguage detected = (elf_kind(handle) == ELF_K_ELF)
      ? detect_from_elf_impl(handle)
      : NativeLanguage::kUnknown;
  elf_end(handle);

  LG_DBG("[NATIVE-LANG] (from /proc) pid=%d -> %s", pid,
         std::string(to_string(detected)).c_str());
  return detected;
}

} // namespace ddprof
7 changes: 6 additions & 1 deletion src/pprof/ddprof_pprof.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ using namespace std::string_view_literals;
namespace ddprof {

namespace {
constexpr size_t k_max_pprof_labels{8};
constexpr size_t k_max_pprof_labels{9};

constexpr std::string_view k_container_id_label = "container_id"sv;
constexpr std::string_view k_process_id_label = "process_id"sv;
Expand All @@ -41,6 +41,7 @@ constexpr std::string_view k_process_name_label = "process_name"sv;
constexpr std::string_view k_thread_id_label = "thread id"sv;
constexpr std::string_view k_thread_name_label = "thread_name"sv;
constexpr std::string_view k_tracepoint_label = "tracepoint_type"sv;
constexpr std::string_view k_process_language_label = "process_language"sv;

// Maps a ddog_prof_SampleType to the kebab-case name used in debug log output
// (must match what simple_malloc-ut.sh greps for).
Expand Down Expand Up @@ -100,6 +101,7 @@ void init_dict_label_key_ids(DDProfPProf::DictLabelKeyIds &label_keys,
label_keys.thread_id = intern_string(dict, k_thread_id_label);
label_keys.thread_name = intern_string(dict, k_thread_name_label);
label_keys.tracepoint_type = intern_string(dict, k_tracepoint_label);
label_keys.process_language = intern_string(dict, k_process_language_label);
}

size_t prepare_labels2(const UnwindOutput &uw_output,
Expand Down Expand Up @@ -141,6 +143,9 @@ size_t prepare_labels2(const UnwindOutput &uw_output,
if (!uw_output.thread_name.empty()) {
push_label(label_keys.thread_name, uw_output.thread_name);
}
if (!uw_output.language.empty()) {
push_label(label_keys.process_language, uw_output.language);
}

DDPROF_DCHECK_FATAL(labels_num <= labels.size(),
"pprof_aggregate - label buffer exceeded");
Expand Down
Loading