// Copyright 2025 The Abseil Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may // obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "absl/profiling/internal/profile_builder.h" #ifdef __linux__ #include #include #endif // __linux__ #include #include #include #include #include #include #include "absl/base/casts.h" #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/types/span.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace debugging_internal { namespace { // This file contains a simplified implementation of the pprof profile builder, // which avoids a dependency on protobuf. // // The canonical profile proto definition is at // https://github.com/google/pprof/blob/master/proto/profile.proto // // Wire-format encoding is a simple sequence of (tag, value) pairs. The tag // is a varint-encoded integer, where the low 3 bits are the wire type, and the // high bits are the field number. // // For the fields we care about, we'll be using the following wire types: // // Wire Type 0: Varint-encoded integer. // Wire Type 2: Length-delimited. Used for strings and sub-messages. enum class WireType { kVarint = 0, kLengthDelimited = 2, }; #ifdef __linux__ // Returns the Phdr of the first segment of the given type. const ElfW(Phdr) * GetFirstSegment(const dl_phdr_info* const info, const ElfW(Word) segment_type) { for (int i = 0; i < info->dlpi_phnum; ++i) { if (info->dlpi_phdr[i].p_type == segment_type) { return &info->dlpi_phdr[i]; } } return nullptr; } // Return DT_SONAME for the given image. If there is no PT_DYNAMIC or if // PT_DYNAMIC does not contain DT_SONAME, return nullptr. static const char* GetSoName(const dl_phdr_info* const info) { const ElfW(Phdr)* const pt_dynamic = GetFirstSegment(info, PT_DYNAMIC); if (pt_dynamic == nullptr) { return nullptr; } const ElfW(Dyn)* dyn = reinterpret_cast(info->dlpi_addr + pt_dynamic->p_vaddr); const ElfW(Dyn)* dt_strtab = nullptr; const ElfW(Dyn)* dt_strsz = nullptr; const ElfW(Dyn)* dt_soname = nullptr; for (; dyn->d_tag != DT_NULL; ++dyn) { if (dyn->d_tag == DT_SONAME) { dt_soname = dyn; } else if (dyn->d_tag == DT_STRTAB) { dt_strtab = dyn; } else if (dyn->d_tag == DT_STRSZ) { dt_strsz = dyn; } } if (dt_soname == nullptr) { return nullptr; } ABSL_RAW_CHECK(dt_strtab != nullptr, "Unexpected nullptr"); ABSL_RAW_CHECK(dt_strsz != nullptr, "Unexpected nullptr"); const char* const strtab = reinterpret_cast( info->dlpi_addr + static_cast(dt_strtab->d_un.d_val)); ABSL_RAW_CHECK(dt_soname->d_un.d_val < dt_strsz->d_un.d_val, "Unexpected order"); return strtab + dt_soname->d_un.d_val; } // Helper function to get the build ID of a shared object. std::string GetBuildId(const dl_phdr_info* const info) { std::string result; // pt_note contains entries (of type ElfW(Nhdr)) starting at // info->dlpi_addr + pt_note->p_vaddr // with length // pt_note->p_memsz // // The length of each entry is given by // Align(sizeof(ElfW(Nhdr)) + nhdr->n_namesz) + Align(nhdr->n_descsz) for (int i = 0; i < info->dlpi_phnum; ++i) { const ElfW(Phdr)* pt_note = &info->dlpi_phdr[i]; if (pt_note->p_type != PT_NOTE) continue; const char* note = reinterpret_cast(info->dlpi_addr + pt_note->p_vaddr); const char* const last = note + pt_note->p_filesz; const ElfW(Xword) align = pt_note->p_align; while (note < last) { const ElfW(Nhdr)* const nhdr = reinterpret_cast(note); if (note + sizeof(*nhdr) > last) { // Corrupt PT_NOTE break; } // Both the start and end of the descriptor are aligned by sh_addralign // (= p_align). const ElfW(Xword) desc_start = (sizeof(*nhdr) + nhdr->n_namesz + align - 1) & -align; const ElfW(Xword) size = desc_start + ((nhdr->n_descsz + align - 1) & -align); // Beware of wrap-around. if (nhdr->n_namesz >= static_cast(-align) || nhdr->n_descsz >= static_cast(-align) || desc_start < sizeof(*nhdr) || size < desc_start || size > static_cast(last - note)) { // Corrupt PT_NOTE break; } if (nhdr->n_type == NT_GNU_BUILD_ID) { const char* const note_name = note + sizeof(*nhdr); // n_namesz is the length of note_name. if (nhdr->n_namesz == 4 && memcmp(note_name, "GNU\0", 4) == 0) { if (!result.empty()) { // Repeated build-ids. Ignore them. return ""; } result = absl::BytesToHexString( absl::string_view(note + desc_start, nhdr->n_descsz)); } } note += size; } } return result; } #endif // __linux__ // A varint-encoded integer. struct Varint { explicit Varint(uint64_t v) : value(v) {} explicit Varint(StringId v) : value(static_cast(v)) {} explicit Varint(LocationId v) : value(static_cast(v)) {} explicit Varint(MappingId v) : value(static_cast(v)) {} uint64_t value; template friend void AbslStringify(Sink& sink, const Varint& v) { char buf[10]; char* p = buf; uint64_t u = v.value; while (u >= 0x80) { *p++ = static_cast((u & 0x7f) | 0x80); u >>= 7; } *p++ = static_cast(u); sink.Append(absl::string_view(buf, static_cast(p - buf))); } }; struct Tag { int field_number; WireType wire_type; template friend void AbslStringify(Sink& sink, const Tag& t) { absl::Format(&sink, "%v", Varint((static_cast(t.field_number) << 3) | static_cast(t.wire_type))); } }; struct LengthDelimited { int field_number; absl::string_view value; template friend void AbslStringify(Sink& sink, const LengthDelimited& ld) { absl::Format(&sink, "%v%v%v", Tag{ld.field_number, WireType::kLengthDelimited}, Varint(ld.value.size()), ld.value); } }; struct VarintField { int field_number; Varint value; template friend void AbslStringify(Sink& sink, const VarintField& vf) { absl::Format(&sink, "%v%v", Tag{vf.field_number, WireType::kVarint}, vf.value); } }; } // namespace StringId ProfileBuilder::InternString(absl::string_view str) { if (str.empty()) return StringId(0); return string_table_.emplace(str, StringId(string_table_.size())) .first->second; } LocationId ProfileBuilder::InternLocation(const void* address) { return location_table_ .emplace(absl::bit_cast(address), LocationId(location_table_.size() + 1)) .first->second; } void ProfileBuilder::AddSample( int64_t value, absl::Span stack, absl::Span> labels) { std::string sample_proto; absl::StrAppend( &sample_proto, VarintField{SampleProto::kValue, Varint(static_cast(value))}); for (const void* addr : stack) { // Profile addresses are raw stack unwind addresses, so they should be // adjusted by -1 to land inside the call instruction (although potentially // misaligned). absl::StrAppend( &sample_proto, VarintField{SampleProto::kLocationId, Varint(InternLocation(absl::bit_cast( absl::bit_cast(addr) - 1)))}); } for (const auto& label : labels) { std::string label_proto = absl::StrCat(VarintField{LabelProto::kKey, Varint(label.first)}, VarintField{LabelProto::kNum, Varint(static_cast(label.second))}); absl::StrAppend(&sample_proto, LengthDelimited{SampleProto::kLabel, label_proto}); } samples_.push_back(std::move(sample_proto)); } void ProfileBuilder::AddSampleType(StringId type, StringId unit) { std::string sample_type_proto = absl::StrCat(VarintField{ValueTypeProto::kType, Varint(type)}, VarintField{ValueTypeProto::kUnit, Varint(unit)}); sample_types_.push_back(std::move(sample_type_proto)); } MappingId ProfileBuilder::AddMapping(uintptr_t memory_start, uintptr_t memory_limit, uintptr_t file_offset, absl::string_view filename, absl::string_view build_id) { size_t index = mappings_.size() + 1; auto [it, inserted] = mapping_table_.emplace(memory_start, index); if (!inserted) { return static_cast(it->second); } Mapping m; m.start = memory_start; m.limit = memory_limit; m.offset = file_offset; m.filename = std::string(filename); m.build_id = std::string(build_id); mappings_.push_back(std::move(m)); return static_cast(index); } std::string ProfileBuilder::Emit() && { std::string profile_proto; for (const auto& sample_type : sample_types_) { absl::StrAppend(&profile_proto, LengthDelimited{ProfileProto::kSampleType, sample_type}); } for (const auto& sample : samples_) { absl::StrAppend(&profile_proto, LengthDelimited{ProfileProto::kSample, sample}); } // Build mapping table. for (size_t i = 0, n = mappings_.size(); i < n; ++i) { const auto& mapping = mappings_[i]; std::string mapping_proto = absl::StrCat( VarintField{MappingProto::kId, Varint(static_cast(i + 1))}, VarintField{MappingProto::kMemoryStart, Varint(mapping.start)}, VarintField{MappingProto::kMemoryLimit, Varint(mapping.limit)}, VarintField{MappingProto::kFileOffset, Varint(mapping.offset)}, VarintField{MappingProto::kFilename, Varint(InternString(mapping.filename))}, VarintField{MappingProto::kBuildId, Varint(InternString(mapping.build_id))}); absl::StrAppend(&profile_proto, LengthDelimited{ProfileProto::kMapping, mapping_proto}); } // Build location table. for (const auto& [address, id] : location_table_) { std::string location = absl::StrCat(VarintField{LocationProto::kId, Varint(id)}, VarintField{LocationProto::kAddress, Varint(address)}); if (!mappings_.empty()) { // Find the mapping ID. auto it = mapping_table_.upper_bound(address); if (it != mapping_table_.begin()) { --it; } // If *it contains address, add mapping to location. const size_t mapping_index = it->second; const Mapping& mapping = mappings_[mapping_index - 1]; if (it->first <= address && address < mapping.limit) { absl::StrAppend( &location, VarintField{LocationProto::kMappingId, Varint(static_cast(mapping_index))}); } } absl::StrAppend(&profile_proto, LengthDelimited{ProfileProto::kLocation, location}); } std::string string_table_proto; std::vector sorted_strings(string_table_.size()); for (const auto& p : string_table_) { sorted_strings[static_cast(p.second)] = p.first; } for (const auto& s : sorted_strings) { absl::StrAppend(&string_table_proto, LengthDelimited{ProfileProto::kStringTable, s}); } absl::StrAppend(&profile_proto, VarintField{ProfileProto::kDropFrames, Varint(drop_frames_id_)}); absl::StrAppend(&profile_proto, VarintField{ProfileProto::kComment, Varint(comment_id_)}); absl::StrAppend(&profile_proto, VarintField{ProfileProto::kDefaultSampleType, Varint(default_sample_type_id_)}); return absl::StrCat(string_table_proto, profile_proto); } void ProfileBuilder::set_drop_frames_id(StringId drop_frames_id) { drop_frames_id_ = drop_frames_id; } void ProfileBuilder::set_comment_id(StringId comment_id) { comment_id_ = comment_id; } void ProfileBuilder::set_default_sample_type_id( StringId default_sample_type_id) { default_sample_type_id_ = default_sample_type_id; } void ProfileBuilder::AddCurrentMappings() { #ifdef __linux__ dl_iterate_phdr( +[](dl_phdr_info* info, size_t, void* data) { auto& builder = *reinterpret_cast(data); // Skip dummy entry introduced since glibc 2.18. if (info->dlpi_phdr == nullptr && info->dlpi_phnum == 0) { return 0; } const bool is_main_executable = builder.mappings_.empty(); // Storage for path to executable as dlpi_name isn't populated for the // main executable. +1 to allow for the null terminator that readlink // does not add. char self_filename[PATH_MAX + 1]; const char* filename = info->dlpi_name; if (filename == nullptr || filename[0] == '\0') { // This is either the main executable or the VDSO. The main // executable is always the first entry processed by callbacks. if (is_main_executable) { // This is the main executable. ssize_t ret = readlink("/proc/self/exe", self_filename, sizeof(self_filename) - 1); if (ret >= 0 && static_cast(ret) < sizeof(self_filename)) { self_filename[ret] = '\0'; filename = self_filename; } } else { // This is the VDSO. filename = GetSoName(info); } } char resolved_path[PATH_MAX]; absl::string_view resolved_filename; if (realpath(filename, resolved_path)) { resolved_filename = resolved_path; } else { resolved_filename = filename; } const std::string build_id = GetBuildId(info); // Evaluate all the loadable segments. for (int i = 0; i < info->dlpi_phnum; ++i) { if (info->dlpi_phdr[i].p_type != PT_LOAD) { continue; } const ElfW(Phdr)* pt_load = &info->dlpi_phdr[i]; ABSL_RAW_CHECK(pt_load != nullptr, "Unexpected nullptr"); // Extract data. const size_t memory_start = info->dlpi_addr + pt_load->p_vaddr; const size_t memory_limit = memory_start + pt_load->p_memsz; const size_t file_offset = pt_load->p_offset; // Add to profile. builder.AddMapping(memory_start, memory_limit, file_offset, resolved_filename, build_id); } // Keep going. return 0; }, this); #endif // __linux__ } } // namespace debugging_internal ABSL_NAMESPACE_END } // namespace absl