diff options
author | iddqd <iddqd@yandex-team.com> | 2024-12-19 10:46:06 +0300 |
---|---|---|
committer | iddqd <iddqd@yandex-team.com> | 2024-12-19 10:59:56 +0300 |
commit | bb0840c0025a75dd3b85b746ebcec7deb7d9fe1c (patch) | |
tree | 85bc5522e873d9d5c37df278f0300c26fe9e729e /contrib/libs/breakpad/src/common/dwarf | |
parent | 1353077f79bb3547792b2fc86c22a695f0bc76f9 (diff) | |
download | ydb-bb0840c0025a75dd3b85b746ebcec7deb7d9fe1c.tar.gz |
Add contib/libs/breakpad to export
commit_hash:9d85255f8d9249f14105e4626bf4484805b8aed4
Diffstat (limited to 'contrib/libs/breakpad/src/common/dwarf')
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h | 181 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/bytereader.cc | 250 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/bytereader.h | 320 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc | 199 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h | 368 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h | 744 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc | 3435 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h | 1494 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/elf_reader.cc | 1274 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/elf_reader.h | 167 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/line_state_machine.h | 63 | ||||
-rw-r--r-- | contrib/libs/breakpad/src/common/dwarf/types.h | 41 |
12 files changed, 8536 insertions, 0 deletions
diff --git a/contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h b/contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h new file mode 100644 index 0000000000..dec20e6ffe --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h @@ -0,0 +1,181 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef UTIL_DEBUGINFO_BYTEREADER_INL_H__ +#define UTIL_DEBUGINFO_BYTEREADER_INL_H__ + +#include "common/dwarf/bytereader.h" + +#include <assert.h> +#include <stdint.h> + +namespace google_breakpad { + +inline uint8_t ByteReader::ReadOneByte(const uint8_t* buffer) const { + return buffer[0]; +} + +inline uint16_t ByteReader::ReadTwoBytes(const uint8_t* buffer) const { + const uint16_t buffer0 = buffer[0]; + const uint16_t buffer1 = buffer[1]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8; + } else { + return buffer1 | buffer0 << 8; + } +} + +inline uint64_t ByteReader::ReadThreeBytes(const uint8_t* buffer) const { + const uint32_t buffer0 = buffer[0]; + const uint32_t buffer1 = buffer[1]; + const uint32_t buffer2 = buffer[2]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16; + } else { + return buffer2 | buffer1 << 8 | buffer0 << 16; + } +} + +inline uint64_t ByteReader::ReadFourBytes(const uint8_t* buffer) const { + const uint32_t buffer0 = buffer[0]; + const uint32_t buffer1 = buffer[1]; + const uint32_t buffer2 = buffer[2]; + const uint32_t buffer3 = buffer[3]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; + } else { + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; + } +} + +inline uint64_t ByteReader::ReadEightBytes(const uint8_t* buffer) const { + const uint64_t buffer0 = buffer[0]; + const uint64_t buffer1 = buffer[1]; + const uint64_t buffer2 = buffer[2]; + const uint64_t buffer3 = buffer[3]; + const uint64_t buffer4 = buffer[4]; + const uint64_t buffer5 = buffer[5]; + const uint64_t buffer6 = buffer[6]; + const uint64_t buffer7 = buffer[7]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 | + buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; + } else { + return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | + buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56; + } +} + +// Read an unsigned LEB128 number. Each byte contains 7 bits of +// information, plus one bit saying whether the number continues or +// not. + +inline uint64_t ByteReader::ReadUnsignedLEB128(const uint8_t* buffer, + size_t* len) const { + uint64_t result = 0; + size_t num_read = 0; + unsigned int shift = 0; + uint8_t byte; + + do { + byte = *buffer++; + num_read++; + + result |= (static_cast<uint64_t>(byte & 0x7f)) << shift; + + shift += 7; + + } while (byte & 0x80); + + *len = num_read; + + return result; +} + +// Read a signed LEB128 number. These are like regular LEB128 +// numbers, except the last byte may have a sign bit set. + +inline int64_t ByteReader::ReadSignedLEB128(const uint8_t* buffer, + size_t* len) const { + int64_t result = 0; + unsigned int shift = 0; + size_t num_read = 0; + uint8_t byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast<uint64_t>(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof (result)) && (byte & 0x40)) + result |= -((static_cast<int64_t>(1)) << shift); + *len = num_read; + return result; +} + +inline uint64_t ByteReader::ReadOffset(const uint8_t* buffer) const { + assert(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64_t ByteReader::ReadAddress(const uint8_t* buffer) const { + assert(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +inline void ByteReader::SetCFIDataBase(uint64_t section_base, + const uint8_t* buffer_base) { + section_base_ = section_base; + buffer_base_ = buffer_base; + have_section_base_ = true; +} + +inline void ByteReader::SetTextBase(uint64_t text_base) { + text_base_ = text_base; + have_text_base_ = true; +} + +inline void ByteReader::SetDataBase(uint64_t data_base) { + data_base_ = data_base; + have_data_base_ = true; +} + +inline void ByteReader::SetFunctionBase(uint64_t function_base) { + function_base_ = function_base; + have_function_base_ = true; +} + +inline void ByteReader::ClearFunctionBase() { + have_function_base_ = false; +} + +} // namespace google_breakpad + +#endif // UTIL_DEBUGINFO_BYTEREADER_INL_H__ diff --git a/contrib/libs/breakpad/src/common/dwarf/bytereader.cc b/contrib/libs/breakpad/src/common/dwarf/bytereader.cc new file mode 100644 index 0000000000..ea3e4f6bc1 --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/bytereader.cc @@ -0,0 +1,250 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> + +#include "common/dwarf/bytereader-inl.h" +#include "common/dwarf/bytereader.h" + +namespace google_breakpad { + +ByteReader::ByteReader(enum Endianness endian) + :offset_reader_(NULL), address_reader_(NULL), endian_(endian), + address_size_(0), offset_size_(0), + have_section_base_(), have_text_base_(), have_data_base_(), + have_function_base_() { } + +ByteReader::~ByteReader() { } + +void ByteReader::SetOffsetSize(uint8_t size) { + offset_size_ = size; + assert(size == 4 || size == 8); + if (size == 4) { + this->offset_reader_ = &ByteReader::ReadFourBytes; + } else { + this->offset_reader_ = &ByteReader::ReadEightBytes; + } +} + +void ByteReader::SetAddressSize(uint8_t size) { + address_size_ = size; + assert(size == 4 || size == 8); + if (size == 4) { + this->address_reader_ = &ByteReader::ReadFourBytes; + } else { + this->address_reader_ = &ByteReader::ReadEightBytes; + } +} + +uint64_t ByteReader::ReadInitialLength(const uint8_t* start, size_t* len) { + const uint64_t initial_length = ReadFourBytes(start); + start += 4; + + // In DWARF2/3, if the initial length is all 1 bits, then the offset + // size is 8 and we need to read the next 8 bytes for the real length. + if (initial_length == 0xffffffff) { + SetOffsetSize(8); + *len = 12; + return ReadOffset(start); + } else { + SetOffsetSize(4); + *len = 4; + } + return initial_length; +} + +bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const { + if (encoding == DW_EH_PE_omit) return true; + if (encoding == DW_EH_PE_aligned) return true; + if ((encoding & 0x7) > DW_EH_PE_udata8) + return false; + if ((encoding & 0x70) > DW_EH_PE_funcrel) + return false; + return true; +} + +bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const { + switch (encoding & 0x70) { + case DW_EH_PE_absptr: return true; + case DW_EH_PE_pcrel: return have_section_base_; + case DW_EH_PE_textrel: return have_text_base_; + case DW_EH_PE_datarel: return have_data_base_; + case DW_EH_PE_funcrel: return have_function_base_; + default: return false; + } +} + +uint64_t ByteReader::ReadEncodedPointer(const uint8_t* buffer, + DwarfPointerEncoding encoding, + size_t* len) const { + // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't + // see it here. + assert(encoding != DW_EH_PE_omit); + + // The Linux Standards Base 4.0 does not make this clear, but the + // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c) + // agree that aligned pointers are always absolute, machine-sized, + // machine-signed pointers. + if (encoding == DW_EH_PE_aligned) { + assert(have_section_base_); + + // We don't need to align BUFFER in *our* address space. Rather, we + // need to find the next position in our buffer that would be aligned + // when the .eh_frame section the buffer contains is loaded into the + // program's memory. So align assuming that buffer_base_ gets loaded at + // address section_base_, where section_base_ itself may or may not be + // aligned. + + // First, find the offset to START from the closest prior aligned + // address. + uint64_t skew = section_base_ & (AddressSize() - 1); + // Now find the offset from that aligned address to buffer. + uint64_t offset = skew + (buffer - buffer_base_); + // Round up to the next boundary. + uint64_t aligned = (offset + AddressSize() - 1) & -AddressSize(); + // Convert back to a pointer. + const uint8_t* aligned_buffer = buffer_base_ + (aligned - skew); + // Finally, store the length and actually fetch the pointer. + *len = aligned_buffer - buffer + AddressSize(); + return ReadAddress(aligned_buffer); + } + + // Extract the value first, ignoring whether it's a pointer or an + // offset relative to some base. + uint64_t offset; + switch (encoding & 0x0f) { + case DW_EH_PE_absptr: + // DW_EH_PE_absptr is weird, as it is used as a meaningful value for + // both the high and low nybble of encoding bytes. When it appears in + // the high nybble, it means that the pointer is absolute, not an + // offset from some base address. When it appears in the low nybble, + // as here, it means that the pointer is stored as a normal + // machine-sized and machine-signed address. A low nybble of + // DW_EH_PE_absptr does not imply that the pointer is absolute; it is + // correct for us to treat the value as an offset from a base address + // if the upper nybble is not DW_EH_PE_absptr. + offset = ReadAddress(buffer); + *len = AddressSize(); + break; + + case DW_EH_PE_uleb128: + offset = ReadUnsignedLEB128(buffer, len); + break; + + case DW_EH_PE_udata2: + offset = ReadTwoBytes(buffer); + *len = 2; + break; + + case DW_EH_PE_udata4: + offset = ReadFourBytes(buffer); + *len = 4; + break; + + case DW_EH_PE_udata8: + offset = ReadEightBytes(buffer); + *len = 8; + break; + + case DW_EH_PE_sleb128: + offset = ReadSignedLEB128(buffer, len); + break; + + case DW_EH_PE_sdata2: + offset = ReadTwoBytes(buffer); + // Sign-extend from 16 bits. + offset = (offset ^ 0x8000) - 0x8000; + *len = 2; + break; + + case DW_EH_PE_sdata4: + offset = ReadFourBytes(buffer); + // Sign-extend from 32 bits. + offset = (offset ^ 0x80000000ULL) - 0x80000000ULL; + *len = 4; + break; + + case DW_EH_PE_sdata8: + // No need to sign-extend; this is the full width of our type. + offset = ReadEightBytes(buffer); + *len = 8; + break; + + default: + abort(); + } + + // Find the appropriate base address. + uint64_t base; + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = 0; + break; + + case DW_EH_PE_pcrel: + assert(have_section_base_); + base = section_base_ + (buffer - buffer_base_); + break; + + case DW_EH_PE_textrel: + assert(have_text_base_); + base = text_base_; + break; + + case DW_EH_PE_datarel: + assert(have_data_base_); + base = data_base_; + break; + + case DW_EH_PE_funcrel: + assert(have_function_base_); + base = function_base_; + break; + + default: + abort(); + } + + uint64_t pointer = base + offset; + + // Remove inappropriate upper bits. + if (AddressSize() == 4) + pointer = pointer & 0xffffffff; + else + assert(AddressSize() == sizeof(uint64_t)); + + return pointer; +} + +Endianness ByteReader::GetEndianness() const { + return endian_; +} + +} // namespace google_breakpad diff --git a/contrib/libs/breakpad/src/common/dwarf/bytereader.h b/contrib/libs/breakpad/src/common/dwarf/bytereader.h new file mode 100644 index 0000000000..b0cac43315 --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/bytereader.h @@ -0,0 +1,320 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_DWARF_BYTEREADER_H__ +#define COMMON_DWARF_BYTEREADER_H__ + +#include <stdint.h> + +#include <string> + +#include "common/dwarf/types.h" +#include "common/dwarf/dwarf2enums.h" + +namespace google_breakpad { + +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN +// because it conflicts with a macro +enum Endianness { + ENDIANNESS_BIG, + ENDIANNESS_LITTLE +}; + +// A ByteReader knows how to read single- and multi-byte values of +// various endiannesses, sizes, and encodings, as used in DWARF +// debugging information and Linux C++ exception handling data. +class ByteReader { + public: + // Construct a ByteReader capable of reading one-, two-, four-, and + // eight-byte values according to ENDIANNESS, absolute machine-sized + // addresses, DWARF-style "initial length" values, signed and + // unsigned LEB128 numbers, and Linux C++ exception handling data's + // encoded pointers. + explicit ByteReader(enum Endianness endianness); + virtual ~ByteReader(); + + // Read a single byte from BUFFER and return it as an unsigned 8 bit + // number. + uint8_t ReadOneByte(const uint8_t* buffer) const; + + // Read two bytes from BUFFER and return them as an unsigned 16 bit + // number, using this ByteReader's endianness. + uint16_t ReadTwoBytes(const uint8_t* buffer) const; + + // Read three bytes from BUFFER and return them as an unsigned 64 bit + // number, using this ByteReader's endianness. DWARF 5 uses this encoding + // for various index-related DW_FORMs. + uint64_t ReadThreeBytes(const uint8_t* buffer) const; + + // Read four bytes from BUFFER and return them as an unsigned 32 bit + // number, using this ByteReader's endianness. This function returns + // a uint64_t so that it is compatible with ReadAddress and + // ReadOffset. The number it returns will never be outside the range + // of an unsigned 32 bit integer. + uint64_t ReadFourBytes(const uint8_t* buffer) const; + + // Read eight bytes from BUFFER and return them as an unsigned 64 + // bit number, using this ByteReader's endianness. + uint64_t ReadEightBytes(const uint8_t* buffer) const; + + // Read an unsigned LEB128 (Little Endian Base 128) number from + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to + // the number of bytes read. + // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + uint64_t ReadUnsignedLEB128(const uint8_t* buffer, size_t* len) const; + + // Read a signed LEB128 number from BUFFER and return it as an + // signed 64 bit integer. Set LEN to the number of bytes read. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N in two's + // complement. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + int64_t ReadSignedLEB128(const uint8_t* buffer, size_t* len) const; + + // Indicate that addresses on this architecture are SIZE bytes long. SIZE + // must be either 4 or 8. (DWARF allows addresses to be any number of + // bytes in length from 1 to 255, but we only support 32- and 64-bit + // addresses at the moment.) You must call this before using the + // ReadAddress member function. + // + // For data in a .debug_info section, or something that .debug_info + // refers to like line number or macro data, the compilation unit + // header's address_size field indicates the address size to use. Call + // frame information doesn't indicate its address size (a shortcoming of + // the spec); you must supply the appropriate size based on the + // architecture of the target machine. + void SetAddressSize(uint8_t size); + + // Return the current address size, in bytes. This is either 4, + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. + uint8_t AddressSize() const { return address_size_; } + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer, respecting this ByteReader's endianness and address size. You + // must call SetAddressSize before calling this function. + uint64_t ReadAddress(const uint8_t* buffer) const; + + // DWARF actually defines two slightly different formats: 32-bit DWARF + // and 64-bit DWARF. This is *not* related to the size of registers or + // addresses on the target machine; it refers only to the size of section + // offsets and data lengths appearing in the DWARF data. One only needs + // 64-bit DWARF when the debugging data itself is larger than 4GiB. + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the + // debugging data itself is very large. + // + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each + // compilation unit and call frame information entry begins with an + // "initial length" field, which, in addition to giving the length of the + // data, also indicates the size of section offsets and lengths appearing + // in that data. The ReadInitialLength member function, below, reads an + // initial length and sets the ByteReader's offset size as a side effect. + // Thus, in the normal process of reading DWARF data, the appropriate + // offset size is set automatically. So, you should only need to call + // SetOffsetSize if you are using the same ByteReader to jump from the + // midst of one block of DWARF data into another. + + // Read a DWARF "initial length" field from START, and return it as + // an unsigned 64 bit integer, respecting this ByteReader's + // endianness. Set *LEN to the length of the initial length in + // bytes, either four or twelve. As a side effect, set this + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF + // initial length) or 8 (if we see a 64-bit DWARF initial length). + // + // A DWARF initial length is either: + // + // - a byte count stored as an unsigned 32-bit value less than + // 0xffffff00, indicating that the data whose length is being + // measured uses the 32-bit DWARF format, or + // + // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, + // indicating that the data whose length is being measured uses + // the 64-bit DWARF format. + uint64_t ReadInitialLength(const uint8_t* start, size_t* len); + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes + // long. You must call ReadInitialLength or SetOffsetSize before calling + // this function; see the comments above for details. + uint64_t ReadOffset(const uint8_t* buffer) const; + + // Return the current offset size, in bytes. + // A return value of 4 indicates that we are reading 32-bit DWARF. + // A return value of 8 indicates that we are reading 64-bit DWARF. + uint8_t OffsetSize() const { return offset_size_; } + + // Indicate that section offsets and lengths are SIZE bytes long. SIZE + // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). + // Usually, you should not call this function yourself; instead, let a + // call to ReadInitialLength establish the data's offset size + // automatically. + void SetOffsetSize(uint8_t size); + + // The Linux C++ ABI uses a variant of DWARF call frame information + // for exception handling. This data is included in the program's + // address space as the ".eh_frame" section, and intepreted at + // runtime to walk the stack, find exception handlers, and run + // cleanup code. The format is mostly the same as DWARF CFI, with + // some adjustments made to provide the additional + // exception-handling data, and to make the data easier to work with + // in memory --- for example, to allow it to be placed in read-only + // memory even when describing position-independent code. + // + // In particular, exception handling data can select a number of + // different encodings for pointers that appear in the data, as + // described by the DwarfPointerEncoding enum. There are actually + // four axes(!) to the encoding: + // + // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use + // the DWARF LEB128 encoding. + // + // - The pointer's signedness: pointers can be signed or unsigned. + // + // - The pointer's base address: the data stored in the exception + // handling data can be the actual address (that is, an absolute + // pointer), or relative to one of a number of different base + // addreses --- including that of the encoded pointer itself, for + // a form of "pc-relative" addressing. + // + // - The pointer may be indirect: it may be the address where the + // true pointer is stored. (This is used to refer to things via + // global offset table entries, program linkage table entries, or + // other tricks used in position-independent code.) + // + // There are also two options that fall outside that matrix + // altogether: the pointer may be omitted, or it may have padding to + // align it on an appropriate address boundary. (That last option + // may seem like it should be just another axis, but it is not.) + + // Indicate that the exception handling data is loaded starting at + // SECTION_BASE, and that the start of its buffer in our own memory + // is BUFFER_BASE. This allows us to find the address that a given + // byte in our buffer would have when loaded into the program the + // data describes. We need this to resolve DW_EH_PE_pcrel pointers. + void SetCFIDataBase(uint64_t section_base, const uint8_t* buffer_base); + + // Indicate that the base address of the program's ".text" section + // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. + void SetTextBase(uint64_t text_base); + + // Indicate that the base address for DW_EH_PE_datarel pointers is + // DATA_BASE. The proper value depends on the ABI; it is usually the + // address of the global offset table, held in a designated register in + // position-independent code. You will need to look at the startup code + // for the target system to be sure. I tried; my eyes bled. + void SetDataBase(uint64_t data_base); + + // Indicate that the base address for the FDE we are processing is + // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel + // pointers. (This encoding does not seem to be used by the GNU + // toolchain.) + void SetFunctionBase(uint64_t function_base); + + // Indicate that we are no longer processing any FDE, so any use of + // a DW_EH_PE_funcrel encoding is an error. + void ClearFunctionBase(); + + // Return true if ENCODING is a valid pointer encoding. + bool ValidEncoding(DwarfPointerEncoding encoding) const; + + // Return true if we have all the information we need to read a + // pointer that uses ENCODING. This checks that the appropriate + // SetFooBase function for ENCODING has been called. + bool UsableEncoding(DwarfPointerEncoding encoding) const; + + // Read an encoded pointer from BUFFER using ENCODING; return the + // absolute address it represents, and set *LEN to the pointer's + // length in bytes, including any padding for aligned pointers. + // + // This function calls 'abort' if ENCODING is invalid or refers to a + // base address this reader hasn't been given, so you should check + // with ValidEncoding and UsableEncoding first if you would rather + // die in a more helpful way. + uint64_t ReadEncodedPointer(const uint8_t* buffer, + DwarfPointerEncoding encoding, + size_t* len) const; + + Endianness GetEndianness() const; + private: + + // Function pointer type for our address and offset readers. + typedef uint64_t (ByteReader::*AddressReader)(const uint8_t*) const; + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 define offsets as either 4 or 8 bytes, + // generally depending on the amount of DWARF2/3 info present. + // This function pointer gets set by SetOffsetSize. + AddressReader offset_reader_; + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 allow addresses to be any size from 0-255 + // bytes currently. Internally we support 4 and 8 byte addresses, + // and will CHECK on anything else. + // This function pointer gets set by SetAddressSize. + AddressReader address_reader_; + + Endianness endian_; + uint8_t address_size_; + uint8_t offset_size_; + + // Base addresses for Linux C++ exception handling data's encoded pointers. + bool have_section_base_, have_text_base_, have_data_base_; + bool have_function_base_; + uint64_t section_base_, text_base_, data_base_, function_base_; + const uint8_t* buffer_base_; +}; + +} // namespace google_breakpad + +#endif // COMMON_DWARF_BYTEREADER_H__ diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc new file mode 100644 index 0000000000..1393c3d7d8 --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc @@ -0,0 +1,199 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dwarf2diehandler.cc: Implement the dwarf2reader::DieDispatcher class. +// See dwarf2diehandler.h for details. + +#include <assert.h> +#include <stdint.h> + +#include <string> + +#include "common/dwarf/dwarf2diehandler.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +DIEDispatcher::~DIEDispatcher() { + while (!die_handlers_.empty()) { + HandlerStack& entry = die_handlers_.top(); + if (entry.handler_ != root_handler_) + delete entry.handler_; + die_handlers_.pop(); + } +} + +bool DIEDispatcher::StartCompilationUnit(uint64_t offset, uint8_t address_size, + uint8_t offset_size, uint64_t cu_length, + uint8_t dwarf_version) { + return root_handler_->StartCompilationUnit(offset, address_size, + offset_size, cu_length, + dwarf_version); +} + +bool DIEDispatcher::StartDIE(uint64_t offset, enum DwarfTag tag) { + // The stack entry for the parent of this DIE, if there is one. + HandlerStack* parent = die_handlers_.empty() ? NULL : &die_handlers_.top(); + + // Does this call indicate that we're done receiving the parent's + // attributes' values? If so, call its EndAttributes member function. + if (parent && parent->handler_ && !parent->reported_attributes_end_) { + parent->reported_attributes_end_ = true; + if (!parent->handler_->EndAttributes()) { + // Finish off this handler now. and edit *PARENT to indicate that + // we don't want to visit any of the children. + parent->handler_->Finish(); + if (parent->handler_ != root_handler_) + delete parent->handler_; + parent->handler_ = NULL; + return false; + } + } + + // Find a handler for this DIE. + DIEHandler* handler; + if (parent) { + if (parent->handler_) + // Ask the parent to find a handler. + handler = parent->handler_->FindChildHandler(offset, tag); + else + // No parent handler means we're not interested in any of our + // children. + handler = NULL; + } else { + // This is the root DIE. For a non-root DIE, the parent's handler + // decides whether to visit it, but the root DIE has no parent + // handler, so we have a special method on the root DIE handler + // itself to decide. + if (root_handler_->StartRootDIE(offset, tag)) + handler = root_handler_; + else + handler = NULL; + } + + // Push a handler stack entry for this new handler. As an + // optimization, we don't push NULL-handler entries on top of other + // NULL-handler entries; we just let the oldest such entry stand for + // the whole subtree. + if (handler || !parent || parent->handler_) { + HandlerStack entry; + entry.offset_ = offset; + entry.handler_ = handler; + entry.reported_attributes_end_ = false; + die_handlers_.push(entry); + } + + return handler != NULL; +} + +void DIEDispatcher::EndDIE(uint64_t offset) { + assert(!die_handlers_.empty()); + HandlerStack* entry = &die_handlers_.top(); + if (entry->handler_) { + // This entry had better be the handler for this DIE. + assert(entry->offset_ == offset); + // If a DIE has no children, this EndDIE call indicates that we're + // done receiving its attributes' values. + if (!entry->reported_attributes_end_) + entry->handler_->EndAttributes(); // Ignore return value: no children. + entry->handler_->Finish(); + if (entry->handler_ != root_handler_) + delete entry->handler_; + } else { + // If this DIE is within a tree we're ignoring, then don't pop the + // handler stack: that entry stands for the whole tree. + if (entry->offset_ != offset) + return; + } + die_handlers_.pop(); +} + +void DIEDispatcher::ProcessAttributeUnsigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { + HandlerStack& current = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeUnsigned(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeSigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64_t data) { + HandlerStack& current = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeSigned(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeReference(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { + HandlerStack& current = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeReference(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeBuffer(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const uint8_t* data, + uint64_t len) { + HandlerStack& current = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeBuffer(attr, form, data, len); +} + +void DIEDispatcher::ProcessAttributeString(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { + HandlerStack& current = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeString(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeSignature(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t signature) { + HandlerStack& current = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeSignature(attr, form, signature); +} + +} // namespace google_breakpad diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h new file mode 100644 index 0000000000..5e568eb85f --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h @@ -0,0 +1,368 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dwarf2reader::CompilationUnit is a simple and direct parser for +// DWARF data, but its handler interface is not convenient to use. In +// particular: +// +// - CompilationUnit calls Dwarf2Handler's member functions to report +// every attribute's value, regardless of what sort of DIE it is. +// As a result, the ProcessAttributeX functions end up looking like +// this: +// +// switch (parent_die_tag) { +// case DW_TAG_x: +// switch (attribute_name) { +// case DW_AT_y: +// handle attribute y of DIE type x +// ... +// } break; +// ... +// } +// +// In C++ it's much nicer to use virtual function dispatch to find +// the right code for a given case than to switch on the DIE tag +// like this. +// +// - Processing different kinds of DIEs requires different sets of +// data: lexical block DIEs have start and end addresses, but struct +// type DIEs don't. It would be nice to be able to have separate +// handler classes for separate kinds of DIEs, each with the members +// appropriate to its role, instead of having one handler class that +// needs to hold data for every DIE type. +// +// - There should be a separate instance of the appropriate handler +// class for each DIE, instead of a single object with tables +// tracking all the dies in the compilation unit. +// +// - It's not convenient to take some action after all a DIE's +// attributes have been seen, but before visiting any of its +// children. The only indication you have that a DIE's attribute +// list is complete is that you get either a StartDIE or an EndDIE +// call. +// +// - It's not convenient to make use of the tree structure of the +// DIEs. Skipping all the children of a given die requires +// maintaining state and returning false from StartDIE until we get +// an EndDIE call with the appropriate offset. +// +// This interface tries to take care of all that. (You're shocked, I'm sure.) +// +// Using the classes here, you provide an initial handler for the root +// DIE of the compilation unit. Each handler receives its DIE's +// attributes, and provides fresh handler objects for children of +// interest, if any. The three classes are: +// +// - DIEHandler: the base class for your DIE-type-specific handler +// classes. +// +// - RootDIEHandler: derived from DIEHandler, the base class for your +// root DIE handler class. +// +// - DIEDispatcher: derived from Dwarf2Handler, an instance of this +// invokes your DIE-type-specific handler objects. +// +// In detail: +// +// - Define handler classes specialized for the DIE types you're +// interested in. These handler classes must inherit from +// DIEHandler. Thus: +// +// class My_DW_TAG_X_Handler: public DIEHandler { ... }; +// class My_DW_TAG_Y_Handler: public DIEHandler { ... }; +// +// DIEHandler subclasses needn't correspond exactly to single DIE +// types, as shown here; the point is that you can have several +// different classes appropriate to different kinds of DIEs. +// +// - In particular, define a handler class for the compilation +// unit's root DIE, that inherits from RootDIEHandler: +// +// class My_DW_TAG_compile_unit_Handler: public RootDIEHandler { ... }; +// +// RootDIEHandler inherits from DIEHandler, adding a few additional +// member functions for examining the compilation unit as a whole, +// and other quirks of rootness. +// +// - Then, create a DIEDispatcher instance, passing it an instance of +// your root DIE handler class, and use that DIEDispatcher as the +// dwarf2reader::CompilationUnit's handler: +// +// My_DW_TAG_compile_unit_Handler root_die_handler(...); +// DIEDispatcher die_dispatcher(&root_die_handler); +// CompilationUnit reader(sections, offset, bytereader, &die_dispatcher); +// +// Here, 'die_dispatcher' acts as a shim between 'reader' and the +// various DIE-specific handlers you have defined. +// +// - When you call reader.Start(), die_dispatcher behaves as follows, +// starting with your root die handler and the compilation unit's +// root DIE: +// +// - It calls the handler's ProcessAttributeX member functions for +// each of the DIE's attributes. +// +// - It calls the handler's EndAttributes member function. This +// should return true if any of the DIE's children should be +// visited, in which case: +// +// - For each of the DIE's children, die_dispatcher calls the +// DIE's handler's FindChildHandler member function. If that +// returns a pointer to a DIEHandler instance, then +// die_dispatcher uses that handler to process the child, using +// this procedure recursively. Alternatively, if +// FindChildHandler returns NULL, die_dispatcher ignores that +// child and its descendants. +// +// - When die_dispatcher has finished processing all the DIE's +// children, it invokes the handler's Finish() member function, +// and destroys the handler. (As a special case, it doesn't +// destroy the root DIE handler.) +// +// This allows the code for handling a particular kind of DIE to be +// gathered together in a single class, makes it easy to skip all the +// children or individual children of a particular DIE, and provides +// appropriate parental context for each die. + +#ifndef COMMON_DWARF_DWARF2DIEHANDLER_H__ +#define COMMON_DWARF_DWARF2DIEHANDLER_H__ + +#include <stdint.h> + +#include <stack> +#include <string> + +#include "common/dwarf/types.h" +#include "common/dwarf/dwarf2enums.h" +#include "common/dwarf/dwarf2reader.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +// A base class for handlers for specific DIE types. The series of +// calls made on a DIE handler is as follows: +// +// - for each attribute of the DIE: +// - ProcessAttributeX() +// - EndAttributes() +// - if that returned true, then for each child: +// - FindChildHandler() +// - if that returns a non-NULL pointer to a new handler: +// - recurse, with the new handler and the child die +// - Finish() +// - destruction +class DIEHandler { + public: + DIEHandler() { } + virtual ~DIEHandler() { } + + // When we visit a DIE, we first use these member functions to + // report the DIE's attributes and their values. These have the + // same restrictions as the corresponding member functions of + // dwarf2reader::Dwarf2Handler. + // + // Since DWARF does not specify in what order attributes must + // appear, avoid making decisions in these functions that would be + // affected by the presence of other attributes. The EndAttributes + // function is a more appropriate place for such work, as all the + // DIE's attributes have been seen at that point. + // + // The default definitions ignore the values they are passed. + virtual void ProcessAttributeUnsigned(enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { } + virtual void ProcessAttributeSigned(enum DwarfAttribute attr, + enum DwarfForm form, + int64_t data) { } + virtual void ProcessAttributeReference(enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { } + virtual void ProcessAttributeBuffer(enum DwarfAttribute attr, + enum DwarfForm form, + const uint8_t* data, + uint64_t len) { } + virtual void ProcessAttributeString(enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { } + virtual void ProcessAttributeSignature(enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t signture) { } + + // Once we have reported all the DIE's attributes' values, we call + // this member function. If it returns false, we skip all the DIE's + // children. If it returns true, we call FindChildHandler on each + // child. If that returns a handler object, we use that to visit + // the child; otherwise, we skip the child. + // + // This is a good place to make decisions that depend on more than + // one attribute. DWARF does not specify in what order attributes + // must appear, so only when the EndAttributes function is called + // does the handler have a complete picture of the DIE's attributes. + // + // The default definition elects to ignore the DIE's children. + // You'll need to override this if you override FindChildHandler, + // but at least the default behavior isn't to pass the children to + // FindChildHandler, which then ignores them all. + virtual bool EndAttributes() { return false; } + + // If EndAttributes returns true to indicate that some of the DIE's + // children might be of interest, then we apply this function to + // each of the DIE's children. If it returns a handler object, then + // we use that to visit the child DIE. If it returns NULL, we skip + // that child DIE (and all its descendants). + // + // OFFSET is the offset of the child; TAG indicates what kind of DIE + // it is. + // + // The default definition skips all children. + virtual DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag) { + return NULL; + } + + // When we are done processing a DIE, we call this member function. + // This happens after the EndAttributes call, all FindChildHandler + // calls (if any), and all operations on the children themselves (if + // any). We call Finish on every handler --- even if EndAttributes + // returns false. + virtual void Finish() { }; +}; + +// A subclass of DIEHandler, with additional kludges for handling the +// compilation unit's root die. +class RootDIEHandler : public DIEHandler { + public: + bool handle_inline; + + explicit RootDIEHandler(bool handle_inline = false) + : handle_inline(handle_inline) {} + virtual ~RootDIEHandler() {} + + // We pass the values reported via Dwarf2Handler::StartCompilationUnit + // to this member function, and skip the entire compilation unit if it + // returns false. So the root DIE handler is actually also + // responsible for handling the compilation unit metadata. + // The default definition always visits the compilation unit. + virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size, + uint8_t offset_size, uint64_t cu_length, + uint8_t dwarf_version) { return true; } + + // For the root DIE handler only, we pass the offset, tag and + // attributes of the compilation unit's root DIE. This is the only + // way the root DIE handler can find the root DIE's tag. If this + // function returns true, we will visit the root DIE using the usual + // DIEHandler methods; otherwise, we skip the entire compilation + // unit. + // + // The default definition elects to visit the root DIE. + virtual bool StartRootDIE(uint64_t offset, enum DwarfTag tag) { return true; } +}; + +class DIEDispatcher: public Dwarf2Handler { + public: + // Create a Dwarf2Handler which uses ROOT_HANDLER as the handler for + // the compilation unit's root die, as described for the DIEHandler + // class. + DIEDispatcher(RootDIEHandler* root_handler) : root_handler_(root_handler) { } + // Destroying a DIEDispatcher destroys all active handler objects + // except the root handler. + ~DIEDispatcher(); + bool StartCompilationUnit(uint64_t offset, uint8_t address_size, + uint8_t offset_size, uint64_t cu_length, + uint8_t dwarf_version); + bool StartDIE(uint64_t offset, enum DwarfTag tag); + void ProcessAttributeUnsigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data); + void ProcessAttributeSigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64_t data); + void ProcessAttributeReference(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data); + void ProcessAttributeBuffer(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const uint8_t* data, + uint64_t len); + void ProcessAttributeString(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data); + void ProcessAttributeSignature(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t signature); + void EndDIE(uint64_t offset); + + private: + + // The type of a handler stack entry. This includes some fields + // which don't really need to be on the stack --- they could just be + // single data members of DIEDispatcher --- but putting them here + // makes it easier to see that the code is correct. + struct HandlerStack { + // The offset of the DIE for this handler stack entry. + uint64_t offset_; + + // The handler object interested in this DIE's attributes and + // children. If NULL, we're not interested in either. + DIEHandler* handler_; + + // Have we reported the end of this DIE's attributes to the handler? + bool reported_attributes_end_; + }; + + // Stack of DIE attribute handlers. At StartDIE(D), the top of the + // stack is the handler of D's parent, whom we may ask for a handler + // for D itself. At EndDIE(D), the top of the stack is D's handler. + // Special cases: + // + // - Before we've seen the compilation unit's root DIE, the stack is + // empty; we'll call root_handler_'s special member functions, and + // perhaps push root_handler_ on the stack to look at the root's + // immediate children. + // + // - When we decide to ignore a subtree, we only push an entry on + // the stack for the root of the tree being ignored, rather than + // pushing lots of stack entries with handler_ set to NULL. + std::stack<HandlerStack> die_handlers_; + + // The root handler. We don't push it on die_handlers_ until we + // actually get the StartDIE call for the root. + RootDIEHandler* root_handler_; +}; + +} // namespace google_breakpad +#endif // COMMON_DWARF_DWARF2DIEHANDLER_H__ diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h b/contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h new file mode 100644 index 0000000000..7d84f35e23 --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h @@ -0,0 +1,744 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_DWARF_DWARF2ENUMS_H__ +#define COMMON_DWARF_DWARF2ENUMS_H__ + +namespace google_breakpad { + +// These enums do not follow the google3 style only because they are +// known universally (specs, other implementations) by the names in +// exactly this capitalization. +// Tag names and codes. +enum DwarfTag { + DW_TAG_padding = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_param = 0x30, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + // DWARF 3. + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + // DWARF 4. + DW_TAG_type_unit = 0x41, + // DWARF 5. + DW_TAG_skeleton_unit = 0x4a, + // SGI/MIPS Extensions. + DW_TAG_MIPS_loop = 0x4081, + // HP extensions. See: + // ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz + DW_TAG_HP_array_descriptor = 0x4090, + // GNU extensions. + DW_TAG_format_label = 0x4101, // For FORTRAN 77 and Fortran 90. + DW_TAG_function_template = 0x4102, // For C++. + DW_TAG_class_template = 0x4103, // For C++. + DW_TAG_GNU_BINCL = 0x4104, + DW_TAG_GNU_EINCL = 0x4105, + // Extensions for UPC. See: http://upc.gwu.edu/~upc. + DW_TAG_upc_shared_type = 0x8765, + DW_TAG_upc_strict_type = 0x8766, + DW_TAG_upc_relaxed_type = 0x8767, + // PGI (STMicroelectronics) extensions. No documentation available. + DW_TAG_PGI_kanji_type = 0xA000, + DW_TAG_PGI_interface_block = 0xA020 +}; + +enum DwarfUnitHeader { + DW_UT_compile = 0x01, + DW_UT_type = 0x02, + DW_UT_partial = 0x03, + DW_UT_skeleton = 0x04, + DW_UT_split_compile = 0x05, + DW_UT_split_type = 0x06, + DW_UT_lo_user = 0x80, + DW_UT_hi_user = 0xFF +}; + +enum DwarfHasChild { + DW_children_no = 0, + DW_children_yes = 1 +}; + +// Form names and codes. +enum DwarfForm { + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + + // Added in DWARF 4: + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + + // Added in DWARF 5: + DW_FORM_strx = 0x1a, + DW_FORM_addrx = 0x1b, + DW_FORM_ref_sup4 = 0x1c, + DW_FORM_strp_sup = 0x1d, + DW_FORM_data16 = 0x1e, + DW_FORM_line_strp = 0x1f, + + // DWARF 4, but value out of order. + DW_FORM_ref_sig8 = 0x20, + + // Added in DWARF 5: + DW_FORM_implicit_const = 0x21, + DW_FORM_loclistx = 0x22, + DW_FORM_rnglistx = 0x23, + DW_FORM_ref_sup8 = 0x24, + DW_FORM_strx1 = 0x25, + DW_FORM_strx2 = 0x26, + DW_FORM_strx3 = 0x27, + DW_FORM_strx4 = 0x28, + + DW_FORM_addrx1 = 0x29, + DW_FORM_addrx2 = 0x2a, + DW_FORM_addrx3 = 0x2b, + DW_FORM_addrx4 = 0x2c, + + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_FORM_GNU_addr_index = 0x1f01, + DW_FORM_GNU_str_index = 0x1f02 +}; + +// Attribute names and codes +enum DwarfAttribute { + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_stride_size = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_items = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + // DWARF 3 values. + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + // DWARF 4 + DW_AT_linkage_name = 0x6e, + // DWARF 5 + DW_AT_str_offsets_base = 0x72, + DW_AT_addr_base = 0x73, + DW_AT_rnglists_base = 0x74, + DW_AT_dwo_name = 0x76, + // SGI/MIPS extensions. + DW_AT_MIPS_fde = 0x2001, + DW_AT_MIPS_loop_begin = 0x2002, + DW_AT_MIPS_tail_loop_begin = 0x2003, + DW_AT_MIPS_epilog_begin = 0x2004, + DW_AT_MIPS_loop_unroll_factor = 0x2005, + DW_AT_MIPS_software_pipeline_depth = 0x2006, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_MIPS_stride = 0x2008, + DW_AT_MIPS_abstract_name = 0x2009, + DW_AT_MIPS_clone_origin = 0x200a, + DW_AT_MIPS_has_inlines = 0x200b, + // HP extensions. + DW_AT_HP_block_index = 0x2000, + DW_AT_HP_unmodifiable = 0x2001, // Same as DW_AT_MIPS_fde. + DW_AT_HP_actuals_stmt_list = 0x2010, + DW_AT_HP_proc_per_section = 0x2011, + DW_AT_HP_raw_data_ptr = 0x2012, + DW_AT_HP_pass_by_reference = 0x2013, + DW_AT_HP_opt_level = 0x2014, + DW_AT_HP_prof_version_id = 0x2015, + DW_AT_HP_opt_flags = 0x2016, + DW_AT_HP_cold_region_low_pc = 0x2017, + DW_AT_HP_cold_region_high_pc = 0x2018, + DW_AT_HP_all_variables_modifiable = 0x2019, + DW_AT_HP_linkage_name = 0x201a, + DW_AT_HP_prof_flags = 0x201b, // In comp unit of procs_info for -g. + // GNU extensions. + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_AT_GNU_dwo_name = 0x2130, + DW_AT_GNU_dwo_id = 0x2131, + DW_AT_GNU_ranges_base = 0x2132, + DW_AT_GNU_addr_base = 0x2133, + DW_AT_GNU_pubnames = 0x2134, + DW_AT_GNU_pubtypes = 0x2135, + // VMS extensions. + DW_AT_VMS_rtnbeg_pd_address = 0x2201, + // UPC extension. + DW_AT_upc_threads_scaled = 0x3210, + // PGI (STMicroelectronics) extensions. + DW_AT_PGI_lbase = 0x3a00, + DW_AT_PGI_soffset = 0x3a01, + DW_AT_PGI_lstride = 0x3a02 +}; + +// .debug_rngslist entry types +enum DwarfRngListEntry { + DW_RLE_end_of_list = 0, + DW_RLE_base_addressx = 1, + DW_RLE_startx_endx = 2, + DW_RLE_startx_length = 3, + DW_RLE_offset_pair = 4, + DW_RLE_base_address = 5, + DW_RLE_start_end = 6, + DW_RLE_start_length = 7, +}; + +// Line number content type codes (DWARF 5). +enum DwarfLineNumberContentType { + DW_LNCT_path = 1, + DW_LNCT_directory_index = 2, + DW_LNCT_timestamp = 3, + DW_LNCT_size = 4, + DW_LNCT_MD5 = 5, +}; + +// Line number opcodes. +enum DwarfLineNumberOps { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3, + DW_LNS_set_file = 4, + DW_LNS_set_column = 5, + DW_LNS_negate_stmt = 6, + DW_LNS_set_basic_block = 7, + DW_LNS_const_add_pc = 8, + DW_LNS_fixed_advance_pc = 9, + // DWARF 3. + DW_LNS_set_prologue_end = 10, + DW_LNS_set_epilogue_begin = 11, + DW_LNS_set_isa = 12 +}; + +// Line number extended opcodes. +enum DwarfLineNumberExtendedOps { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2, + DW_LNE_define_file = 3, + // HP extensions. + DW_LNE_HP_negate_is_UV_update = 0x11, + DW_LNE_HP_push_context = 0x12, + DW_LNE_HP_pop_context = 0x13, + DW_LNE_HP_set_file_line_column = 0x14, + DW_LNE_HP_set_routine_name = 0x15, + DW_LNE_HP_set_sequence = 0x16, + DW_LNE_HP_negate_post_semantics = 0x17, + DW_LNE_HP_negate_function_exit = 0x18, + DW_LNE_HP_negate_front_end_logical = 0x19, + DW_LNE_HP_define_proc = 0x20 +}; + +// Type encoding names and codes +enum DwarfEncoding { + DW_ATE_address =0x1, + DW_ATE_boolean =0x2, + DW_ATE_complex_float =0x3, + DW_ATE_float =0x4, + DW_ATE_signed =0x5, + DW_ATE_signed_char =0x6, + DW_ATE_unsigned =0x7, + DW_ATE_unsigned_char =0x8, + // DWARF3/DWARF3f + DW_ATE_imaginary_float =0x9, + DW_ATE_packed_decimal =0xa, + DW_ATE_numeric_string =0xb, + DW_ATE_edited =0xc, + DW_ATE_signed_fixed =0xd, + DW_ATE_unsigned_fixed =0xe, + DW_ATE_decimal_float =0xf, + DW_ATE_lo_user =0x80, + DW_ATE_hi_user =0xff +}; + +// Location virtual machine opcodes +enum DwarfOpcode { + DW_OP_addr =0x03, + DW_OP_deref =0x06, + DW_OP_const1u =0x08, + DW_OP_const1s =0x09, + DW_OP_const2u =0x0a, + DW_OP_const2s =0x0b, + DW_OP_const4u =0x0c, + DW_OP_const4s =0x0d, + DW_OP_const8u =0x0e, + DW_OP_const8s =0x0f, + DW_OP_constu =0x10, + DW_OP_consts =0x11, + DW_OP_dup =0x12, + DW_OP_drop =0x13, + DW_OP_over =0x14, + DW_OP_pick =0x15, + DW_OP_swap =0x16, + DW_OP_rot =0x17, + DW_OP_xderef =0x18, + DW_OP_abs =0x19, + DW_OP_and =0x1a, + DW_OP_div =0x1b, + DW_OP_minus =0x1c, + DW_OP_mod =0x1d, + DW_OP_mul =0x1e, + DW_OP_neg =0x1f, + DW_OP_not =0x20, + DW_OP_or =0x21, + DW_OP_plus =0x22, + DW_OP_plus_uconst =0x23, + DW_OP_shl =0x24, + DW_OP_shr =0x25, + DW_OP_shra =0x26, + DW_OP_xor =0x27, + DW_OP_bra =0x28, + DW_OP_eq =0x29, + DW_OP_ge =0x2a, + DW_OP_gt =0x2b, + DW_OP_le =0x2c, + DW_OP_lt =0x2d, + DW_OP_ne =0x2e, + DW_OP_skip =0x2f, + DW_OP_lit0 =0x30, + DW_OP_lit1 =0x31, + DW_OP_lit2 =0x32, + DW_OP_lit3 =0x33, + DW_OP_lit4 =0x34, + DW_OP_lit5 =0x35, + DW_OP_lit6 =0x36, + DW_OP_lit7 =0x37, + DW_OP_lit8 =0x38, + DW_OP_lit9 =0x39, + DW_OP_lit10 =0x3a, + DW_OP_lit11 =0x3b, + DW_OP_lit12 =0x3c, + DW_OP_lit13 =0x3d, + DW_OP_lit14 =0x3e, + DW_OP_lit15 =0x3f, + DW_OP_lit16 =0x40, + DW_OP_lit17 =0x41, + DW_OP_lit18 =0x42, + DW_OP_lit19 =0x43, + DW_OP_lit20 =0x44, + DW_OP_lit21 =0x45, + DW_OP_lit22 =0x46, + DW_OP_lit23 =0x47, + DW_OP_lit24 =0x48, + DW_OP_lit25 =0x49, + DW_OP_lit26 =0x4a, + DW_OP_lit27 =0x4b, + DW_OP_lit28 =0x4c, + DW_OP_lit29 =0x4d, + DW_OP_lit30 =0x4e, + DW_OP_lit31 =0x4f, + DW_OP_reg0 =0x50, + DW_OP_reg1 =0x51, + DW_OP_reg2 =0x52, + DW_OP_reg3 =0x53, + DW_OP_reg4 =0x54, + DW_OP_reg5 =0x55, + DW_OP_reg6 =0x56, + DW_OP_reg7 =0x57, + DW_OP_reg8 =0x58, + DW_OP_reg9 =0x59, + DW_OP_reg10 =0x5a, + DW_OP_reg11 =0x5b, + DW_OP_reg12 =0x5c, + DW_OP_reg13 =0x5d, + DW_OP_reg14 =0x5e, + DW_OP_reg15 =0x5f, + DW_OP_reg16 =0x60, + DW_OP_reg17 =0x61, + DW_OP_reg18 =0x62, + DW_OP_reg19 =0x63, + DW_OP_reg20 =0x64, + DW_OP_reg21 =0x65, + DW_OP_reg22 =0x66, + DW_OP_reg23 =0x67, + DW_OP_reg24 =0x68, + DW_OP_reg25 =0x69, + DW_OP_reg26 =0x6a, + DW_OP_reg27 =0x6b, + DW_OP_reg28 =0x6c, + DW_OP_reg29 =0x6d, + DW_OP_reg30 =0x6e, + DW_OP_reg31 =0x6f, + DW_OP_breg0 =0x70, + DW_OP_breg1 =0x71, + DW_OP_breg2 =0x72, + DW_OP_breg3 =0x73, + DW_OP_breg4 =0x74, + DW_OP_breg5 =0x75, + DW_OP_breg6 =0x76, + DW_OP_breg7 =0x77, + DW_OP_breg8 =0x78, + DW_OP_breg9 =0x79, + DW_OP_breg10 =0x7a, + DW_OP_breg11 =0x7b, + DW_OP_breg12 =0x7c, + DW_OP_breg13 =0x7d, + DW_OP_breg14 =0x7e, + DW_OP_breg15 =0x7f, + DW_OP_breg16 =0x80, + DW_OP_breg17 =0x81, + DW_OP_breg18 =0x82, + DW_OP_breg19 =0x83, + DW_OP_breg20 =0x84, + DW_OP_breg21 =0x85, + DW_OP_breg22 =0x86, + DW_OP_breg23 =0x87, + DW_OP_breg24 =0x88, + DW_OP_breg25 =0x89, + DW_OP_breg26 =0x8a, + DW_OP_breg27 =0x8b, + DW_OP_breg28 =0x8c, + DW_OP_breg29 =0x8d, + DW_OP_breg30 =0x8e, + DW_OP_breg31 =0x8f, + DW_OP_regX =0x90, + DW_OP_fbreg =0x91, + DW_OP_bregX =0x92, + DW_OP_piece =0x93, + DW_OP_deref_size =0x94, + DW_OP_xderef_size =0x95, + DW_OP_nop =0x96, + // DWARF3/DWARF3f + DW_OP_push_object_address =0x97, + DW_OP_call2 =0x98, + DW_OP_call4 =0x99, + DW_OP_call_ref =0x9a, + DW_OP_form_tls_address =0x9b, + DW_OP_call_frame_cfa =0x9c, + DW_OP_bit_piece =0x9d, + DW_OP_lo_user =0xe0, + DW_OP_hi_user =0xff, + // GNU extensions + DW_OP_GNU_push_tls_address =0xe0, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_OP_GNU_addr_index =0xfb, + DW_OP_GNU_const_index =0xfc +}; + +// Section identifiers for DWP files +enum DwarfSectionId { + DW_SECT_INFO = 1, + DW_SECT_TYPES = 2, + DW_SECT_ABBREV = 3, + DW_SECT_LINE = 4, + DW_SECT_LOC = 5, + DW_SECT_STR_OFFSETS = 6, + DW_SECT_MACINFO = 7, + DW_SECT_MACRO = 8 +}; + +// Source languages. These are values for DW_AT_language. +enum DwarfLanguage + { + DW_LANG_none =0x0000, + DW_LANG_C89 =0x0001, + DW_LANG_C =0x0002, + DW_LANG_Ada83 =0x0003, + DW_LANG_C_plus_plus =0x0004, + DW_LANG_Cobol74 =0x0005, + DW_LANG_Cobol85 =0x0006, + DW_LANG_Fortran77 =0x0007, + DW_LANG_Fortran90 =0x0008, + DW_LANG_Pascal83 =0x0009, + DW_LANG_Modula2 =0x000a, + DW_LANG_Java =0x000b, + DW_LANG_C99 =0x000c, + DW_LANG_Ada95 =0x000d, + DW_LANG_Fortran95 =0x000e, + DW_LANG_PLI =0x000f, + DW_LANG_ObjC =0x0010, + DW_LANG_ObjC_plus_plus =0x0011, + DW_LANG_UPC =0x0012, + DW_LANG_D =0x0013, + DW_LANG_Rust =0x001c, + DW_LANG_Swift =0x001e, + // Implementation-defined language code range. + DW_LANG_lo_user = 0x8000, + DW_LANG_hi_user = 0xffff, + + // Extensions. + + // MIPS assembly language. The GNU toolchain uses this for all + // assembly languages, since there's no generic DW_LANG_ value for that. + // See include/dwarf2.h in the binutils, gdb, or gcc source trees. + DW_LANG_Mips_Assembler =0x8001, + DW_LANG_Upc =0x8765 // Unified Parallel C + }; + +// Inline codes. These are values for DW_AT_inline. +enum DwarfInline { + DW_INL_not_inlined =0x0, + DW_INL_inlined =0x1, + DW_INL_declared_not_inlined =0x2, + DW_INL_declared_inlined =0x3 +}; + +// Call Frame Info instructions. +enum DwarfCFI + { + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80, + DW_CFA_restore = 0xc0, + DW_CFA_nop = 0x00, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + + // Opcodes in this range are reserved for user extensions. + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + // SGI/MIPS specific. + DW_CFA_MIPS_advance_loc8 = 0x1d, + + // GNU extensions. + DW_CFA_GNU_window_save = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + DW_CFA_GNU_negative_offset_extended = 0x2f + }; + +// Exception handling 'z' augmentation letters. +enum DwarfZAugmentationCodes { + // If the CFI augmentation string begins with 'z', then the CIE and FDE + // have an augmentation data area just before the instructions, whose + // contents are determined by the subsequent augmentation letters. + DW_Z_augmentation_start = 'z', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, and the FDE + // augmentation data includes a language-specific data area pointer, + // represented using that encoding. + DW_Z_has_LSDA = 'L', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, followed by a pointer + // to a personality routine, represented using that encoding. + DW_Z_has_personality_routine = 'P', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding describing how the FDE's + // initial location, address range, and DW_CFA_set_loc operands are + // encoded. + DW_Z_has_FDE_address_encoding = 'R', + + // If this letter is present in a 'z' augmentation string, then code + // addresses covered by FDEs that cite this CIE are signal delivery + // trampolines. Return addresses of frames in trampolines should not be + // adjusted as described in section 6.4.4 of the DWARF 3 spec. + DW_Z_is_signal_trampoline = 'S' +}; + +// Exception handling frame description pointer formats, as described +// by the Linux Standard Base Core Specification 4.0, section 11.5, +// DWARF Extensions. +enum DwarfPointerEncoding + { + DW_EH_PE_absptr = 0x00, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + + // The GNU toolchain sources define this enum value as well, + // simply to help classify the lower nybble values into signed and + // unsigned groups. + DW_EH_PE_signed = 0x08, + + // This is not documented in LSB 4.0, but it is used in both the + // Linux and OS X toolchains. It can be added to any other + // encoding (except DW_EH_PE_aligned), and indicates that the + // encoded value represents the address at which the true address + // is stored, not the true address itself. + DW_EH_PE_indirect = 0x80 + }; + +} // namespace google_breakpad +#endif // COMMON_DWARF_DWARF2ENUMS_H__ diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc new file mode 100644 index 0000000000..6bd2614b42 --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc @@ -0,0 +1,3435 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implementation of LineInfo, CompilationUnit, +// and CallFrameInfo. See dwarf2reader.h for details. + +#include "common/dwarf/dwarf2reader.h" + +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <algorithm> +#include <map> +#include <memory> +#include <stack> +#include <string> +#include <utility> + +#include <sys/stat.h> + +#include "common/dwarf/bytereader-inl.h" +#include "common/dwarf/bytereader.h" +#include "common/dwarf/line_state_machine.h" +#include "common/using_std_string.h" +#include "google_breakpad/common/breakpad_types.h" + +namespace google_breakpad { + +const SectionMap::const_iterator GetSectionByName(const SectionMap& + sections, const char *name) { + assert(name[0] == '.'); + auto iter = sections.find(name); + if (iter != sections.end()) + return iter; + std::string macho_name("__"); + macho_name += name + 1; + iter = sections.find(macho_name); + return iter; +} + +CompilationUnit::CompilationUnit(const string& path, + const SectionMap& sections, uint64_t offset, + ByteReader* reader, Dwarf2Handler* handler) + : path_(path), offset_from_section_start_(offset), reader_(reader), + sections_(sections), handler_(handler), abbrevs_(), + string_buffer_(NULL), string_buffer_length_(0), + line_string_buffer_(NULL), line_string_buffer_length_(0), + str_offsets_buffer_(NULL), str_offsets_buffer_length_(0), + addr_buffer_(NULL), addr_buffer_length_(0), + is_split_dwarf_(false), is_type_unit_(false), dwo_id_(0), dwo_name_(), + skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0), + str_offsets_base_(0), have_checked_for_dwp_(false), dwp_path_(), + dwp_byte_reader_(), dwp_reader_() {} + +// Initialize a compilation unit from a .dwo or .dwp file. +// In this case, we need the .debug_addr section from the +// executable file that contains the corresponding skeleton +// compilation unit. We also inherit the Dwarf2Handler from +// the executable file, and call it as if we were still +// processing the original compilation unit. + +void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer, + uint64_t addr_buffer_length, + uint64_t addr_base, + uint64_t ranges_base, + uint64_t dwo_id) { + is_split_dwarf_ = true; + addr_buffer_ = addr_buffer; + addr_buffer_length_ = addr_buffer_length; + addr_base_ = addr_base; + ranges_base_ = ranges_base; + skeleton_dwo_id_ = dwo_id; +} + +// Read a DWARF2/3 abbreviation section. +// Each abbrev consists of a abbreviation number, a tag, a byte +// specifying whether the tag has children, and a list of +// attribute/form pairs. +// The list of forms is terminated by a 0 for the attribute, and a +// zero for the form. The entire abbreviation section is terminated +// by a zero for the code. + +void CompilationUnit::ReadAbbrevs() { + if (abbrevs_) + return; + + // First get the debug_abbrev section. + SectionMap::const_iterator iter = + GetSectionByName(sections_, ".debug_abbrev"); + assert(iter != sections_.end()); + + abbrevs_ = new std::vector<Abbrev>; + abbrevs_->resize(1); + + // The only way to check whether we are reading over the end of the + // buffer would be to first compute the size of the leb128 data by + // reading it, then go back and read it again. + const uint8_t* abbrev_start = iter->second.first + + header_.abbrev_offset; + const uint8_t* abbrevptr = abbrev_start; +#ifndef NDEBUG + const uint64_t abbrev_length = iter->second.second - header_.abbrev_offset; +#endif + + uint64_t highest_number = 0; + + while (1) { + CompilationUnit::Abbrev abbrev; + size_t len; + const uint64_t number = reader_->ReadUnsignedLEB128(abbrevptr, &len); + highest_number = std::max(highest_number, number); + + if (number == 0) + break; + abbrev.number = number; + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint64_t tag = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + abbrev.tag = static_cast<enum DwarfTag>(tag); + + assert(abbrevptr < abbrev_start + abbrev_length); + abbrev.has_children = reader_->ReadOneByte(abbrevptr); + abbrevptr += 1; + + assert(abbrevptr < abbrev_start + abbrev_length); + + while (1) { + const uint64_t nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint64_t formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + if (nametemp == 0 && formtemp == 0) + break; + + uint64_t value = 0; + if (formtemp == DW_FORM_implicit_const) { + value = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + } + AttrForm abbrev_attr(static_cast<enum DwarfAttribute>(nametemp), + static_cast<enum DwarfForm>(formtemp), + value); + abbrev.attributes.push_back(abbrev_attr); + } + abbrevs_->push_back(abbrev); + } + + // Account of cases where entries are out of order. + std::sort(abbrevs_->begin(), abbrevs_->end(), + [](const CompilationUnit::Abbrev& lhs, const CompilationUnit::Abbrev& rhs) { + return lhs.number < rhs.number; + }); + + // Ensure that there are no missing sections. + assert(abbrevs_->size() == highest_number + 1); +} + +// Skips a single DIE's attributes. +const uint8_t* CompilationUnit::SkipDIE(const uint8_t* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = SkipAttribute(start, i->form_); + } + return start; +} + +// Skips a single attribute form's data. +const uint8_t* CompilationUnit::SkipAttribute(const uint8_t* start, + enum DwarfForm form) { + size_t len; + + switch (form) { + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return SkipAttribute(start, form); + + case DW_FORM_flag_present: + case DW_FORM_implicit_const: + return start; + case DW_FORM_addrx1: + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + case DW_FORM_strx1: + return start + 1; + case DW_FORM_addrx2: + case DW_FORM_ref2: + case DW_FORM_data2: + case DW_FORM_strx2: + return start + 2; + case DW_FORM_addrx3: + case DW_FORM_strx3: + return start + 3; + case DW_FORM_addrx4: + case DW_FORM_ref4: + case DW_FORM_data4: + case DW_FORM_strx4: + case DW_FORM_ref_sup4: + return start + 4; + case DW_FORM_ref8: + case DW_FORM_data8: + case DW_FORM_ref_sig8: + case DW_FORM_ref_sup8: + return start + 8; + case DW_FORM_data16: + return start + 16; + case DW_FORM_string: + return start + strlen(reinterpret_cast<const char*>(start)) + 1; + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_strx: + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_addr_index: + case DW_FORM_addrx: + case DW_FORM_rnglistx: + case DW_FORM_loclistx: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + + case DW_FORM_sdata: + reader_->ReadSignedLEB128(start, &len); + return start + len; + case DW_FORM_addr: + return start + reader_->AddressSize(); + case DW_FORM_ref_addr: + // DWARF2 and 3/4 differ on whether ref_addr is address size or + // offset size. + assert(header_.version >= 2); + if (header_.version == 2) { + return start + reader_->AddressSize(); + } else if (header_.version >= 3) { + return start + reader_->OffsetSize(); + } + break; + + case DW_FORM_block1: + return start + 1 + reader_->ReadOneByte(start); + case DW_FORM_block2: + return start + 2 + reader_->ReadTwoBytes(start); + case DW_FORM_block4: + return start + 4 + reader_->ReadFourBytes(start); + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64_t size = reader_->ReadUnsignedLEB128(start, &len); + return start + size + len; + } + case DW_FORM_strp: + case DW_FORM_line_strp: + case DW_FORM_strp_sup: + case DW_FORM_sec_offset: + return start + reader_->OffsetSize(); + } + fprintf(stderr,"Unhandled form type"); + return NULL; +} + +// Read the abbreviation offset from a compilation unit header. +size_t CompilationUnit::ReadAbbrevOffset(const uint8_t* headerptr) { + assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_); + header_.abbrev_offset = reader_->ReadOffset(headerptr); + return reader_->OffsetSize(); +} + +// Read the address size from a compilation unit header. +size_t CompilationUnit::ReadAddressSize(const uint8_t* headerptr) { + // Compare against less than or equal because this may be the last + // section in the file. + assert(headerptr + 1 <= buffer_ + buffer_length_); + header_.address_size = reader_->ReadOneByte(headerptr); + reader_->SetAddressSize(header_.address_size); + return 1; +} + +// Read the DWO id from a split or skeleton compilation unit header. +size_t CompilationUnit::ReadDwoId(const uint8_t* headerptr) { + assert(headerptr + 8 <= buffer_ + buffer_length_); + dwo_id_ = reader_->ReadEightBytes(headerptr); + return 8; +} + +// Read the type signature from a type or split type compilation unit header. +size_t CompilationUnit::ReadTypeSignature(const uint8_t* headerptr) { + assert(headerptr + 8 <= buffer_ + buffer_length_); + type_signature_ = reader_->ReadEightBytes(headerptr); + return 8; +} + +// Read the DWO id from a split or skeleton compilation unit header. +size_t CompilationUnit::ReadTypeOffset(const uint8_t* headerptr) { + assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_); + type_offset_ = reader_->ReadOffset(headerptr); + return reader_->OffsetSize(); +} + + +// Read a DWARF header. The header is variable length in DWARF3 and DWARF4 +// (and DWARF2 as extended by most compilers), and consists of an length +// field, a version number, the offset in the .debug_abbrev section for our +// abbrevs, and an address size. DWARF5 adds a unit_type to distinguish +// between partial-, full-, skeleton-, split-, and type- compilation units. +void CompilationUnit::ReadHeader() { + const uint8_t* headerptr = buffer_; + size_t initial_length_size; + + assert(headerptr + 4 < buffer_ + buffer_length_); + const uint64_t initial_length + = reader_->ReadInitialLength(headerptr, &initial_length_size); + headerptr += initial_length_size; + header_.length = initial_length; + + assert(headerptr + 2 < buffer_ + buffer_length_); + header_.version = reader_->ReadTwoBytes(headerptr); + headerptr += 2; + + if (header_.version <= 4) { + // Older versions of dwarf have a relatively simple structure. + headerptr += ReadAbbrevOffset(headerptr); + headerptr += ReadAddressSize(headerptr); + } else { + // DWARF5 adds a unit_type field, and various fields based on unit_type. + assert(headerptr + 1 < buffer_ + buffer_length_); + uint8_t unit_type = reader_->ReadOneByte(headerptr); + headerptr += 1; + headerptr += ReadAddressSize(headerptr); + headerptr += ReadAbbrevOffset(headerptr); + switch (unit_type) { + case DW_UT_compile: + case DW_UT_partial: + // nothing else to read + break; + case DW_UT_skeleton: + case DW_UT_split_compile: + headerptr += ReadDwoId(headerptr); + break; + case DW_UT_type: + case DW_UT_split_type: + is_type_unit_ = true; + headerptr += ReadTypeSignature(headerptr); + headerptr += ReadTypeOffset(headerptr); + break; + default: + fprintf(stderr, "Unhandled compilation unit type 0x%x", unit_type); + break; + } + } + after_header_ = headerptr; + + // This check ensures that we don't have to do checking during the + // reading of DIEs. header_.length does not include the size of the + // initial length. + assert(buffer_ + initial_length_size + header_.length <= + buffer_ + buffer_length_); +} + +uint64_t CompilationUnit::Start() { + // First get the debug_info section. + SectionMap::const_iterator iter = + GetSectionByName(sections_, ".debug_info"); + assert(iter != sections_.end()); + + // Set up our buffer + buffer_ = iter->second.first + offset_from_section_start_; + buffer_length_ = iter->second.second - offset_from_section_start_; + + // Read the header + ReadHeader(); + + // Figure out the real length from the end of the initial length to + // the end of the compilation unit, since that is the value we + // return. + uint64_t ourlength = header_.length; + if (reader_->OffsetSize() == 8) + ourlength += 12; + else + ourlength += 4; + + // See if the user wants this compilation unit, and if not, just return. + if (!handler_->StartCompilationUnit(offset_from_section_start_, + reader_->AddressSize(), + reader_->OffsetSize(), + header_.length, + header_.version)) + return ourlength; + else if (header_.version == 5 && is_type_unit_) + return ourlength; + + // Otherwise, continue by reading our abbreviation entries. + ReadAbbrevs(); + + // Set the string section if we have one. + iter = GetSectionByName(sections_, ".debug_str"); + if (iter != sections_.end()) { + string_buffer_ = iter->second.first; + string_buffer_length_ = iter->second.second; + } + + // Set the line string section if we have one. + iter = GetSectionByName(sections_, ".debug_line_str"); + if (iter != sections_.end()) { + line_string_buffer_ = iter->second.first; + line_string_buffer_length_ = iter->second.second; + } + + // Set the string offsets section if we have one. + iter = GetSectionByName(sections_, ".debug_str_offsets"); + if (iter != sections_.end()) { + str_offsets_buffer_ = iter->second.first; + str_offsets_buffer_length_ = iter->second.second; + } + + // Set the address section if we have one. + iter = GetSectionByName(sections_, ".debug_addr"); + if (iter != sections_.end()) { + addr_buffer_ = iter->second.first; + addr_buffer_length_ = iter->second.second; + } + + // Now that we have our abbreviations, start processing DIE's. + ProcessDIEs(); + + // If this is a skeleton compilation unit generated with split DWARF, + // and the client needs the full debug info, we need to find the full + // compilation unit in a .dwo or .dwp file. + if (!is_split_dwarf_ + && dwo_name_ != NULL + && handler_->NeedSplitDebugInfo()) + ProcessSplitDwarf(); + + return ourlength; +} + +void CompilationUnit::ProcessFormStringIndex( + uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form, + uint64_t str_index) { + const size_t kStringOffsetsTableHeaderSize = + header_.version >= 5 ? (reader_->OffsetSize() == 8 ? 16 : 8) : 0; + const uint8_t* str_offsets_table_after_header = str_offsets_base_ ? + str_offsets_buffer_ + str_offsets_base_ : + str_offsets_buffer_ + kStringOffsetsTableHeaderSize; + const uint8_t* offset_ptr = + str_offsets_table_after_header + str_index * reader_->OffsetSize(); + + const uint64_t offset = reader_->ReadOffset(offset_ptr); + if (offset >= string_buffer_length_) { + return; + } + + const char* str = reinterpret_cast<const char*>(string_buffer_) + offset; + ProcessAttributeString(dieoffset, attr, form, str); +} + +// Special function for pre-processing the +// DW_AT_str_offsets_base and DW_AT_addr_base in a DW_TAG_compile_unit die (for +// DWARF v5). We must make sure to find and process the +// DW_AT_str_offsets_base and DW_AT_addr_base attributes before attempting to +// read any string and address attribute in the compile unit. +const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute( + uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr, + enum DwarfForm form, uint64_t implicit_const) { + size_t len; + + switch (form) { + // DW_FORM_indirect is never used because it is such a space + // waster. + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return ProcessOffsetBaseAttribute(dieoffset, start, attr, form, + implicit_const); + + case DW_FORM_flag_present: + return start; + case DW_FORM_data1: + case DW_FORM_flag: + return start + 1; + case DW_FORM_data2: + return start + 2; + case DW_FORM_data4: + return start + 4; + case DW_FORM_data8: + return start + 8; + case DW_FORM_data16: + // This form is designed for an md5 checksum inside line tables. + return start + 16; + case DW_FORM_string: { + const char* str = reinterpret_cast<const char*>(start); + return start + strlen(str) + 1; + } + case DW_FORM_udata: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + case DW_FORM_sdata: + reader_->ReadSignedLEB128(start, &len); + return start + len; + case DW_FORM_addr: + reader_->ReadAddress(start); + return start + reader_->AddressSize(); + + // This is the important one here! + case DW_FORM_sec_offset: + if (attr == DW_AT_str_offsets_base || + attr == DW_AT_addr_base) + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); + else + reader_->ReadOffset(start); + return start + reader_->OffsetSize(); + + case DW_FORM_ref1: + return start + 1; + case DW_FORM_ref2: + return start + 2; + case DW_FORM_ref4: + return start + 4; + case DW_FORM_ref8: + return start + 8; + case DW_FORM_ref_udata: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + case DW_FORM_ref_addr: + // DWARF2 and 3/4 differ on whether ref_addr is address size or + // offset size. + assert(header_.version >= 2); + if (header_.version == 2) { + reader_->ReadAddress(start); + return start + reader_->AddressSize(); + } else if (header_.version >= 3) { + reader_->ReadOffset(start); + return start + reader_->OffsetSize(); + } + break; + case DW_FORM_ref_sig8: + return start + 8; + case DW_FORM_implicit_const: + return start; + case DW_FORM_block1: { + uint64_t datalen = reader_->ReadOneByte(start); + return start + 1 + datalen; + } + case DW_FORM_block2: { + uint64_t datalen = reader_->ReadTwoBytes(start); + return start + 2 + datalen; + } + case DW_FORM_block4: { + uint64_t datalen = reader_->ReadFourBytes(start); + return start + 4 + datalen; + } + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len); + return start + datalen + len; + } + case DW_FORM_strp: { + reader_->ReadOffset(start); + return start + reader_->OffsetSize(); + } + case DW_FORM_line_strp: { + reader_->ReadOffset(start); + return start + reader_->OffsetSize(); + } + case DW_FORM_strp_sup: + return start + 4; + case DW_FORM_ref_sup4: + return start + 4; + case DW_FORM_ref_sup8: + return start + 8; + case DW_FORM_loclistx: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + case DW_FORM_strx: + case DW_FORM_GNU_str_index: { + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + } + case DW_FORM_strx1: { + return start + 1; + } + case DW_FORM_strx2: { + return start + 2; + } + case DW_FORM_strx3: { + return start + 3; + } + case DW_FORM_strx4: { + return start + 4; + } + + case DW_FORM_addrx: + case DW_FORM_GNU_addr_index: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + case DW_FORM_addrx1: + return start + 1; + case DW_FORM_addrx2: + return start + 2; + case DW_FORM_addrx3: + return start + 3; + case DW_FORM_addrx4: + return start + 4; + case DW_FORM_rnglistx: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + } + fprintf(stderr, "Unhandled form type\n"); + return NULL; +} + +// If one really wanted, you could merge SkipAttribute and +// ProcessAttribute +// This is all boring data manipulation and calling of the handler. +const uint8_t* CompilationUnit::ProcessAttribute( + uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr, + enum DwarfForm form, uint64_t implicit_const) { + size_t len; + + switch (form) { + // DW_FORM_indirect is never used because it is such a space + // waster. + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return ProcessAttribute(dieoffset, start, attr, form, implicit_const); + + case DW_FORM_flag_present: + ProcessAttributeUnsigned(dieoffset, attr, form, 1); + return start; + case DW_FORM_data1: + case DW_FORM_flag: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOneByte(start)); + return start + 1; + case DW_FORM_data2: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadTwoBytes(start)); + return start + 2; + case DW_FORM_data4: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadFourBytes(start)); + return start + 4; + case DW_FORM_data8: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + case DW_FORM_data16: + // This form is designed for an md5 checksum inside line tables. + fprintf(stderr, "Unhandled form type: DW_FORM_data16\n"); + return start + 16; + case DW_FORM_string: { + const char* str = reinterpret_cast<const char*>(start); + ProcessAttributeString(dieoffset, attr, form, str); + return start + strlen(str) + 1; + } + case DW_FORM_udata: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, &len)); + return start + len; + + case DW_FORM_sdata: + ProcessAttributeSigned(dieoffset, attr, form, + reader_->ReadSignedLEB128(start, &len)); + return start + len; + case DW_FORM_addr: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + case DW_FORM_sec_offset: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + + case DW_FORM_ref1: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadOneByte(start) + + offset_from_section_start_); + return start + 1; + case DW_FORM_ref2: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadTwoBytes(start) + + offset_from_section_start_); + return start + 2; + case DW_FORM_ref4: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadFourBytes(start) + + offset_from_section_start_); + return start + 4; + case DW_FORM_ref8: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadEightBytes(start) + + offset_from_section_start_); + return start + 8; + case DW_FORM_ref_udata: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, + &len) + + offset_from_section_start_); + return start + len; + case DW_FORM_ref_addr: + // DWARF2 and 3/4 differ on whether ref_addr is address size or + // offset size. + assert(header_.version >= 2); + if (header_.version == 2) { + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + } else if (header_.version >= 3) { + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + } + break; + case DW_FORM_ref_sig8: + handler_->ProcessAttributeSignature(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + case DW_FORM_implicit_const: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + implicit_const); + return start; + case DW_FORM_block1: { + uint64_t datalen = reader_->ReadOneByte(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1, + datalen); + return start + 1 + datalen; + } + case DW_FORM_block2: { + uint64_t datalen = reader_->ReadTwoBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2, + datalen); + return start + 2 + datalen; + } + case DW_FORM_block4: { + uint64_t datalen = reader_->ReadFourBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4, + datalen); + return start + 4 + datalen; + } + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len, + datalen); + return start + datalen + len; + } + case DW_FORM_strp: { + assert(string_buffer_ != NULL); + + const uint64_t offset = reader_->ReadOffset(start); + assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_); + + const char* str = reinterpret_cast<const char*>(string_buffer_ + offset); + ProcessAttributeString(dieoffset, attr, form, str); + return start + reader_->OffsetSize(); + } + case DW_FORM_line_strp: { + assert(line_string_buffer_ != NULL); + + const uint64_t offset = reader_->ReadOffset(start); + assert(line_string_buffer_ + offset < + line_string_buffer_ + line_string_buffer_length_); + + const char* str = + reinterpret_cast<const char*>(line_string_buffer_ + offset); + ProcessAttributeString(dieoffset, attr, form, str); + return start + reader_->OffsetSize(); + } + case DW_FORM_strp_sup: + // No support currently for suplementary object files. + fprintf(stderr, "Unhandled form type: DW_FORM_strp_sup\n"); + return start + 4; + case DW_FORM_ref_sup4: + // No support currently for suplementary object files. + fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup4\n"); + return start + 4; + case DW_FORM_ref_sup8: + // No support currently for suplementary object files. + fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup8\n"); + return start + 8; + case DW_FORM_loclistx: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, &len)); + return start + len; + case DW_FORM_strx: + case DW_FORM_GNU_str_index: { + uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len); + ProcessFormStringIndex(dieoffset, attr, form, str_index); + return start + len; + } + case DW_FORM_strx1: { + uint64_t str_index = reader_->ReadOneByte(start); + ProcessFormStringIndex(dieoffset, attr, form, str_index); + return start + 1; + } + case DW_FORM_strx2: { + uint64_t str_index = reader_->ReadTwoBytes(start); + ProcessFormStringIndex(dieoffset, attr, form, str_index); + return start + 2; + } + case DW_FORM_strx3: { + uint64_t str_index = reader_->ReadThreeBytes(start); + ProcessFormStringIndex(dieoffset, attr, form, str_index); + return start + 3; + } + case DW_FORM_strx4: { + uint64_t str_index = reader_->ReadFourBytes(start); + ProcessFormStringIndex(dieoffset, attr, form, str_index); + return start + 4; + } + + case DW_FORM_addrx: + case DW_FORM_GNU_addr_index: + ProcessAttributeAddrIndex( + dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len)); + return start + len; + case DW_FORM_addrx1: + ProcessAttributeAddrIndex( + dieoffset, attr, form, reader_->ReadOneByte(start)); + return start + 1; + case DW_FORM_addrx2: + ProcessAttributeAddrIndex( + dieoffset, attr, form, reader_->ReadTwoBytes(start)); + return start + 2; + case DW_FORM_addrx3: + ProcessAttributeAddrIndex( + dieoffset, attr, form, reader_->ReadThreeBytes(start)); + return start + 3; + case DW_FORM_addrx4: + ProcessAttributeAddrIndex( + dieoffset, attr, form, reader_->ReadFourBytes(start)); + return start + 4; + case DW_FORM_rnglistx: + ProcessAttributeUnsigned( + dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len)); + return start + len; + } + fprintf(stderr, "Unhandled form type\n"); + return NULL; +} + +const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset, + const uint8_t* start, + const Abbrev& abbrev) { + // With DWARF v5, the compile_unit die may contain a + // DW_AT_str_offsets_base or DW_AT_addr_base. If it does, that attribute must + // be found and processed before trying to process the other attributes; + // otherwise the string or address values will all come out incorrect. + if (abbrev.tag == DW_TAG_compile_unit && header_.version == 5) { + uint64_t dieoffset_copy = dieoffset; + const uint8_t* start_copy = start; + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start_copy = ProcessOffsetBaseAttribute(dieoffset_copy, start_copy, + i->attr_, i->form_, + i->value_); + } + } + + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = ProcessAttribute(dieoffset, start, i->attr_, i->form_, i->value_); + } + + // If this is a compilation unit in a split DWARF object, verify that + // the dwo_id matches. If it does not match, we will ignore this + // compilation unit. + if (abbrev.tag == DW_TAG_compile_unit + && is_split_dwarf_ + && dwo_id_ != skeleton_dwo_id_) { + return NULL; + } + + return start; +} + +void CompilationUnit::ProcessDIEs() { + const uint8_t* dieptr = after_header_; + size_t len; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const uint8_t* lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + std::stack<uint64_t> die_stack; + + while (dieptr < (lengthstart + header_.length)) { + // We give the user the absolute offset from the beginning of + // debug_info, since they need it to deal with ref_addr forms. + uint64_t absolute_offset = (dieptr - buffer_) + offset_from_section_start_; + + uint64_t abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len); + + dieptr += len; + + // Abbrev == 0 represents the end of a list of children, or padding + // at the end of the compilation unit. + if (abbrev_num == 0) { + if (die_stack.size() == 0) + // If it is padding, then we are done with the compilation unit's DIEs. + return; + const uint64_t offset = die_stack.top(); + die_stack.pop(); + handler_->EndDIE(offset); + continue; + } + + const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num)); + const enum DwarfTag tag = abbrev.tag; + if (!handler_->StartDIE(absolute_offset, tag)) { + dieptr = SkipDIE(dieptr, abbrev); + } else { + dieptr = ProcessDIE(absolute_offset, dieptr, abbrev); + } + + if (abbrev.has_children) { + die_stack.push(absolute_offset); + } else { + handler_->EndDIE(absolute_offset); + } + } +} + +// Check for a valid ELF file and return the Address size. +// Returns 0 if not a valid ELF file. +inline int GetElfWidth(const ElfReader& elf) { + if (elf.IsElf32File()) + return 4; + if (elf.IsElf64File()) + return 8; + return 0; +} + +void CompilationUnit::ProcessSplitDwarf() { + struct stat statbuf; + if (!have_checked_for_dwp_) { + // Look for a .dwp file in the same directory as the executable. + have_checked_for_dwp_ = true; + string dwp_suffix(".dwp"); + dwp_path_ = path_ + dwp_suffix; + if (stat(dwp_path_.c_str(), &statbuf) != 0) { + // Fall back to a split .debug file in the same directory. + string debug_suffix(".debug"); + dwp_path_ = path_; + size_t found = path_.rfind(debug_suffix); + if (found + debug_suffix.length() == path_.length()) + dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix); + } + if (stat(dwp_path_.c_str(), &statbuf) == 0) { + ElfReader* elf = new ElfReader(dwp_path_); + int width = GetElfWidth(*elf); + if (width != 0) { + dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness())); + dwp_byte_reader_->SetAddressSize(width); + dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf)); + dwp_reader_->Initialize(); + } else { + delete elf; + } + } + } + bool found_in_dwp = false; + if (dwp_reader_) { + // If we have a .dwp file, read the debug sections for the requested CU. + SectionMap sections; + dwp_reader_->ReadDebugSectionsForCU(dwo_id_, §ions); + if (!sections.empty()) { + found_in_dwp = true; + CompilationUnit dwp_comp_unit(dwp_path_, sections, 0, + dwp_byte_reader_.get(), handler_); + dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_, + ranges_base_, dwo_id_); + dwp_comp_unit.Start(); + } + } + if (!found_in_dwp) { + // If no .dwp file, try to open the .dwo file. + if (stat(dwo_name_, &statbuf) == 0) { + ElfReader elf(dwo_name_); + int width = GetElfWidth(elf); + if (width != 0) { + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(width); + SectionMap sections; + ReadDebugSectionsFromDwo(&elf, §ions); + CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader, + handler_); + dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, + addr_base_, ranges_base_, dwo_id_); + dwo_comp_unit.Start(); + } + } + } +} + +void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader, + SectionMap* sections) { + static const char* const section_names[] = { + ".debug_abbrev", + ".debug_info", + ".debug_str_offsets", + ".debug_str" + }; + for (unsigned int i = 0u; + i < sizeof(section_names)/sizeof(*(section_names)); ++i) { + string base_name = section_names[i]; + string dwo_name = base_name + ".dwo"; + size_t section_size; + const char* section_data = elf_reader->GetSectionByName(dwo_name, + §ion_size); + if (section_data != NULL) + sections->insert(std::make_pair( + base_name, std::make_pair( + reinterpret_cast<const uint8_t*>(section_data), + section_size))); + } +} + +DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader) + : elf_reader_(elf_reader), byte_reader_(byte_reader), + cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL), + string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0), + nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL), + offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL), + abbrev_size_(0), info_data_(NULL), info_size_(0), + str_offsets_data_(NULL), str_offsets_size_(0) {} + +DwpReader::~DwpReader() { + if (elf_reader_) delete elf_reader_; +} + +void DwpReader::Initialize() { + cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index", + &cu_index_size_); + if (cu_index_ == NULL) { + return; + } + // The .debug_str.dwo section is shared by all CUs in the file. + string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo", + &string_buffer_size_); + + version_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(cu_index_)); + + if (version_ == 1) { + nslots_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(cu_index_) + + 3 * sizeof(uint32_t)); + phash_ = cu_index_ + 4 * sizeof(uint32_t); + pindex_ = phash_ + nslots_ * sizeof(uint64_t); + shndx_pool_ = pindex_ + nslots_ * sizeof(uint32_t); + if (shndx_pool_ >= cu_index_ + cu_index_size_) { + version_ = 0; + } + } else if (version_ == 2 || version_ == 5) { + ncolumns_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(cu_index_) + sizeof(uint32_t)); + nunits_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(cu_index_) + 2 * sizeof(uint32_t)); + nslots_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(cu_index_) + 3 * sizeof(uint32_t)); + phash_ = cu_index_ + 4 * sizeof(uint32_t); + pindex_ = phash_ + nslots_ * sizeof(uint64_t); + offset_table_ = pindex_ + nslots_ * sizeof(uint32_t); + size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32_t); + abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo", + &abbrev_size_); + info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_); + str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo", + &str_offsets_size_); + if (size_table_ >= cu_index_ + cu_index_size_) { + version_ = 0; + } + } +} + +void DwpReader::ReadDebugSectionsForCU(uint64_t dwo_id, + SectionMap* sections) { + if (version_ == 1) { + int slot = LookupCU(dwo_id); + if (slot == -1) { + return; + } + + // The index table points to the section index pool, where we + // can read a list of section indexes for the debug sections + // for the CU whose dwo_id we are looking for. + int index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(pindex_) + + slot * sizeof(uint32_t)); + const char* shndx_list = shndx_pool_ + index * sizeof(uint32_t); + for (;;) { + if (shndx_list >= cu_index_ + cu_index_size_) { + version_ = 0; + return; + } + unsigned int shndx = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(shndx_list)); + shndx_list += sizeof(uint32_t); + if (shndx == 0) + break; + const char* section_name = elf_reader_->GetSectionName(shndx); + size_t section_size; + const char* section_data; + // We're only interested in these four debug sections. + // The section names in the .dwo file end with ".dwo", but we + // add them to the sections table with their normal names. + if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_abbrev", + std::make_pair(reinterpret_cast<const uint8_t*> (section_data), + section_size))); + } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_info", + std::make_pair(reinterpret_cast<const uint8_t*> (section_data), + section_size))); + } else if (!strncmp(section_name, ".debug_str_offsets", + strlen(".debug_str_offsets"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_str_offsets", + std::make_pair(reinterpret_cast<const uint8_t*> (section_data), + section_size))); + } + } + sections->insert(std::make_pair( + ".debug_str", + std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_), + string_buffer_size_))); + } else if (version_ == 2 || version_ == 5) { + uint32_t index = LookupCUv2(dwo_id); + if (index == 0) { + return; + } + + // The index points to a row in each of the section offsets table + // and the section size table, where we can read the offsets and sizes + // of the contributions to each debug section from the CU whose dwo_id + // we are looking for. Row 0 of the section offsets table has the + // section ids for each column of the table. The size table begins + // with row 1. + const char* id_row = offset_table_; + const char* offset_row = offset_table_ + + index * ncolumns_ * sizeof(uint32_t); + const char* size_row = + size_table_ + (index - 1) * ncolumns_ * sizeof(uint32_t); + if (size_row + ncolumns_ * sizeof(uint32_t) > cu_index_ + cu_index_size_) { + version_ = 0; + return; + } + for (unsigned int col = 0u; col < ncolumns_; ++col) { + uint32_t section_id = + byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t*>(id_row) + + col * sizeof(uint32_t)); + uint32_t offset = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(offset_row) + + col * sizeof(uint32_t)); + uint32_t size = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(size_row) + col * sizeof(uint32_t)); + if (section_id == DW_SECT_ABBREV) { + sections->insert(std::make_pair( + ".debug_abbrev", + std::make_pair(reinterpret_cast<const uint8_t*> (abbrev_data_) + + offset, size))); + } else if (section_id == DW_SECT_INFO) { + sections->insert(std::make_pair( + ".debug_info", + std::make_pair(reinterpret_cast<const uint8_t*> (info_data_) + + offset, size))); + } else if (section_id == DW_SECT_STR_OFFSETS) { + sections->insert(std::make_pair( + ".debug_str_offsets", + std::make_pair(reinterpret_cast<const uint8_t*> (str_offsets_data_) + + offset, size))); + } + } + sections->insert(std::make_pair( + ".debug_str", + std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_), + string_buffer_size_))); + } +} + +int DwpReader::LookupCU(uint64_t dwo_id) { + uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1); + uint64_t probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t)); + if (probe != 0 && probe != dwo_id) { + uint32_t secondary_hash = + (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1; + do { + slot = (slot + secondary_hash) & (nslots_ - 1); + probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t)); + } while (probe != 0 && probe != dwo_id); + } + if (probe == 0) + return -1; + return slot; +} + +uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) { + uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1); + uint64_t probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t)); + uint32_t index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t)); + if (index != 0 && probe != dwo_id) { + uint32_t secondary_hash = + (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1; + do { + slot = (slot + secondary_hash) & (nslots_ - 1); + probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t)); + index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t)); + } while (index != 0 && probe != dwo_id); + } + return index; +} + +LineInfo::LineInfo(const uint8_t* buffer, uint64_t buffer_length, + ByteReader* reader, const uint8_t* string_buffer, + size_t string_buffer_length, + const uint8_t* line_string_buffer, + size_t line_string_buffer_length, LineInfoHandler* handler): + handler_(handler), reader_(reader), buffer_(buffer), + string_buffer_(string_buffer), + line_string_buffer_(line_string_buffer) { +#ifndef NDEBUG + buffer_length_ = buffer_length; + string_buffer_length_ = string_buffer_length; + line_string_buffer_length_ = line_string_buffer_length; +#endif + header_.std_opcode_lengths = NULL; +} + +uint64_t LineInfo::Start() { + ReadHeader(); + ReadLines(); + return after_header_ - buffer_; +} + +void LineInfo::ReadTypesAndForms(const uint8_t** lineptr, + uint32_t* content_types, + uint32_t* content_forms, + uint32_t max_types, + uint32_t* format_count) { + size_t len; + + uint32_t count = reader_->ReadUnsignedLEB128(*lineptr, &len); + *lineptr += len; + if (count < 1 || count > max_types) { + return; + } + for (uint32_t col = 0; col < count; ++col) { + content_types[col] = reader_->ReadUnsignedLEB128(*lineptr, &len); + *lineptr += len; + content_forms[col] = reader_->ReadUnsignedLEB128(*lineptr, &len); + *lineptr += len; + } + *format_count = count; +} + +const char* LineInfo::ReadStringForm(uint32_t form, const uint8_t** lineptr) { + const char* name = nullptr; + if (form == DW_FORM_string) { + name = reinterpret_cast<const char*>(*lineptr); + *lineptr += strlen(name) + 1; + return name; + } else if (form == DW_FORM_strp) { + uint64_t offset = reader_->ReadOffset(*lineptr); + assert(offset < string_buffer_length_); + *lineptr += reader_->OffsetSize(); + if (string_buffer_ != nullptr) { + name = reinterpret_cast<const char*>(string_buffer_) + offset; + return name; + } + } else if (form == DW_FORM_line_strp) { + uint64_t offset = reader_->ReadOffset(*lineptr); + assert(offset < line_string_buffer_length_); + *lineptr += reader_->OffsetSize(); + if (line_string_buffer_ != nullptr) { + name = reinterpret_cast<const char*>(line_string_buffer_) + offset; + return name; + } + } + // Shouldn't be called with a non-string-form, and + // if there is a string form but no string buffer, + // that is a problem too. + assert(0); + return nullptr; +} + +uint64_t LineInfo::ReadUnsignedData(uint32_t form, const uint8_t** lineptr) { + size_t len; + uint64_t value; + + switch (form) { + case DW_FORM_data1: + value = reader_->ReadOneByte(*lineptr); + *lineptr += 1; + return value; + case DW_FORM_data2: + value = reader_->ReadTwoBytes(*lineptr); + *lineptr += 2; + return value; + case DW_FORM_data4: + value = reader_->ReadFourBytes(*lineptr); + *lineptr += 4; + return value; + case DW_FORM_data8: + value = reader_->ReadEightBytes(*lineptr); + *lineptr += 8; + return value; + case DW_FORM_udata: + value = reader_->ReadUnsignedLEB128(*lineptr, &len); + *lineptr += len; + return value; + default: + fprintf(stderr, "Unrecognized data form."); + return 0; + } +} + +void LineInfo::ReadFileRow(const uint8_t** lineptr, + const uint32_t* content_types, + const uint32_t* content_forms, uint32_t row, + uint32_t format_count) { + const char* filename = nullptr; + uint64_t dirindex = 0; + uint64_t mod_time = 0; + uint64_t filelength = 0; + + for (uint32_t col = 0; col < format_count; ++col) { + switch (content_types[col]) { + case DW_LNCT_path: + filename = ReadStringForm(content_forms[col], lineptr); + break; + case DW_LNCT_directory_index: + dirindex = ReadUnsignedData(content_forms[col], lineptr); + break; + case DW_LNCT_timestamp: + mod_time = ReadUnsignedData(content_forms[col], lineptr); + break; + case DW_LNCT_size: + filelength = ReadUnsignedData(content_forms[col], lineptr); + break; + case DW_LNCT_MD5: + // MD5 entries help a debugger sort different versions of files with + // the same name. It is always paired with a DW_FORM_data16 and is + // unused in this case. + *lineptr += 16; + break; + default: + fprintf(stderr, "Unrecognized form in line table header. %d\n", + content_types[col]); + assert(false); + break; + } + } + assert(filename != nullptr); + handler_->DefineFile(filename, row, dirindex, mod_time, filelength); +} + +// The header for a debug_line section is mildly complicated, because +// the line info is very tightly encoded. +void LineInfo::ReadHeader() { + const uint8_t* lineptr = buffer_; + size_t initial_length_size; + + const uint64_t initial_length + = reader_->ReadInitialLength(lineptr, &initial_length_size); + + lineptr += initial_length_size; + header_.total_length = initial_length; + assert(buffer_ + initial_length_size + header_.total_length <= + buffer_ + buffer_length_); + + + header_.version = reader_->ReadTwoBytes(lineptr); + lineptr += 2; + + if (header_.version >= 5) { + uint8_t address_size = reader_->ReadOneByte(lineptr); + reader_->SetAddressSize(address_size); + lineptr += 1; + uint8_t segment_selector_size = reader_->ReadOneByte(lineptr); + if (segment_selector_size != 0) { + fprintf(stderr,"No support for segmented memory."); + } + lineptr += 1; + } else { + // Address size *must* be set by CU ahead of time. + assert(reader_->AddressSize() != 0); + } + + header_.prologue_length = reader_->ReadOffset(lineptr); + lineptr += reader_->OffsetSize(); + + header_.min_insn_length = reader_->ReadOneByte(lineptr); + lineptr += 1; + + if (header_.version >= 4) { + __attribute__((unused)) uint8_t max_ops_per_insn = + reader_->ReadOneByte(lineptr); + ++lineptr; + assert(max_ops_per_insn == 1); + } + + header_.default_is_stmt = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.line_base = *reinterpret_cast<const int8_t*>(lineptr); + lineptr += 1; + + header_.line_range = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.opcode_base = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.std_opcode_lengths = new std::vector<unsigned char>; + header_.std_opcode_lengths->resize(header_.opcode_base + 1); + (*header_.std_opcode_lengths)[0] = 0; + for (int i = 1; i < header_.opcode_base; i++) { + (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr); + lineptr += 1; + } + + if (header_.version <= 4) { + // Directory zero is assumed to be the compilation directory and special + // cased where used. It is not actually stored in the dwarf data. But an + // empty entry here avoids off-by-one errors elsewhere in the code. + handler_->DefineDir("", 0); + // It is legal for the directory entry table to be empty. + if (*lineptr) { + uint32_t dirindex = 1; + while (*lineptr) { + const char* dirname = reinterpret_cast<const char*>(lineptr); + handler_->DefineDir(dirname, dirindex); + lineptr += strlen(dirname) + 1; + dirindex++; + } + } + lineptr++; + // It is also legal for the file entry table to be empty. + + // Similarly for file zero. + handler_->DefineFile("", 0, 0, 0, 0); + if (*lineptr) { + uint32_t fileindex = 1; + size_t len; + while (*lineptr) { + const char* filename = ReadStringForm(DW_FORM_string, &lineptr); + + uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + handler_->DefineFile(filename, fileindex, + static_cast<uint32_t>(dirindex), mod_time, + filelength); + fileindex++; + } + } + lineptr++; + } else { + // Read the DWARF-5 directory table. + + // Dwarf5 supports five different types and forms per directory- and + // file-table entry. Theoretically, there could be duplicate entries + // in this table, but that would be quite unusual. + static const uint32_t kMaxTypesAndForms = 5; + uint32_t content_types[kMaxTypesAndForms]; + uint32_t content_forms[kMaxTypesAndForms]; + uint32_t format_count; + size_t len; + + ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms, + &format_count); + uint32_t entry_count = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + for (uint32_t row = 0; row < entry_count; ++row) { + const char* dirname = nullptr; + for (uint32_t col = 0; col < format_count; ++col) { + // The path is the only relevant content type for this implementation. + if (content_types[col] == DW_LNCT_path) { + dirname = ReadStringForm(content_forms[col], &lineptr); + } + } + handler_->DefineDir(dirname, row); + } + + // Read the DWARF-5 filename table. + ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms, + &format_count); + entry_count = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + for (uint32_t row = 0; row < entry_count; ++row) { + ReadFileRow(&lineptr, content_types, content_forms, row, format_count); + } + } + after_header_ = lineptr; +} + +/* static */ +bool LineInfo::ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader& header, + const uint8_t* start, + struct LineStateMachine* lsm, + size_t* len, + uintptr pc, + bool* lsm_passes_pc) { + size_t oplen = 0; + size_t templen; + uint8_t opcode = reader->ReadOneByte(start); + oplen++; + start++; + + // If the opcode is great than the opcode_base, it is a special + // opcode. Most line programs consist mainly of special opcodes. + if (opcode >= header.opcode_base) { + opcode -= header.opcode_base; + const int64_t advance_address = (opcode / header.line_range) + * header.min_insn_length; + const int32_t advance_line = (opcode % header.line_range) + + header.line_base; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + lsm->line_num += advance_line; + lsm->basic_block = true; + *len = oplen; + return true; + } + + // Otherwise, we have the regular opcodes + switch (opcode) { + case DW_LNS_copy: { + lsm->basic_block = false; + *len = oplen; + return true; + } + + case DW_LNS_advance_pc: { + uint64_t advance_address = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && lsm->address <= pc && + pc < lsm->address + header.min_insn_length * advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += header.min_insn_length * advance_address; + } + break; + case DW_LNS_advance_line: { + const int64_t advance_line = reader->ReadSignedLEB128(start, &templen); + oplen += templen; + lsm->line_num += static_cast<int32_t>(advance_line); + + // With gcc 4.2.1, we can get the line_no here for the first time + // since DW_LNS_advance_line is called after DW_LNE_set_address is + // called. So we check if the lsm passes "pc" here, not in + // DW_LNE_set_address. + if (lsm_passes_pc && lsm->address == pc) { + *lsm_passes_pc = true; + } + } + break; + case DW_LNS_set_file: { + const uint64_t fileno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->file_num = static_cast<uint32_t>(fileno); + } + break; + case DW_LNS_set_column: { + const uint64_t colno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->column_num = static_cast<uint32_t>(colno); + } + break; + case DW_LNS_negate_stmt: { + lsm->is_stmt = !lsm->is_stmt; + } + break; + case DW_LNS_set_basic_block: { + lsm->basic_block = true; + } + break; + case DW_LNS_fixed_advance_pc: { + const uint16_t advance_address = reader->ReadTwoBytes(start); + oplen += 2; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_const_add_pc: { + const int64_t advance_address = header.min_insn_length + * ((255 - header.opcode_base) + / header.line_range); + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_extended_op: { + const uint64_t extended_op_len = reader->ReadUnsignedLEB128(start, + &templen); + start += templen; + oplen += templen + extended_op_len; + + const uint64_t extended_op = reader->ReadOneByte(start); + start++; + + switch (extended_op) { + case DW_LNE_end_sequence: { + lsm->end_sequence = true; + *len = oplen; + return true; + } + break; + case DW_LNE_set_address: { + // With gcc 4.2.1, we cannot tell the line_no here since + // DW_LNE_set_address is called before DW_LNS_advance_line is + // called. So we do not check if the lsm passes "pc" here. See + // also the comment in DW_LNS_advance_line. + uint64_t address = reader->ReadAddress(start); + lsm->address = address; + } + break; + case DW_LNE_define_file: { + const char* filename = reinterpret_cast<const char*>(start); + + templen = strlen(filename) + 1; + start += templen; + + uint64_t dirindex = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + const uint64_t mod_time = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + const uint64_t filelength = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + if (handler) { + handler->DefineFile(filename, -1, static_cast<uint32_t>(dirindex), + mod_time, filelength); + } + } + break; + } + } + break; + + default: { + // Ignore unknown opcode silently + if (header.std_opcode_lengths) { + for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) { + reader->ReadUnsignedLEB128(start, &templen); + start += templen; + oplen += templen; + } + } + } + break; + } + *len = oplen; + return false; +} + +void LineInfo::ReadLines() { + struct LineStateMachine lsm; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const uint8_t* lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + const uint8_t* lineptr = after_header_; + lsm.Reset(header_.default_is_stmt); + + // The LineInfoHandler interface expects each line's length along + // with its address, but DWARF only provides addresses (sans + // length), and an end-of-sequence address; one infers the length + // from the next address. So we report a line only when we get the + // next line's address, or the end-of-sequence address. + bool have_pending_line = false; + uint64_t pending_address = 0; + uint32_t pending_file_num = 0, pending_line_num = 0, pending_column_num = 0; + + while (lineptr < lengthstart + header_.total_length) { + size_t oplength; + bool add_row = ProcessOneOpcode(reader_, handler_, header_, + lineptr, &lsm, &oplength, (uintptr)-1, + NULL); + if (add_row) { + if (have_pending_line) + handler_->AddLine(pending_address, lsm.address - pending_address, + pending_file_num, pending_line_num, + pending_column_num); + if (lsm.end_sequence) { + lsm.Reset(header_.default_is_stmt); + have_pending_line = false; + } else { + pending_address = lsm.address; + pending_file_num = lsm.file_num; + pending_line_num = lsm.line_num; + pending_column_num = lsm.column_num; + have_pending_line = true; + } + } + lineptr += oplength; + } + + after_header_ = lengthstart + header_.total_length; +} + +bool RangeListReader::ReadRanges(enum DwarfForm form, uint64_t data) { + if (form == DW_FORM_sec_offset) { + if (cu_info_->version_ <= 4) { + return ReadDebugRanges(data); + } else { + return ReadDebugRngList(data); + } + } else if (form == DW_FORM_rnglistx) { + offset_array_ = cu_info_->ranges_base_; + uint64_t index_offset = reader_->OffsetSize() * data; + uint64_t range_list_offset = + reader_->ReadOffset(cu_info_->buffer_ + offset_array_ + index_offset); + + return ReadDebugRngList(offset_array_ + range_list_offset); + } + return false; +} + +bool RangeListReader::ReadDebugRanges(uint64_t offset) { + const uint64_t max_address = + (reader_->AddressSize() == 4) ? 0xffffffffUL + : 0xffffffffffffffffULL; + const uint64_t entry_size = reader_->AddressSize() * 2; + bool list_end = false; + + do { + if (offset > cu_info_->size_ - entry_size) { + return false; // Invalid range detected + } + + uint64_t start_address = reader_->ReadAddress(cu_info_->buffer_ + offset); + uint64_t end_address = reader_->ReadAddress( + cu_info_->buffer_ + offset + reader_->AddressSize()); + + if (start_address == max_address) { // Base address selection + cu_info_->base_address_ = end_address; + } else if (start_address == 0 && end_address == 0) { // End-of-list + handler_->Finish(); + list_end = true; + } else { // Add a range entry + handler_->AddRange(start_address + cu_info_->base_address_, + end_address + cu_info_->base_address_); + } + + offset += entry_size; + } while (!list_end); + + return true; +} + +bool RangeListReader::ReadDebugRngList(uint64_t offset) { + uint64_t start = 0; + uint64_t end = 0; + uint64_t range_len = 0; + uint64_t index = 0; + // A uleb128's length isn't known until after it has been read, so overruns + // are only caught after an entire entry. + while (offset < cu_info_->size_) { + uint8_t entry_type = reader_->ReadOneByte(cu_info_->buffer_ + offset); + offset += 1; + // Handle each entry type per Dwarf 5 Standard, section 2.17.3. + switch (entry_type) { + case DW_RLE_end_of_list: + handler_->Finish(); + return true; + case DW_RLE_base_addressx: + offset += ReadULEB(offset, &index); + cu_info_->base_address_ = GetAddressAtIndex(index); + break; + case DW_RLE_startx_endx: + offset += ReadULEB(offset, &index); + start = GetAddressAtIndex(index); + offset += ReadULEB(offset, &index); + end = GetAddressAtIndex(index); + handler_->AddRange(start, end); + break; + case DW_RLE_startx_length: + offset += ReadULEB(offset, &index); + start = GetAddressAtIndex(index); + offset += ReadULEB(offset, &range_len); + handler_->AddRange(start, start + range_len); + break; + case DW_RLE_offset_pair: + offset += ReadULEB(offset, &start); + offset += ReadULEB(offset, &end); + handler_->AddRange(start + cu_info_->base_address_, + end + cu_info_->base_address_); + break; + case DW_RLE_base_address: + offset += ReadAddress(offset, &cu_info_->base_address_); + break; + case DW_RLE_start_end: + offset += ReadAddress(offset, &start); + offset += ReadAddress(offset, &end); + handler_->AddRange(start, end); + break; + case DW_RLE_start_length: + offset += ReadAddress(offset, &start); + offset += ReadULEB(offset, &end); + handler_->AddRange(start, start + end); + break; + } + } + return false; +} + +// A DWARF rule for recovering the address or value of a register, or +// computing the canonical frame address. There is one subclass of this for +// each '*Rule' member function in CallFrameInfo::Handler. +// +// It's annoying that we have to handle Rules using pointers (because +// the concrete instances can have an arbitrary size). They're small, +// so it would be much nicer if we could just handle them by value +// instead of fretting about ownership and destruction. +// +// It seems like all these could simply be instances of std::tr1::bind, +// except that we need instances to be EqualityComparable, too. +// +// This could logically be nested within State, but then the qualified names +// get horrendous. +class CallFrameInfo::Rule { + public: + virtual ~Rule() { } + + // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using + // this rule. If REG is kCFARegister, then this rule describes how to compute + // the canonical frame address. Return what the HANDLER member function + // returned. + virtual bool Handle(Handler* handler, + uint64_t address, int reg) const = 0; + + // Equality on rules. We use these to decide which rules we need + // to report after a DW_CFA_restore_state instruction. + virtual bool operator==(const Rule& rhs) const = 0; + + bool operator!=(const Rule& rhs) const { return ! (*this == rhs); } + + // Return a pointer to a copy of this rule. + virtual Rule* Copy() const = 0; + + // If this is a base+offset rule, change its base register to REG. + // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.) + virtual void SetBaseRegister(unsigned reg) { } + + // If this is a base+offset rule, change its offset to OFFSET. Otherwise, + // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.) + virtual void SetOffset(long long offset) { } +}; + +// Rule: the value the register had in the caller cannot be recovered. +class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule { + public: + UndefinedRule() { } + ~UndefinedRule() { } + bool Handle(Handler* handler, uint64_t address, int reg) const { + return handler->UndefinedRule(address, reg); + } + bool operator==(const Rule& rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const UndefinedRule* our_rhs = dynamic_cast<const UndefinedRule*>(&rhs); + return (our_rhs != NULL); + } + Rule* Copy() const { return new UndefinedRule(*this); } +}; + +// Rule: the register's value is the same as that it had in the caller. +class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule { + public: + SameValueRule() { } + ~SameValueRule() { } + bool Handle(Handler* handler, uint64_t address, int reg) const { + return handler->SameValueRule(address, reg); + } + bool operator==(const Rule& rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const SameValueRule* our_rhs = dynamic_cast<const SameValueRule*>(&rhs); + return (our_rhs != NULL); + } + Rule* Copy() const { return new SameValueRule(*this); } +}; + +// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER +// may be CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule { + public: + OffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~OffsetRule() { } + bool Handle(Handler* handler, uint64_t address, int reg) const { + return handler->OffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule& rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const OffsetRule* our_rhs = dynamic_cast<const OffsetRule*>(&rhs); + return (our_rhs && + base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule* Copy() const { return new OffsetRule(*this); } + // We don't actually need SetBaseRegister or SetOffset here, since they + // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it + // doesn't make sense to use OffsetRule for computing the CFA: it + // computes the address at which a register is saved, not a value. + private: + int base_register_; + long offset_; +}; + +// Rule: the value the register had in the caller is the value of +// BASE_REGISTER plus offset. BASE_REGISTER may be +// CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule { + public: + ValOffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~ValOffsetRule() { } + bool Handle(Handler* handler, uint64_t address, int reg) const { + return handler->ValOffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule& rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ValOffsetRule* our_rhs = dynamic_cast<const ValOffsetRule*>(&rhs); + return (our_rhs && + base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule* Copy() const { return new ValOffsetRule(*this); } + void SetBaseRegister(unsigned reg) { base_register_ = reg; } + void SetOffset(long long offset) { offset_ = offset; } + private: + int base_register_; + long offset_; +}; + +// Rule: the register has been saved in another register REGISTER_NUMBER_. +class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule { + public: + explicit RegisterRule(int register_number) + : register_number_(register_number) { } + ~RegisterRule() { } + bool Handle(Handler* handler, uint64_t address, int reg) const { + return handler->RegisterRule(address, reg, register_number_); + } + bool operator==(const Rule& rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const RegisterRule* our_rhs = dynamic_cast<const RegisterRule*>(&rhs); + return (our_rhs && register_number_ == our_rhs->register_number_); + } + Rule* Copy() const { return new RegisterRule(*this); } + private: + int register_number_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. +class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule { + public: + explicit ExpressionRule(const string& expression) + : expression_(expression) { } + ~ExpressionRule() { } + bool Handle(Handler* handler, uint64_t address, int reg) const { + return handler->ExpressionRule(address, reg, expression_); + } + bool operator==(const Rule& rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ExpressionRule* our_rhs = dynamic_cast<const ExpressionRule*>(&rhs); + return (our_rhs && expression_ == our_rhs->expression_); + } + Rule* Copy() const { return new ExpressionRule(*this); } + private: + string expression_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. +class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule { + public: + explicit ValExpressionRule(const string& expression) + : expression_(expression) { } + ~ValExpressionRule() { } + bool Handle(Handler* handler, uint64_t address, int reg) const { + return handler->ValExpressionRule(address, reg, expression_); + } + bool operator==(const Rule& rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ValExpressionRule* our_rhs = + dynamic_cast<const ValExpressionRule*>(&rhs); + return (our_rhs && expression_ == our_rhs->expression_); + } + Rule* Copy() const { return new ValExpressionRule(*this); } + private: + string expression_; +}; + +// A map from register numbers to rules. +class CallFrameInfo::RuleMap { + public: + RuleMap() : cfa_rule_(NULL) { } + RuleMap(const RuleMap& rhs) : cfa_rule_(NULL) { *this = rhs; } + ~RuleMap() { Clear(); } + + RuleMap& operator=(const RuleMap& rhs); + + // Set the rule for computing the CFA to RULE. Take ownership of RULE. + void SetCFARule(Rule* rule) { delete cfa_rule_; cfa_rule_ = rule; } + + // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains + // ownership of the rule. We use this for DW_CFA_def_cfa_offset and + // DW_CFA_def_cfa_register, and for detecting references to the CFA before + // a rule for it has been established. + Rule* CFARule() const { return cfa_rule_; } + + // Return the rule for REG, or NULL if there is none. The caller takes + // ownership of the result. + Rule* RegisterRule(int reg) const; + + // Set the rule for computing REG to RULE. Take ownership of RULE. + void SetRegisterRule(int reg, Rule* rule); + + // Make all the appropriate calls to HANDLER as if we were changing from + // this RuleMap to NEW_RULES at ADDRESS. We use this to implement + // DW_CFA_restore_state, where lots of rules can change simultaneously. + // Return true if all handlers returned true; otherwise, return false. + bool HandleTransitionTo(Handler* handler, uint64_t address, + const RuleMap& new_rules) const; + + private: + // A map from register numbers to Rules. + typedef std::map<int, Rule*> RuleByNumber; + + // Remove all register rules and clear cfa_rule_. + void Clear(); + + // The rule for computing the canonical frame address. This RuleMap owns + // this rule. + Rule* cfa_rule_; + + // A map from register numbers to postfix expressions to recover + // their values. This RuleMap owns the Rules the map refers to. + RuleByNumber registers_; +}; + +CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) { + Clear(); + // Since each map owns the rules it refers to, assignment must copy them. + if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy(); + for (RuleByNumber::const_iterator it = rhs.registers_.begin(); + it != rhs.registers_.end(); it++) + registers_[it->first] = it->second->Copy(); + return *this; +} + +CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const { + assert(reg != Handler::kCFARegister); + RuleByNumber::const_iterator it = registers_.find(reg); + if (it != registers_.end()) + return it->second->Copy(); + else + return NULL; +} + +void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) { + assert(reg != Handler::kCFARegister); + assert(rule); + Rule** slot = ®isters_[reg]; + delete *slot; + *slot = rule; +} + +bool CallFrameInfo::RuleMap::HandleTransitionTo( + Handler* handler, + uint64_t address, + const RuleMap& new_rules) const { + // Transition from cfa_rule_ to new_rules.cfa_rule_. + if (cfa_rule_ && new_rules.cfa_rule_) { + if (*cfa_rule_ != *new_rules.cfa_rule_ && + !new_rules.cfa_rule_->Handle(handler, address, + Handler::kCFARegister)) + return false; + } else if (cfa_rule_) { + // this RuleMap has a CFA rule but new_rules doesn't. + // CallFrameInfo::Handler has no way to handle this --- and shouldn't; + // it's garbage input. The instruction interpreter should have + // detected this and warned, so take no action here. + } else if (new_rules.cfa_rule_) { + // This shouldn't be possible: NEW_RULES is some prior state, and + // there's no way to remove entries. + assert(0); + } else { + // Both CFA rules are empty. No action needed. + } + + // Traverse the two maps in order by register number, and report + // whatever differences we find. + RuleByNumber::const_iterator old_it = registers_.begin(); + RuleByNumber::const_iterator new_it = new_rules.registers_.begin(); + while (old_it != registers_.end() && new_it != new_rules.registers_.end()) { + if (old_it->first < new_it->first) { + // This RuleMap has an entry for old_it->first, but NEW_RULES + // doesn't. + // + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } else if (old_it->first > new_it->first) { + // NEW_RULES has entry for new_it->first, but this RuleMap + // doesn't. This shouldn't be possible: NEW_RULES is some prior + // state, and there's no way to remove entries. + assert(0); + } else { + // Both maps have an entry for this register. Report the new + // rule if it is different. + if (*old_it->second != *new_it->second && + !new_it->second->Handle(handler, address, new_it->first)) + return false; + new_it++, old_it++; + } + } + // Finish off entries from this RuleMap with no counterparts in new_rules. + while (old_it != registers_.end()) { + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } + // Since we only make transitions from a rule set to some previously + // saved rule set, and we can only add rules to the map, NEW_RULES + // must have fewer rules than *this. + assert(new_it == new_rules.registers_.end()); + + return true; +} + +// Remove all register rules and clear cfa_rule_. +void CallFrameInfo::RuleMap::Clear() { + delete cfa_rule_; + cfa_rule_ = NULL; + for (RuleByNumber::iterator it = registers_.begin(); + it != registers_.end(); it++) + delete it->second; + registers_.clear(); +} + +// The state of the call frame information interpreter as it processes +// instructions from a CIE and FDE. +class CallFrameInfo::State { + public: + // Create a call frame information interpreter state with the given + // reporter, reader, handler, and initial call frame info address. + State(ByteReader* reader, Handler* handler, Reporter* reporter, + uint64_t address) + : reader_(reader), handler_(handler), reporter_(reporter), + address_(address), entry_(NULL), cursor_(NULL) { } + + // Interpret instructions from CIE, save the resulting rule set for + // DW_CFA_restore instructions, and return true. On error, report + // the problem to reporter_ and return false. + bool InterpretCIE(const CIE& cie); + + // Interpret instructions from FDE, and return true. On error, + // report the problem to reporter_ and return false. + bool InterpretFDE(const FDE& fde); + + private: + // The operands of a CFI instruction, for ParseOperands. + struct Operands { + unsigned register_number; // A register number. + uint64_t offset; // An offset or address. + long signed_offset; // A signed offset. + string expression; // A DWARF expression. + }; + + // Parse CFI instruction operands from STATE's instruction stream as + // described by FORMAT. On success, populate OPERANDS with the + // results, and return true. On failure, report the problem and + // return false. + // + // Each character of FORMAT should be one of the following: + // + // 'r' unsigned LEB128 register number (OPERANDS->register_number) + // 'o' unsigned LEB128 offset (OPERANDS->offset) + // 's' signed LEB128 offset (OPERANDS->signed_offset) + // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) + // '1' a one-byte offset (OPERANDS->offset) + // '2' a two-byte offset (OPERANDS->offset) + // '4' a four-byte offset (OPERANDS->offset) + // '8' an eight-byte offset (OPERANDS->offset) + // 'e' a DW_FORM_block holding a (OPERANDS->expression) + // DWARF expression + bool ParseOperands(const char* format, Operands* operands); + + // Interpret one CFI instruction from STATE's instruction stream, update + // STATE, report any rule changes to handler_, and return true. On + // failure, report the problem and return false. + bool DoInstruction(); + + // The following Do* member functions are subroutines of DoInstruction, + // factoring out the actual work of operations that have several + // different encodings. + + // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and + // return true. On failure, report and return false. (Used for + // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) + bool DoDefCFA(unsigned base_register, long offset); + + // Change the offset of the CFA rule to OFFSET, and return true. On + // failure, report and return false. (Subroutine for + // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) + bool DoDefCFAOffset(long offset); + + // Specify that REG can be recovered using RULE, and return true. On + // failure, report and return false. + bool DoRule(unsigned reg, Rule* rule); + + // Specify that REG can be found at OFFSET from the CFA, and return true. + // On failure, report and return false. (Subroutine for DW_CFA_offset, + // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) + bool DoOffset(unsigned reg, long offset); + + // Specify that the caller's value for REG is the CFA plus OFFSET, + // and return true. On failure, report and return false. (Subroutine + // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) + bool DoValOffset(unsigned reg, long offset); + + // Restore REG to the rule established in the CIE, and return true. On + // failure, report and return false. (Subroutine for DW_CFA_restore and + // DW_CFA_restore_extended.) + bool DoRestore(unsigned reg); + + // Return the section offset of the instruction at cursor. For use + // in error messages. + uint64_t CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } + + // Report that entry_ is incomplete, and return false. For brevity. + bool ReportIncomplete() { + reporter_->Incomplete(entry_->offset, entry_->kind); + return false; + } + + // For reading multi-byte values with the appropriate endianness. + ByteReader* reader_; + + // The handler to which we should report the data we find. + Handler* handler_; + + // For reporting problems in the info we're parsing. + Reporter* reporter_; + + // The code address to which the next instruction in the stream applies. + uint64_t address_; + + // The entry whose instructions we are currently processing. This is + // first a CIE, and then an FDE. + const Entry* entry_; + + // The next instruction to process. + const uint8_t* cursor_; + + // The current set of rules. + RuleMap rules_; + + // The set of rules established by the CIE, used by DW_CFA_restore + // and DW_CFA_restore_extended. We set this after interpreting the + // CIE's instructions. + RuleMap cie_rules_; + + // A stack of saved states, for DW_CFA_remember_state and + // DW_CFA_restore_state. + std::stack<RuleMap> saved_rules_; +}; + +bool CallFrameInfo::State::InterpretCIE(const CIE& cie) { + entry_ = &cie; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + // Note the rules established by the CIE, for use by DW_CFA_restore + // and DW_CFA_restore_extended. + cie_rules_ = rules_; + return true; +} + +bool CallFrameInfo::State::InterpretFDE(const FDE& fde) { + entry_ = &fde; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + return true; +} + +bool CallFrameInfo::State::ParseOperands(const char* format, + Operands* operands) { + size_t len; + const char* operand; + + for (operand = format; *operand; operand++) { + size_t bytes_left = entry_->end - cursor_; + switch (*operand) { + case 'r': + operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'o': + operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 's': + operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'a': + operands->offset = + reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding, + &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case '1': + if (1 > bytes_left) return ReportIncomplete(); + operands->offset = static_cast<unsigned char>(*cursor_++); + break; + + case '2': + if (2 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadTwoBytes(cursor_); + cursor_ += 2; + break; + + case '4': + if (4 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadFourBytes(cursor_); + cursor_ += 4; + break; + + case '8': + if (8 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadEightBytes(cursor_); + cursor_ += 8; + break; + + case 'e': { + size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left || expression_length > bytes_left - len) + return ReportIncomplete(); + cursor_ += len; + operands->expression = string(reinterpret_cast<const char*>(cursor_), + expression_length); + cursor_ += expression_length; + break; + } + + default: + assert(0); + } + } + + return true; +} + +bool CallFrameInfo::State::DoInstruction() { + CIE* cie = entry_->cie; + Operands ops; + + // Our entry's kind should have been set by now. + assert(entry_->kind != kUnknown); + + // We shouldn't have been invoked unless there were more + // instructions to parse. + assert(cursor_ < entry_->end); + + unsigned opcode = *cursor_++; + if ((opcode & 0xc0) != 0) { + switch (opcode & 0xc0) { + // Advance the address. + case DW_CFA_advance_loc: { + size_t code_offset = opcode & 0x3f; + address_ += code_offset * cie->code_alignment_factor; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset: + if (!ParseOperands("o", &ops) || + !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) + return false; + break; + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore: + if (!DoRestore(opcode & 0x3f)) return false; + break; + + // The 'if' above should have excluded this possibility. + default: + assert(0); + } + + // Return here, so the big switch below won't be indented. + return true; + } + + switch (opcode) { + // Set the address. + case DW_CFA_set_loc: + if (!ParseOperands("a", &ops)) return false; + address_ = ops.offset; + break; + + // Advance the address. + case DW_CFA_advance_loc1: + if (!ParseOperands("1", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc2: + if (!ParseOperands("2", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc4: + if (!ParseOperands("4", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_MIPS_advance_loc8: + if (!ParseOperands("8", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa: + if (!ParseOperands("ro", &ops) || + !DoDefCFA(ops.register_number, ops.offset)) + return false; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa_sf: + if (!ParseOperands("rs", &ops) || + !DoDefCFA(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Change the base register used to compute the CFA. + case DW_CFA_def_cfa_register: { + if (!ParseOperands("r", &ops)) return false; + Rule* cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + if (!DoDefCFA(ops.register_number, ops.offset)) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } else { + cfa_rule->SetBaseRegister(ops.register_number); + if (!cfa_rule->Handle(handler_, address_, + Handler::kCFARegister)) + return false; + } + break; + } + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset: + if (!ParseOperands("o", &ops) || + !DoDefCFAOffset(ops.offset)) + return false; + break; + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset_sf: + if (!ParseOperands("s", &ops) || + !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Specify an expression whose value is the CFA. + case DW_CFA_def_cfa_expression: { + if (!ParseOperands("e", &ops)) + return false; + Rule* rule = new ValExpressionRule(ops.expression); + rules_.SetCFARule(rule); + if (!rule->Handle(handler_, address_, + Handler::kCFARegister)) + return false; + break; + } + + // The register's value cannot be recovered. + case DW_CFA_undefined: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new UndefinedRule())) + return false; + break; + } + + // The register's value is unchanged from its value in the caller. + case DW_CFA_same_value: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new SameValueRule())) + return false; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_offset_extended_sf: + if (!ParseOperands("rs", &ops) || + !DoOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_GNU_negative_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + -ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset: + if (!ParseOperands("ro", &ops) || + !DoValOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset_sf: + if (!ParseOperands("rs", &ops) || + !DoValOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register has been saved in another register. + case DW_CFA_register: { + if (!ParseOperands("ro", &ops) || + !DoRule(ops.register_number, new RegisterRule(ops.offset))) + return false; + break; + } + + // An expression yields the address at which the register is saved. + case DW_CFA_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ExpressionRule(ops.expression))) + return false; + break; + } + + // An expression yields the caller's value for the register. + case DW_CFA_val_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ValExpressionRule(ops.expression))) + return false; + break; + } + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore_extended: + if (!ParseOperands("r", &ops) || + !DoRestore( ops.register_number)) + return false; + break; + + // Save the current set of rules on a stack. + case DW_CFA_remember_state: + saved_rules_.push(rules_); + break; + + // Pop the current set of rules off the stack. + case DW_CFA_restore_state: { + if (saved_rules_.empty()) { + reporter_->EmptyStateStack(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + const RuleMap& new_rules = saved_rules_.top(); + if (rules_.CFARule() && !new_rules.CFARule()) { + reporter_->ClearingCFARule(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + rules_.HandleTransitionTo(handler_, address_, new_rules); + rules_ = new_rules; + saved_rules_.pop(); + break; + } + + // No operation. (Padding instruction.) + case DW_CFA_nop: + break; + + // A SPARC register window save: Registers 8 through 15 (%o0-%o7) + // are saved in registers 24 through 31 (%i0-%i7), and registers + // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets + // (0-15 * the register size). The register numbers must be + // hard-coded. A GNU extension, and not a pretty one. + case DW_CFA_GNU_window_save: { + // Save %o0-%o7 in %i0-%i7. + for (int i = 8; i < 16; i++) + if (!DoRule(i, new RegisterRule(i + 16))) + return false; + // Save %l0-%l7 and %i0-%i7 at the CFA. + for (int i = 16; i < 32; i++) + // Assume that the byte reader's address size is the same as + // the architecture's register size. !@#%*^ hilarious. + if (!DoRule(i, new OffsetRule(Handler::kCFARegister, + (i - 16) * reader_->AddressSize()))) + return false; + break; + } + + // I'm not sure what this is. GDB doesn't use it for unwinding. + case DW_CFA_GNU_args_size: + if (!ParseOperands("o", &ops)) return false; + break; + + // An opcode we don't recognize. + default: { + reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } + + return true; +} + +bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { + Rule* rule = new ValOffsetRule(base_register, offset); + rules_.SetCFARule(rule); + return rule->Handle(handler_, address_, + Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoDefCFAOffset(long offset) { + Rule* cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + cfa_rule->SetOffset(offset); + return cfa_rule->Handle(handler_, address_, + Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) { + rules_.SetRegisterRule(reg, rule); + return rule->Handle(handler_, address_, reg); +} + +bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new OffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new ValOffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoRestore(unsigned reg) { + // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. + if (entry_->kind == kCIE) { + reporter_->RestoreInCIE(entry_->offset, CursorOffset()); + return false; + } + Rule* rule = cie_rules_.RegisterRule(reg); + if (!rule) { + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + rule = new SameValueRule(); + } + return DoRule(reg, rule); +} + +bool CallFrameInfo::ReadEntryPrologue(const uint8_t* cursor, Entry* entry) { + const uint8_t* buffer_end = buffer_ + buffer_length_; + + // Initialize enough of ENTRY for use in error reporting. + entry->offset = cursor - buffer_; + entry->start = cursor; + entry->kind = kUnknown; + entry->end = NULL; + + // Read the initial length. This sets reader_'s offset size. + size_t length_size; + uint64_t length = reader_->ReadInitialLength(cursor, &length_size); + if (length_size > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. + if (length > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + + // The length is the number of bytes after the initial length field; + // we have that position handy at this point, so compute the end + // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, + // and the length didn't fit in a size_t, we would have rejected it + // above.) + entry->end = cursor + length; + + // Parse the next field: either the offset of a CIE or a CIE id. + size_t offset_size = reader_->OffsetSize(); + if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); + entry->id = reader_->ReadOffset(cursor); + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. + + // Now we can decide what kind of entry this is. + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + assert(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } + } + + // Now advance cursor past the id. + cursor += offset_size; + + // The fields specific to this kind of entry start here. + entry->fields = cursor; + + entry->cie = NULL; + + return true; +} + +bool CallFrameInfo::ReadCIEFields(CIE* cie) { + const uint8_t* cursor = cie->fields; + size_t len; + + assert(cie->kind == kCIE); + + // Prepare for early exit. + cie->version = 0; + cie->augmentation.clear(); + cie->code_alignment_factor = 0; + cie->data_alignment_factor = 0; + cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; + cie->instructions = 0; + + // Parse the version number. + if (cie->end - cursor < 1) + return ReportIncomplete(cie); + cie->version = reader_->ReadOneByte(cursor); + cursor++; + + // If we don't recognize the version, we can't parse any more fields of the + // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a + // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well; + // the difference between those versions seems to be the same as for + // .debug_frame. + if (cie->version < 1 || cie->version > 4) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + + const uint8_t* augmentation_start = cursor; + const uint8_t* augmentation_end = + reinterpret_cast<const uint8_t*>(memchr(augmentation_start, '\0', + cie->end - augmentation_start)); + if (! augmentation_end) return ReportIncomplete(cie); + cursor = augmentation_end; + cie->augmentation = string(reinterpret_cast<const char*>(augmentation_start), + cursor - augmentation_start); + // Skip the terminating '\0'. + cursor++; + + // Is this CFI augmented? + if (!cie->augmentation.empty()) { + // Is it an augmentation we recognize? + if (cie->augmentation[0] == DW_Z_augmentation_start) { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. Augmentations can have arbitrary + // effects on the form of rest of the content, so we have to give up. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + + if (cie->version >= 4) { + cie->address_size = *cursor++; + if (cie->address_size != 8 && cie->address_size != 4) { + reporter_->UnexpectedAddressSize(cie->offset, cie->address_size); + return false; + } + + cie->segment_size = *cursor++; + if (cie->segment_size != 0) { + reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size); + return false; + } + } + + // Parse the code alignment factor. + cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the data alignment factor. + cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the return address register. This is a ubyte in version 1, and + // a ULEB128 in version 3. + if (cie->version == 1) { + if (cursor >= cie->end) return ReportIncomplete(cie); + cie->return_address_register = uint8_t(*cursor++); + } else { + cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + } + + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. + if (cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const uint8_t* data = cursor; + cursor += data_size; + const uint8_t* data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case DW_Z_has_LSDA: + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. + break; + + case DW_Z_has_personality_routine: + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = + reader_->ReadEncodedPointer(data, cie->personality_encoding, + &len); + if (len > size_t(data_end - data)) + return ReportIncomplete(cie); + data += len; + break; + + case DW_Z_has_FDE_address_encoding: + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case DW_Z_is_signal_trampoline: + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + + // The CIE's instructions start here. + cie->instructions = cursor; + + return true; +} + +bool CallFrameInfo::ReadFDEFields(FDE* fde) { + const uint8_t* cursor = fde->fields; + size_t size; + + fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, + &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. + if (fde->cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. + if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } + + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) + return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } + + // The FDE's instructions start after those. + fde->instructions = cursor; + + return true; +} + +bool CallFrameInfo::Start() { + const uint8_t* buffer_end = buffer_ + buffer_length_; + const uint8_t* cursor; + bool all_ok = true; + const uint8_t* entry_end; + bool ok; + + // Traverse all the entries in buffer_, skipping CIEs and offering + // FDEs to the handler. + for (cursor = buffer_; cursor < buffer_end; + cursor = entry_end, all_ok = all_ok && ok) { + FDE fde; + + // Make it easy to skip this entry with 'continue': assume that + // things are not okay until we've checked all the data, and + // prepare the address of the next entry. + ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. + entry_end = fde.end; + + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + + // In this loop, we skip CIEs. We only parse them fully when we + // parse an FDE that refers to them. This limits our memory + // consumption (beyond the buffer itself) to that needed to + // process the largest single entry. + if (fde.kind != kFDE) { + ok = true; + continue; + } + + // Validate the CIE pointer. + if (fde.id > buffer_length_) { + reporter_->CIEPointerOutOfRange(fde.offset, fde.id); + continue; + } + + CIE cie; + + // Parse this FDE's CIE header. + if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) + continue; + // This had better be an actual CIE. + if (cie.kind != kCIE) { + reporter_->BadCIEId(fde.offset, fde.id); + continue; + } + if (!ReadCIEFields(&cie)) + continue; + + // TODO(nbilling): This could lead to strange behavior if a single buffer + // contained a mixture of DWARF versions as well as address sizes. Not + // sure if it's worth handling such a case. + + // DWARF4 CIE specifies address_size, so use it for this call frame. + if (cie.version >= 4) { + reader_->SetAddressSize(cie.address_size); + } + + // We now have the values that govern both the CIE and the FDE. + cie.cie = &cie; + fde.cie = &cie; + + // Parse the FDE's header. + if (!ReadFDEFields(&fde)) + continue; + + // Call Entry to ask the consumer if they're interested. + if (!handler_->Entry(fde.offset, fde.address, fde.size, + cie.version, cie.augmentation, + cie.return_address_register)) { + // The handler isn't interested in this entry. That's not an error. + ok = true; + continue; + } + + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_ + ->PersonalityRoutine(cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_ + ->LanguageSpecificDataArea(fde.lsda_address, + IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) + continue; + } + } + + // Interpret the CIE's instructions, and then the FDE's instructions. + State state(reader_, handler_, reporter_, fde.address); + ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + + // Report the end of the entry. + handler_->End(); + } + + return all_ok; +} + +const char* CallFrameInfo::KindName(EntryKind kind) { + if (kind == CallFrameInfo::kUnknown) + return "entry"; + else if (kind == CallFrameInfo::kCIE) + return "common information entry"; + else if (kind == CallFrameInfo::kFDE) + return "frame description entry"; + else { + assert (kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; + } +} + +bool CallFrameInfo::ReportIncomplete(Entry* entry) { + reporter_->Incomplete(entry->offset, entry->kind); + return false; +} + +void CallFrameInfo::Reporter::Incomplete(uint64_t offset, + CallFrameInfo::EntryKind kind) { + fprintf(stderr, + "%s: CFI %s at offset 0x%" PRIx64 " in '%s': entry ends early\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str()); +} + +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64_t offset) { + fprintf(stderr, + "%s: CFI at offset 0x%" PRIx64 " in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); +} + +void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64_t offset, + uint64_t cie_offset) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':" + " CIE pointer is out of range: 0x%" PRIx64 "\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); +} + +void CallFrameInfo::Reporter::BadCIEId(uint64_t offset, uint64_t cie_offset) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':" + " CIE pointer does not point to a CIE: 0x%" PRIx64 "\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); +} + +void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64_t offset, + uint8_t address_size) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':" + " CIE specifies unexpected address size: %d\n", + filename_.c_str(), offset, section_.c_str(), address_size); +} + +void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64_t offset, + uint8_t segment_size) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':" + " CIE specifies unexpected segment size: %d\n", + filename_.c_str(), offset, section_.c_str(), segment_size); +} + +void CallFrameInfo::Reporter::UnrecognizedVersion(uint64_t offset, int version) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':" + " CIE specifies unrecognized version: %d\n", + filename_.c_str(), offset, section_.c_str(), version); +} + +void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64_t offset, + const string& aug) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':" + " CIE specifies unrecognized augmentation: '%s'\n", + filename_.c_str(), offset, section_.c_str(), aug.c_str()); +} + +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64_t offset, + uint8_t encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':" + " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64_t offset, + uint8_t encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':" + " 'z' augmentation specifies a pointer encoding for which" + " we have no base address: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); +} + +void CallFrameInfo::Reporter::RestoreInCIE(uint64_t offset, uint64_t insn_offset) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':" + " the DW_CFA_restore instruction at offset 0x%" PRIx64 + " cannot be used in a common information entry\n", + filename_.c_str(), offset, section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::BadInstruction(uint64_t offset, + CallFrameInfo::EntryKind kind, + uint64_t insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':" + " the instruction at offset 0x%" PRIx64 " is unrecognized\n", + filename_.c_str(), CallFrameInfo::KindName(kind), + offset, section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::NoCFARule(uint64_t offset, + CallFrameInfo::EntryKind kind, + uint64_t insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':" + " the instruction at offset 0x%" PRIx64 " assumes that a CFA rule has" + " been set, but none has been set\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::EmptyStateStack(uint64_t offset, + CallFrameInfo::EntryKind kind, + uint64_t insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%" PRIx64 + " should pop a saved state from the stack, but the stack is empty\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::ClearingCFARule(uint64_t offset, + CallFrameInfo::EntryKind kind, + uint64_t insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%" PRIx64 + " would clear the CFA rule in effect\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +} // namespace google_breakpad diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h new file mode 100644 index 0000000000..97e5fea90a --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h @@ -0,0 +1,1494 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file contains definitions related to the DWARF2/3 reader and +// it's handler interfaces. +// The DWARF2/3 specification can be found at +// http://dwarf.freestandards.org and should be considered required +// reading if you wish to modify the implementation. +// Only a cursory attempt is made to explain terminology that is +// used here, as it is much better explained in the standard documents +#ifndef COMMON_DWARF_DWARF2READER_H__ +#define COMMON_DWARF_DWARF2READER_H__ + +#include <assert.h> +#include <stdint.h> + +#include <list> +#include <map> +#include <string> +#include <utility> +#include <vector> +#include <memory> + +#include "common/dwarf/bytereader.h" +#include "common/dwarf/dwarf2enums.h" +#include "common/dwarf/types.h" +#include "common/using_std_string.h" +#include "common/dwarf/elf_reader.h" + +namespace google_breakpad { +struct LineStateMachine; +class Dwarf2Handler; +class LineInfoHandler; +class DwpReader; + +// This maps from a string naming a section to a pair containing a +// the data for the section, and the size of the section. +typedef std::map<string, std::pair<const uint8_t*, uint64_t> > SectionMap; + +// Abstract away the difference between elf and mach-o section names. +// Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in +// the elf form, ".section_name". +const SectionMap::const_iterator GetSectionByName(const SectionMap& + sections, const char* name); + +// Most of the time, this struct functions as a simple attribute and form pair. +// However, Dwarf5 DW_FORM_implicit_const means that a form may have its value +// in line in the abbrev table, and that value must be associated with the +// pair until the attr's value is needed. +struct AttrForm { + AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) : + attr_(attr), form_(form), value_(value) { } + + enum DwarfAttribute attr_; + enum DwarfForm form_; + uint64_t value_; +}; +typedef std::list<AttrForm> AttributeList; +typedef AttributeList::iterator AttributeIterator; +typedef AttributeList::const_iterator ConstAttributeIterator; + +struct LineInfoHeader { + uint64_t total_length; + uint16_t version; + uint64_t prologue_length; + uint8_t min_insn_length; // insn stands for instructin + bool default_is_stmt; // stmt stands for statement + int8_t line_base; + uint8_t line_range; + uint8_t opcode_base; + // Use a pointer so that signalsafe_addr2line is able to use this structure + // without heap allocation problem. + std::vector<unsigned char>* std_opcode_lengths; +}; + +class LineInfo { + public: + + // Initializes a .debug_line reader. Buffer and buffer length point + // to the beginning and length of the line information to read. + // Reader is a ByteReader class that has the endianness set + // properly. + LineInfo(const uint8_t* buffer, uint64_t buffer_length, + ByteReader* reader, const uint8_t* string_buffer, + size_t string_buffer_length, const uint8_t* line_string_buffer, + size_t line_string_buffer_length, LineInfoHandler* handler); + + virtual ~LineInfo() { + if (header_.std_opcode_lengths) { + delete header_.std_opcode_lengths; + } + } + + // Start processing line info, and calling callbacks in the handler. + // Consumes the line number information for a single compilation unit. + // Returns the number of bytes processed. + uint64_t Start(); + + // Process a single line info opcode at START using the state + // machine at LSM. Return true if we should define a line using the + // current state of the line state machine. Place the length of the + // opcode in LEN. + // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm + // passes the address of PC. In other words, LSM_PASSES_PC will be + // set to true, if the following condition is met. + // + // lsm's old address < PC <= lsm's new address + static bool ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader& header, + const uint8_t* start, + struct LineStateMachine* lsm, + size_t* len, + uintptr pc, + bool* lsm_passes_pc); + + private: + // Reads the DWARF2/3 header for this line info. + void ReadHeader(); + + // Reads the DWARF2/3 line information + void ReadLines(); + + // Read the DWARF5 types and forms for the file and directory tables. + void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types, + uint32_t* content_forms, uint32_t max_types, + uint32_t* format_count); + + // Read a row from the dwarf5 LineInfo file table. + void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types, + const uint32_t* content_forms, uint32_t row, + uint32_t format_count); + + // Read and return the data at *lineptr according to form. Advance + // *lineptr appropriately. + uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr); + + // Read and return the data at *lineptr according to form. Advance + // *lineptr appropriately. + const char* ReadStringForm(uint32_t form, const uint8_t** lineptr); + + // The associated handler to call processing functions in + LineInfoHandler* handler_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // A DWARF line info header. This is not the same size as in the actual file, + // as the one in the file may have a 32 bit or 64 bit lengths + + struct LineInfoHeader header_; + + // buffer is the buffer for our line info, starting at exactly where + // the line info to read is. after_header is the place right after + // the end of the line information header. + const uint8_t* buffer_; +#ifndef NDEBUG + uint64_t buffer_length_; +#endif + // Convenience pointers into .debug_str and .debug_line_str. These exactly + // correspond to those in the compilation unit. + const uint8_t* string_buffer_; +#ifndef NDEBUG + uint64_t string_buffer_length_; +#endif + const uint8_t* line_string_buffer_; +#ifndef NDEBUG + uint64_t line_string_buffer_length_; +#endif + + const uint8_t* after_header_; +}; + +// This class is the main interface between the line info reader and +// the client. The virtual functions inside this get called for +// interesting events that happen during line info reading. The +// default implementation does nothing + +class LineInfoHandler { + public: + LineInfoHandler() { } + + virtual ~LineInfoHandler() { } + + // Called when we define a directory. NAME is the directory name, + // DIR_NUM is the directory number + virtual void DefineDir(const string& name, uint32_t dir_num) { } + + // Called when we define a filename. NAME is the filename, FILE_NUM + // is the file number which is -1 if the file index is the next + // index after the last numbered index (this happens when files are + // dynamically defined by the line program), DIR_NUM is the + // directory index for the directory name of this file, MOD_TIME is + // the modification time of the file, and LENGTH is the length of + // the file + virtual void DefineFile(const string& name, int32_t file_num, + uint32_t dir_num, uint64_t mod_time, + uint64_t length) { } + + // Called when the line info reader has a new line, address pair + // ready for us. ADDRESS is the address of the code, LENGTH is the + // length of its machine code in bytes, FILE_NUM is the file number + // containing the code, LINE_NUM is the line number in that file for + // the code, and COLUMN_NUM is the column number the code starts at, + // if we know it (0 otherwise). + virtual void AddLine(uint64_t address, uint64_t length, + uint32_t file_num, uint32_t line_num, uint32_t column_num) { } +}; + +class RangeListHandler { + public: + RangeListHandler() { } + + virtual ~RangeListHandler() { } + + // Add a range. + virtual void AddRange(uint64_t begin, uint64_t end) { }; + + // Finish processing the range list. + virtual void Finish() { }; +}; + +class RangeListReader { + public: + // Reading a range list requires quite a bit of information + // from the compilation unit. Package it conveniently. + struct CURangesInfo { + CURangesInfo() : + version_(0), base_address_(0), ranges_base_(0), + buffer_(nullptr), size_(0), addr_buffer_(nullptr), + addr_buffer_size_(0), addr_base_(0) { } + + uint16_t version_; + // Ranges base address. Ordinarily the CU's low_pc. + uint64_t base_address_; + // Offset into .debug_rnglists for this CU's rangelists. + uint64_t ranges_base_; + // Contents of either .debug_ranges or .debug_rnglists. + const uint8_t* buffer_; + uint64_t size_; + // Contents of .debug_addr. This cu's contribution starts at + // addr_base_ + const uint8_t* addr_buffer_; + uint64_t addr_buffer_size_; + uint64_t addr_base_; + }; + + RangeListReader(ByteReader* reader, CURangesInfo* cu_info, + RangeListHandler* handler) : + reader_(reader), cu_info_(cu_info), handler_(handler), + offset_array_(0) { } + + // Read ranges from cu_info as specified by form and data. + bool ReadRanges(enum DwarfForm form, uint64_t data); + + private: + // Read dwarf4 .debug_ranges at offset. + bool ReadDebugRanges(uint64_t offset); + // Read dwarf5 .debug_rngslist at offset. + bool ReadDebugRngList(uint64_t offset); + + // Convenience functions to handle the mechanics of reading entries in the + // ranges section. + uint64_t ReadULEB(uint64_t offset, uint64_t* value) { + size_t len; + *value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len); + return len; + } + + uint64_t ReadAddress(uint64_t offset, uint64_t* value) { + *value = reader_->ReadAddress(cu_info_->buffer_ + offset); + return reader_->AddressSize(); + } + + // Read the address at this CU's addr_index in the .debug_addr section. + uint64_t GetAddressAtIndex(uint64_t addr_index) { + assert(cu_info_->addr_buffer_ != nullptr); + uint64_t offset = + cu_info_->addr_base_ + addr_index * reader_->AddressSize(); + assert(offset < cu_info_->addr_buffer_size_); + return reader_->ReadAddress(cu_info_->addr_buffer_ + offset); + } + + ByteReader* reader_; + CURangesInfo* cu_info_; + RangeListHandler* handler_; + uint64_t offset_array_; +}; + +// This class is the main interface between the reader and the +// client. The virtual functions inside this get called for +// interesting events that happen during DWARF2 reading. +// The default implementation skips everything. +class Dwarf2Handler { + public: + Dwarf2Handler() { } + + virtual ~Dwarf2Handler() { } + + // Start to process a compilation unit at OFFSET from the beginning of the + // .debug_info section. Return false if you would like to skip this + // compilation unit. + virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size, + uint8_t offset_size, uint64_t cu_length, + uint8_t dwarf_version) { return false; } + + // When processing a skeleton compilation unit, resulting from a split + // DWARF compilation, once the skeleton debug info has been read, + // the reader will call this function to ask the client if it needs + // the full debug info from the .dwo or .dwp file. Return true if + // you need it, or false to skip processing the split debug info. + virtual bool NeedSplitDebugInfo() { return true; } + + // Start to process a split compilation unit at OFFSET from the beginning of + // the debug_info section in the .dwp/.dwo file. Return false if you would + // like to skip this compilation unit. + virtual bool StartSplitCompilationUnit(uint64_t offset, + uint64_t cu_length) { return false; } + + // Start to process a DIE at OFFSET from the beginning of the .debug_info + // section. Return false if you would like to skip this DIE. + virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; } + + // Called when we have an attribute with unsigned data to give to our + // handler. The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeUnsigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { } + + // Called when we have an attribute with signed data to give to our handler. + // The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeSigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64_t data) { } + + // Called when we have an attribute whose value is a reference to + // another DIE. The attribute belongs to the DIE at OFFSET from the + // beginning of the .debug_info section. Its name is ATTR, its form + // is FORM, and the offset of the DIE being referred to from the + // beginning of the .debug_info section is DATA. + virtual void ProcessAttributeReference(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { } + + // Called when we have an attribute with a buffer of data to give to our + // handler. The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, DATA points to + // the buffer's contents, and its length in bytes is LENGTH. The buffer is + // owned by the caller, not the callee, and may not persist for very long. + // If you want the data to be available later, it needs to be copied. + virtual void ProcessAttributeBuffer(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const uint8_t* data, + uint64_t len) { } + + // Called when we have an attribute with string data to give to our handler. + // The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeString(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { } + + // Called when we have an attribute whose value is the 64-bit signature + // of a type unit in the .debug_types section. OFFSET is the offset of + // the DIE whose attribute we're reporting. ATTR and FORM are the + // attribute's name and form. SIGNATURE is the type unit's signature. + virtual void ProcessAttributeSignature(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t signature) { } + + // Called when finished processing the DIE at OFFSET. + // Because DWARF2/3 specifies a tree of DIEs, you may get starts + // before ends of the previous DIE, as we process children before + // ending the parent. + virtual void EndDIE(uint64_t offset) { } + +}; + +// The base of DWARF2/3 debug info is a DIE (Debugging Information +// Entry. +// DWARF groups DIE's into a tree and calls the root of this tree a +// "compilation unit". Most of the time, there is one compilation +// unit in the .debug_info section for each file that had debug info +// generated. +// Each DIE consists of + +// 1. a tag specifying a thing that is being described (ie +// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc +// 2. attributes (such as DW_AT_location for location in memory, +// DW_AT_name for name), and data for each attribute. +// 3. A flag saying whether the DIE has children or not + +// In order to gain some amount of compression, the format of +// each DIE (tag name, attributes and data forms for the attributes) +// are stored in a separate table called the "abbreviation table". +// This is done because a large number of DIEs have the exact same tag +// and list of attributes, but different data for those attributes. +// As a result, the .debug_info section is just a stream of data, and +// requires reading of the .debug_abbrev section to say what the data +// means. + +// As a warning to the user, it should be noted that the reason for +// using absolute offsets from the beginning of .debug_info is that +// DWARF2/3 supports referencing DIE's from other DIE's by their offset +// from either the current compilation unit start, *or* the beginning +// of the .debug_info section. This means it is possible to reference +// a DIE in one compilation unit from a DIE in another compilation +// unit. This style of reference is usually used to eliminate +// duplicated information that occurs across compilation +// units, such as base types, etc. GCC 3.4+ support this with +// -feliminate-dwarf2-dups. Other toolchains will sometimes do +// duplicate elimination in the linker. + +class CompilationUnit { + public: + + // Initialize a compilation unit. This requires a map of sections, + // the offset of this compilation unit in the .debug_info section, a + // ByteReader, and a Dwarf2Handler class to call callbacks in. + CompilationUnit(const string& path, const SectionMap& sections, + uint64_t offset, ByteReader* reader, Dwarf2Handler* handler); + virtual ~CompilationUnit() { + if (abbrevs_) delete abbrevs_; + } + + // Initialize a compilation unit from a .dwo or .dwp file. + // In this case, we need the .debug_addr section from the + // executable file that contains the corresponding skeleton + // compilation unit. We also inherit the Dwarf2Handler from + // the executable file, and call it as if we were still + // processing the original compilation unit. + void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length, + uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id); + + // Begin reading a Dwarf2 compilation unit, and calling the + // callbacks in the Dwarf2Handler + + // Return the full length of the compilation unit, including + // headers. This plus the starting offset passed to the constructor + // is the offset of the end of the compilation unit --- and the + // start of the next compilation unit, if there is one. + uint64_t Start(); + + private: + + // This struct represents a single DWARF2/3 abbreviation + // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a + // tag and a list of attributes, as well as the data form of each attribute. + struct Abbrev { + uint64_t number; + enum DwarfTag tag; + bool has_children; + AttributeList attributes; + }; + + // A DWARF2/3 compilation unit header. This is not the same size as + // in the actual file, as the one in the file may have a 32 bit or + // 64 bit length. + struct CompilationUnitHeader { + uint64_t length; + uint16_t version; + uint64_t abbrev_offset; + uint8_t address_size; + } header_; + + // Reads the DWARF2/3 header for this compilation unit. + void ReadHeader(); + + // Reads the DWARF2/3 abbreviations for this compilation unit + void ReadAbbrevs(); + + // Read the abbreviation offset for this compilation unit + size_t ReadAbbrevOffset(const uint8_t* headerptr); + + // Read the address size for this compilation unit + size_t ReadAddressSize(const uint8_t* headerptr); + + // Read the DWO id from a split or skeleton compilation unit header + size_t ReadDwoId(const uint8_t* headerptr); + + // Read the type signature from a type or split type compilation unit header + size_t ReadTypeSignature(const uint8_t* headerptr); + + // Read the DWO id from a split or skeleton compilation unit header + size_t ReadTypeOffset(const uint8_t* headerptr); + + // Processes a single DIE for this compilation unit and return a new + // pointer just past the end of it + const uint8_t* ProcessDIE(uint64_t dieoffset, + const uint8_t* start, + const Abbrev& abbrev); + + // Processes a single attribute and return a new pointer just past the + // end of it + const uint8_t* ProcessAttribute(uint64_t dieoffset, + const uint8_t* start, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t implicit_const); + + // Special version of ProcessAttribute, for finding str_offsets_base and + // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5. + const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset, + const uint8_t* start, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t implicit_const); + + // Called when we have an attribute with unsigned data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + // If we see a DW_AT_GNU_dwo_id attribute, save the value so that + // we can find the debug info in a .dwo or .dwp file. + void ProcessAttributeUnsigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { + if (attr == DW_AT_GNU_dwo_id) { + dwo_id_ = data; + } + else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) { + addr_base_ = data; + } + else if (attr == DW_AT_str_offsets_base) { + str_offsets_base_ = data; + } + else if (attr == DW_AT_GNU_ranges_base || attr == DW_AT_rnglists_base) { + ranges_base_ = data; + } + // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5, + // that base will apply to DW_AT_ranges attributes in the + // skeleton CU as well as in the .dwo/.dwp files. + else if (attr == DW_AT_ranges && is_split_dwarf_) { + data += ranges_base_; + } + handler_->ProcessAttributeUnsigned(offset, attr, form, data); + } + + // Called when we have an attribute with signed data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + void ProcessAttributeSigned(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64_t data) { + handler_->ProcessAttributeSigned(offset, attr, form, data); + } + + // Called when we have an attribute with a buffer of data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA, and the + // length of the buffer is LENGTH. + void ProcessAttributeBuffer(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const uint8_t* data, + uint64_t len) { + handler_->ProcessAttributeBuffer(offset, attr, form, data, len); + } + + // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx, + // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4. + // Retrieves the data and calls through to ProcessAttributeString. + void ProcessFormStringIndex(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t str_index); + + // Called when we have an attribute with string data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + // If we see a DW_AT_GNU_dwo_name attribute, save the value so + // that we can find the debug info in a .dwo or .dwp file. + void ProcessAttributeString(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data) { + if (attr == DW_AT_GNU_dwo_name || attr == DW_AT_dwo_name) + dwo_name_ = data; + handler_->ProcessAttributeString(offset, attr, form, data); + } + + // Called to handle common portions of DW_FORM_addrx and variations, as well + // as DW_FORM_GNU_addr_index. + void ProcessAttributeAddrIndex(uint64_t offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t addr_index) { + const uint8_t* addr_ptr = + addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); + ProcessAttributeUnsigned( + offset, attr, form, reader_->ReadAddress(addr_ptr)); + } + + // Processes all DIEs for this compilation unit + void ProcessDIEs(); + + // Skips the die with attributes specified in ABBREV starting at + // START, and return the new place to position the stream to. + const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev); + + // Skips the attribute starting at START, with FORM, and return the + // new place to position the stream to. + const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form); + + // Process the actual debug information in a split DWARF file. + void ProcessSplitDwarf(); + + // Read the debug sections from a .dwo file. + void ReadDebugSectionsFromDwo(ElfReader* elf_reader, + SectionMap* sections); + + // Path of the file containing the debug information. + const string path_; + + // Offset from section start is the offset of this compilation unit + // from the beginning of the .debug_info section. + uint64_t offset_from_section_start_; + + // buffer is the buffer for our CU, starting at .debug_info + offset + // passed in from constructor. + // after_header points to right after the compilation unit header. + const uint8_t* buffer_; + uint64_t buffer_length_; + const uint8_t* after_header_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // The map of sections in our file to buffers containing their data + const SectionMap& sections_; + + // The associated handler to call processing functions in + Dwarf2Handler* handler_; + + // Set of DWARF2/3 abbreviations for this compilation unit. Indexed + // by abbreviation number, which means that abbrevs_[0] is not + // valid. + std::vector<Abbrev>* abbrevs_; + + // String section buffer and length, if we have a string section. + // This is here to avoid doing a section lookup for strings in + // ProcessAttribute, which is in the hot path for DWARF2 reading. + const uint8_t* string_buffer_; + uint64_t string_buffer_length_; + + // Similarly for .debug_line_string. + const uint8_t* line_string_buffer_; + uint64_t line_string_buffer_length_; + + // String offsets section buffer and length, if we have a string offsets + // section (.debug_str_offsets or .debug_str_offsets.dwo). + const uint8_t* str_offsets_buffer_; + uint64_t str_offsets_buffer_length_; + + // Address section buffer and length, if we have an address section + // (.debug_addr). + const uint8_t* addr_buffer_; + uint64_t addr_buffer_length_; + + // Flag indicating whether this compilation unit is part of a .dwo + // or .dwp file. If true, we are reading this unit because a + // skeleton compilation unit in an executable file had a + // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. + // In a .dwo file, we expect the string offsets section to + // have a ".dwo" suffix, and we will use the ".debug_addr" section + // associated with the skeleton compilation unit. + bool is_split_dwarf_; + + // Flag indicating if it's a Type Unit (only applicable to DWARF v5). + bool is_type_unit_; + + // The value of the DW_AT_GNU_dwo_id attribute, if any. + uint64_t dwo_id_; + + // The value of the DW_AT_GNU_type_signature attribute, if any. + uint64_t type_signature_; + + // The value of the DW_AT_GNU_type_offset attribute, if any. + size_t type_offset_; + + // The value of the DW_AT_GNU_dwo_name attribute, if any. + const char* dwo_name_; + + // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute + // from the skeleton CU. + uint64_t skeleton_dwo_id_; + + // The value of the DW_AT_GNU_ranges_base or DW_AT_rnglists_base attribute, + // if any. + uint64_t ranges_base_; + + // The value of the DW_AT_GNU_addr_base attribute, if any. + uint64_t addr_base_; + + // The value of DW_AT_str_offsets_base attribute, if any. + uint64_t str_offsets_base_; + + // True if we have already looked for a .dwp file. + bool have_checked_for_dwp_; + + // Path to the .dwp file. + string dwp_path_; + + // ByteReader for the DWP file. + std::unique_ptr<ByteReader> dwp_byte_reader_; + + // DWP reader. + std::unique_ptr<DwpReader> dwp_reader_; +}; + +// A Reader for a .dwp file. Supports the fetching of DWARF debug +// info for a given dwo_id. +// +// There are two versions of .dwp files. In both versions, the +// .dwp file is an ELF file containing only debug sections. +// In Version 1, the file contains many copies of each debug +// section, one for each .dwo file that is packaged in the .dwp +// file, and the .debug_cu_index section maps from the dwo_id +// to a set of section indexes. In Version 2, the file contains +// one of each debug section, and the .debug_cu_index section +// maps from the dwo_id to a set of offsets and lengths that +// identify each .dwo file's contribution to the larger sections. + +class DwpReader { + public: + DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); + + ~DwpReader(); + + // Read the CU index and initialize data members. + void Initialize(); + + // Read the debug sections for the given dwo_id. + void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections); + + private: + // Search a v1 hash table for "dwo_id". Returns the slot index + // where the dwo_id was found, or -1 if it was not found. + int LookupCU(uint64_t dwo_id); + + // Search a v2 hash table for "dwo_id". Returns the row index + // in the offsets and sizes tables, or 0 if it was not found. + uint32_t LookupCUv2(uint64_t dwo_id); + + // The ELF reader for the .dwp file. + ElfReader* elf_reader_; + + // The ByteReader for the .dwp file. + const ByteReader& byte_reader_; + + // Pointer to the .debug_cu_index section. + const char* cu_index_; + + // Size of the .debug_cu_index section. + size_t cu_index_size_; + + // Pointer to the .debug_str.dwo section. + const char* string_buffer_; + + // Size of the .debug_str.dwo section. + size_t string_buffer_size_; + + // Version of the .dwp file. We support versions 1 and 2 currently. + int version_; + + // Number of columns in the section tables (version 2). + unsigned int ncolumns_; + + // Number of units in the section tables (version 2). + unsigned int nunits_; + + // Number of slots in the hash table. + unsigned int nslots_; + + // Pointer to the beginning of the hash table. + const char* phash_; + + // Pointer to the beginning of the index table. + const char* pindex_; + + // Pointer to the beginning of the section index pool (version 1). + const char* shndx_pool_; + + // Pointer to the beginning of the section offset table (version 2). + const char* offset_table_; + + // Pointer to the beginning of the section size table (version 2). + const char* size_table_; + + // Contents of the sections of interest (version 2). + const char* abbrev_data_; + size_t abbrev_size_; + const char* info_data_; + size_t info_size_; + const char* str_offsets_data_; + size_t str_offsets_size_; +}; + +// This class is a reader for DWARF's Call Frame Information. CFI +// describes how to unwind stack frames --- even for functions that do +// not follow fixed conventions for saving registers, whose frame size +// varies as they execute, etc. +// +// CFI describes, at each machine instruction, how to compute the +// stack frame's base address, how to find the return address, and +// where to find the saved values of the caller's registers (if the +// callee has stashed them somewhere to free up the registers for its +// own use). +// +// For example, suppose we have a function whose machine code looks +// like this (imagine an assembly language that looks like C, for a +// machine with 32-bit registers, and a stack that grows towards lower +// addresses): +// +// func: ; entry point; return address at sp +// func+0: sp = sp - 16 ; allocate space for stack frame +// func+1: sp[12] = r0 ; save r0 at sp+12 +// ... ; other code, not frame-related +// func+10: sp -= 4; *sp = x ; push some x on the stack +// ... ; other code, not frame-related +// func+20: r0 = sp[16] ; restore saved r0 +// func+21: sp += 20 ; pop whole stack frame +// func+22: pc = *sp; sp += 4 ; pop return address and jump to it +// +// DWARF CFI is (a very compressed representation of) a table with a +// row for each machine instruction address and a column for each +// register showing how to restore it, if possible. +// +// A special column named "CFA", for "Canonical Frame Address", tells how +// to compute the base address of the frame; registers' entries may +// refer to the CFA in describing where the registers are saved. +// +// Another special column, named "RA", represents the return address. +// +// For example, here is a complete (uncompressed) table describing the +// function above: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 cfa[0] +// func+2: sp+16 cfa[-4] cfa[0] +// func+11: sp+20 cfa[-4] cfa[0] +// func+21: sp+20 cfa[0] +// func+22: sp cfa[0] +// +// Some things to note here: +// +// - Each row describes the state of affairs *before* executing the +// instruction at the given address. Thus, the row for func+0 +// describes the state before we allocate the stack frame. In the +// next row, the formula for computing the CFA has changed, +// reflecting that allocation. +// +// - The other entries are written in terms of the CFA; this allows +// them to remain unchanged as the stack pointer gets bumped around. +// For example, the rule for recovering the return address (the "ra" +// column) remains unchanged throughout the function, even as the +// stack pointer takes on three different offsets from the return +// address. +// +// - Although we haven't shown it, most calling conventions designate +// "callee-saves" and "caller-saves" registers. The callee must +// preserve the values of callee-saves registers; if it uses them, +// it must save their original values somewhere, and restore them +// before it returns. In contrast, the callee is free to trash +// caller-saves registers; if the callee uses these, it will +// probably not bother to save them anywhere, and the CFI will +// probably mark their values as "unrecoverable". +// +// (However, since the caller cannot assume the callee was going to +// save them, caller-saves registers are probably dead in the caller +// anyway, so compilers usually don't generate CFA for caller-saves +// registers.) +// +// - Exactly where the CFA points is a matter of convention that +// depends on the architecture and ABI in use. In the example, the +// CFA is the value the stack pointer had upon entry to the +// function, pointing at the saved return address. But on the x86, +// the call frame information generated by GCC follows the +// convention that the CFA is the address *after* the saved return +// address. +// +// But by definition, the CFA remains constant throughout the +// lifetime of the frame. This makes it a useful value for other +// columns to refer to. It is also gives debuggers a useful handle +// for identifying a frame. +// +// If you look at the table above, you'll notice that a given entry is +// often the same as the one immediately above it: most instructions +// change only one or two aspects of the stack frame, if they affect +// it at all. The DWARF format takes advantage of this fact, and +// reduces the size of the data by mentioning only the addresses and +// columns at which changes take place. So for the above, DWARF CFI +// data would only actually mention the following: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 +// func+2: cfa[-4] +// func+11: sp+20 +// func+21: r0 +// func+22: sp +// +// In fact, this is the way the parser reports CFI to the consumer: as +// a series of statements of the form, "At address X, column Y changed +// to Z," and related conventions for describing the initial state. +// +// Naturally, it would be impractical to have to scan the entire +// program's CFI, noting changes as we go, just to recover the +// unwinding rules in effect at one particular instruction. To avoid +// this, CFI data is grouped into "entries", each of which covers a +// specified range of addresses and begins with a complete statement +// of the rules for all recoverable registers at that starting +// address. Each entry typically covers a single function. +// +// Thus, to compute the contents of a given row of the table --- that +// is, rules for recovering the CFA, RA, and registers at a given +// instruction --- the consumer should find the entry that covers that +// instruction's address, start with the initial state supplied at the +// beginning of the entry, and work forward until it has processed all +// the changes up to and including those for the present instruction. +// +// There are seven kinds of rules that can appear in an entry of the +// table: +// +// - "undefined": The given register is not preserved by the callee; +// its value cannot be recovered. +// +// - "same value": This register has the same value it did in the callee. +// +// - offset(N): The register is saved at offset N from the CFA. +// +// - val_offset(N): The value the register had in the caller is the +// CFA plus offset N. (This is usually only useful for describing +// the stack pointer.) +// +// - register(R): The register's value was saved in another register R. +// +// - expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the address at which the +// register was saved. +// +// - val_expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the value the register +// had in the caller. + +class CallFrameInfo { + public: + // The different kinds of entries one finds in CFI. Used internally, + // and for error reporting. + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; + + // The handler class to which the parser hands the parsed call frame + // information. Defined below. + class Handler; + + // A reporter class, which CallFrameInfo uses to report errors + // encountered while parsing call frame information. Defined below. + class Reporter; + + // Create a DWARF CFI parser. BUFFER points to the contents of the + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. + // REPORTER is an error reporter the parser should use to report + // problems. READER is a ByteReader instance that has the endianness and + // address size set properly. Report the data we find to HANDLER. + // + // This class can also parse Linux C++ exception handling data, as found + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is + // placed in loadable segments so that it is present in the program's + // address space, and is interpreted by the C++ runtime to search the + // call stack for a handler interested in the exception being thrown, + // actually pop the frames, and find cleanup code to run. + // + // There are two differences between the call frame information described + // in the DWARF standard and the exception handling data Linux places in + // the .eh_frame section: + // + // - Exception handling data uses uses a different format for call frame + // information entry headers. The distinguished CIE id, the way FDEs + // refer to their CIEs, and the way the end of the series of entries is + // determined are all slightly different. + // + // If the constructor's EH_FRAME argument is true, then the + // CallFrameInfo parses the entry headers as Linux C++ exception + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses. See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method. + // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html + CallFrameInfo(const uint8_t* buffer, size_t buffer_length, + ByteReader* reader, Handler* handler, Reporter* reporter, + bool eh_frame = false) + : buffer_(buffer), buffer_length_(buffer_length), + reader_(reader), handler_(handler), reporter_(reporter), + eh_frame_(eh_frame) { } + + ~CallFrameInfo() { } + + // Parse the entries in BUFFER, reporting what we find to HANDLER. + // Return true if we reach the end of the section successfully, or + // false if we encounter an error. + bool Start(); + + // Return the textual name of KIND. For error reporting. + static const char* KindName(EntryKind kind); + + private: + + struct CIE; + + // A CFI entry, either an FDE or a CIE. + struct Entry { + // The starting offset of the entry in the section, for error + // reporting. + size_t offset; + + // The start of this entry in the buffer. + const uint8_t* start; + + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + + // The end of this entry's common prologue (initial length and id), and + // the start of this entry's kind-specific fields. + const uint8_t* fields; + + // The start of this entry's instructions. + const uint8_t* instructions; + + // The address past the entry's last byte in the buffer. (Note that + // since offset points to the entry's initial length field, and the + // length field is the number of bytes after that field, this is not + // simply buffer_ + offset + length.) + const uint8_t* end; + + // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. + uint64_t id; + + // The CIE that applies to this entry, if we've parsed it. If this is a + // CIE, then this field points to this structure. + CIE* cie; + }; + + // A common information entry (CIE). + struct CIE: public Entry { + uint8_t version; // CFI data version number + string augmentation; // vendor format extension markers + uint64_t code_alignment_factor; // scale for code address adjustments + int data_alignment_factor; // scale for stack pointer adjustments + unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. + bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64_t personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. + DwarfPointerEncoding pointer_encoding; + + // These were only introduced in DWARF4, so will not be set in older + // versions. + uint8_t address_size; + uint8_t segment_size; + }; + + // A frame description entry (FDE). + struct FDE: public Entry { + uint64_t address; // start address of described code + uint64_t size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64_t lsda_address; + }; + + // Internal use. + class Rule; + class UndefinedRule; + class SameValueRule; + class OffsetRule; + class ValOffsetRule; + class RegisterRule; + class ExpressionRule; + class ValExpressionRule; + class RuleMap; + class State; + + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. + bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry); + + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. + bool ReadCIEFields(CIE* cie); + + // Parse the fields of an FDE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of *FDE are + // initialized; use FDE->fields and FDE->end as the start and limit for + // parsing. Assume that FDE->cie is fully initialized. On success, + // populate the rest of *FDE, and return true; on failure, report the + // problem and return false. + bool ReadFDEFields(FDE* fde); + + // Report that ENTRY is incomplete, and return false. This is just a + // trivial wrapper for invoking reporter_->Incomplete; it provides a + // little brevity. + bool ReportIncomplete(Entry* entry); + + // Return true if ENCODING has the DW_EH_PE_indirect bit set. + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { + return encoding & DW_EH_PE_indirect; + } + + // The contents of the DWARF .debug_info section we're parsing. + const uint8_t* buffer_; + size_t buffer_length_; + + // For reading multi-byte values with the appropriate endianness. + ByteReader* reader_; + + // The handler to which we should report the data we find. + Handler* handler_; + + // For reporting problems in the info we're parsing. + Reporter* reporter_; + + // True if we are processing .eh_frame-format data. + bool eh_frame_; +}; + +// The handler class for CallFrameInfo. The a CFI parser calls the +// member functions of a handler object to report the data it finds. +class CallFrameInfo::Handler { + public: + // The pseudo-register number for the canonical frame address. + enum { kCFARegister = -1 }; + + Handler() { } + virtual ~Handler() { } + + // The parser has found CFI for the machine code at ADDRESS, + // extending for LENGTH bytes. OFFSET is the offset of the frame + // description entry in the section, for use in error messages. + // VERSION is the version number of the CFI format. AUGMENTATION is + // a string describing any producer-specific extensions present in + // the data. RETURN_ADDRESS is the number of the register that holds + // the address to which the function should return. + // + // Entry should return true to process this CFI, or false to skip to + // the next entry. + // + // The parser invokes Entry for each Frame Description Entry (FDE) + // it finds. The parser doesn't report Common Information Entries + // to the handler explicitly; instead, if the handler elects to + // process a given FDE, the parser reiterates the appropriate CIE's + // contents at the beginning of the FDE's rules. + virtual bool Entry(size_t offset, uint64_t address, uint64_t length, + uint8_t version, const string& augmentation, + unsigned return_address) = 0; + + // When the Entry function returns true, the parser calls these + // handler functions repeatedly to describe the rules for recovering + // registers at each instruction in the given range of machine code. + // Immediately after a call to Entry, the handler should assume that + // the rule for each callee-saves register is "unchanged" --- that + // is, that the register still has the value it had in the caller. + // + // If a *Rule function returns true, we continue processing this entry's + // instructions. If a *Rule function returns false, we stop evaluating + // instructions, and skip to the next entry. Either way, we call End + // before going on to the next entry. + // + // In all of these functions, if the REG parameter is kCFARegister, then + // the rule describes how to find the canonical frame address. + // kCFARegister may be passed as a BASE_REGISTER argument, meaning that + // the canonical frame address should be used as the base address for the + // computation. All other REG values will be positive. + + // At ADDRESS, register REG's value is not recoverable. + virtual bool UndefinedRule(uint64_t address, int reg) = 0; + + // At ADDRESS, register REG's value is the same as that it had in + // the caller. + virtual bool SameValueRule(uint64_t address, int reg) = 0; + + // At ADDRESS, register REG has been saved at offset OFFSET from + // BASE_REGISTER. + virtual bool OffsetRule(uint64_t address, int reg, + int base_register, long offset) = 0; + + // At ADDRESS, the caller's value of register REG is the current + // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an + // address at which the register's value is saved.) + virtual bool ValOffsetRule(uint64_t address, int reg, + int base_register, long offset) = 0; + + // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs + // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that + // BASE_REGISTER is the "home" for REG's saved value: if you want to + // assign to a variable whose home is REG in the calling frame, you + // should put the value in BASE_REGISTER. + virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the address at + // which REG was saved. + virtual bool ExpressionRule(uint64_t address, int reg, + const string& expression) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the caller's + // value for REG. (This rule doesn't provide an address at which the + // register's value is saved.) + virtual bool ValExpressionRule(uint64_t address, int reg, + const string& expression) = 0; + + // Indicate that the rules for the address range reported by the + // last call to Entry are complete. End should return true if + // everything is okay, or false if an error has occurred and parsing + // should stop. + virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way. + // CFI entries used for exception handling have two additional data + // associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64_t address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } +}; + +// The CallFrameInfo class makes calls on an instance of this class to +// report errors or warn about problems in the data it is parsing. The +// default definitions of these methods print a message to stderr, but +// you can make a derived class that overrides them. +class CallFrameInfo::Reporter { + public: + // Create an error reporter which attributes troubles to the section + // named SECTION in FILENAME. + // + // Normally SECTION would be .debug_frame, but the Mac puts CFI data + // in a Mach-O section named __debug_frame. If we support + // Linux-style exception handling data, we could be reading an + // .eh_frame section. + Reporter(const string& filename, + const string& section = ".debug_frame") + : filename_(filename), section_(section) { } + virtual ~Reporter() { } + + // The CFI entry at OFFSET ends too early to be well-formed. KIND + // indicates what kind of entry it is; KIND can be kUnknown if we + // haven't parsed enough of the entry to tell yet. + virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind); + + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64_t offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the + // section is not that large. + virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry + // there is not a CIE. + virtual void BadCIEId(uint64_t offset, uint64_t cie_offset); + + // The FDE at OFFSET refers to a CIE with an address size we don't know how + // to handle. + virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size); + + // The FDE at OFFSET refers to a CIE with an segment descriptor size we + // don't know how to handle. + virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size); + + // The FDE at OFFSET refers to a CIE with version number VERSION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // a version number we don't recognize. + virtual void UnrecognizedVersion(uint64_t offset, int version); + + // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // augmentations we don't recognize. + virtual void UnrecognizedAugmentation(uint64_t offset, + const string& augmentation); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. + virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding); + + // The CIE at OFFSET contains a DW_CFA_restore instruction at + // INSN_OFFSET, which may not appear in a CIE. + virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset); + + // The entry at OFFSET, of kind KIND, has an unrecognized + // instruction at INSN_OFFSET. + virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind, + uint64_t insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, establishes a rule that cites the CFA, but we have not + // established a CFA rule yet. + virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, + uint64_t insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, is a DW_CFA_restore_state instruction, but the stack of + // saved states is empty. + virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, + uint64_t insn_offset); + + // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry + // at OFFSET, of kind KIND, would restore a state that has no CFA + // rule, whereas the current state does have a CFA rule. This is + // bogus input, which the CallFrameInfo::Handler interface doesn't + // (and shouldn't) have any way to report. + virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, + uint64_t insn_offset); + + protected: + // The name of the file whose CFI we're reading. + string filename_; + + // The name of the CFI section in that file. + string section_; +}; + +} // namespace google_breakpad + +#endif // UTIL_DEBUGINFO_DWARF2READER_H__ diff --git a/contrib/libs/breakpad/src/common/dwarf/elf_reader.cc b/contrib/libs/breakpad/src/common/dwarf/elf_reader.cc new file mode 100644 index 0000000000..c6d9f08a93 --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/elf_reader.cc @@ -0,0 +1,1274 @@ +// Copyright 2005 Google Inc. All Rights Reserved. +// Author: chatham@google.com (Andrew Chatham) +// Author: satorux@google.com (Satoru Takabayashi) +// +// Code for reading in ELF files. +// +// For information on the ELF format, see +// http://www.x86.org/ftp/manuals/tools/elf.pdf +// +// I also liked: +// http://www.caldera.com/developers/gabi/1998-04-29/contents.html +// +// A note about types: When dealing with the file format, we use types +// like Elf32_Word, but in the public interfaces we treat all +// addresses as uint64. As a result, we should be able to symbolize +// 64-bit binaries from a 32-bit process (which we don't do, +// anyway). size_t should therefore be avoided, except where required +// by things like mmap(). +// +// Although most of this code can deal with arbitrary ELF files of +// either word size, the public ElfReader interface only examines +// files loaded into the current address space, which must all match +// the machine's native word size. This code cannot handle ELF files +// with a non-native byte ordering. +// +// TODO(chatham): It would be nice if we could accomplish this task +// without using malloc(), so we could use it as the process is dying. + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // needed for pread() +#endif + +#include <fcntl.h> +#include <limits.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <map> +#include <string> +#include <vector> +// TODO(saugustine): Add support for compressed debug. +// Also need to add configure tests for zlib. +//#include "zlib.h" + +#include "third_party/musl/include/elf.h" +#include "elf_reader.h" +#include "common/using_std_string.h" + +// EM_AARCH64 is not defined by elf.h of GRTE v3 on x86. +// TODO(dougkwan): Remove this when v17 is retired. +#if !defined(EM_AARCH64) +#define EM_AARCH64 183 /* ARM AARCH64 */ +#endif + +// Map Linux macros to their Apple equivalents. +#if __APPLE__ +#ifndef __LITTLE_ENDIAN +#define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ +#endif // __LITTLE_ENDIAN +#ifndef __BIG_ENDIAN +#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ +#endif // __BIG_ENDIAN +#ifndef __BYTE_ORDER +#define __BYTE_ORDER __BYTE_ORDER__ +#endif // __BYTE_ORDER +#endif // __APPLE__ + +// TODO(dthomson): Can be removed once all Java code is using the Google3 +// launcher. We need to avoid processing PLT functions as it causes memory +// fragmentation in malloc, which is fixed in tcmalloc - and if the Google3 +// launcher is used the JVM will then use tcmalloc. b/13735638 +//DEFINE_bool(elfreader_process_dynsyms, true, +// "Activate PLT function processing"); + +using std::vector; + +namespace { + +// The lowest bit of an ARM symbol value is used to indicate a Thumb address. +const int kARMThumbBitOffset = 0; + +// Converts an ARM Thumb symbol value to a true aligned address value. +template <typename T> +T AdjustARMThumbSymbolValue(const T& symbol_table_value) { + return symbol_table_value & ~(1 << kARMThumbBitOffset); +} + +// Names of PLT-related sections. +const char kElfPLTRelSectionName[] = ".rel.plt"; // Use Rel struct. +const char kElfPLTRelaSectionName[] = ".rela.plt"; // Use Rela struct. +const char kElfPLTSectionName[] = ".plt"; +const char kElfDynSymSectionName[] = ".dynsym"; + +const int kX86PLTCodeSize = 0x10; // Size of one x86 PLT function in bytes. +const int kARMPLTCodeSize = 0xc; +const int kAARCH64PLTCodeSize = 0x10; + +const int kX86PLT0Size = 0x10; // Size of the special PLT0 entry. +const int kARMPLT0Size = 0x14; +const int kAARCH64PLT0Size = 0x20; + +// Suffix for PLT functions when it needs to be explicitly identified as such. +const char kPLTFunctionSuffix[] = "@plt"; + +} // namespace + +namespace google_breakpad { + +template <class ElfArch> class ElfReaderImpl; + +// 32-bit and 64-bit ELF files are processed exactly the same, except +// for various field sizes. Elf32 and Elf64 encompass all of the +// differences between the two formats, and all format-specific code +// in this file is templated on one of them. +class Elf32 { + public: + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Word Word; + typedef Elf32_Sym Sym; + typedef Elf32_Rel Rel; + typedef Elf32_Rela Rela; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS32; + + // Given a symbol pointer, return the binding type (eg STB_WEAK). + static char Bind(const Elf32_Sym* sym) { + return ELF32_ST_BIND(sym->st_info); + } + // Given a symbol pointer, return the symbol type (eg STT_FUNC). + static char Type(const Elf32_Sym* sym) { + return ELF32_ST_TYPE(sym->st_info); + } + + // Extract the symbol index from the r_info field of a relocation. + static int r_sym(const Elf32_Word r_info) { + return ELF32_R_SYM(r_info); + } +}; + + +class Elf64 { + public: + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Word Word; + typedef Elf64_Sym Sym; + typedef Elf64_Rel Rel; + typedef Elf64_Rela Rela; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS64; + + static char Bind(const Elf64_Sym* sym) { + return ELF64_ST_BIND(sym->st_info); + } + static char Type(const Elf64_Sym* sym) { + return ELF64_ST_TYPE(sym->st_info); + } + static int r_sym(const Elf64_Xword r_info) { + return ELF64_R_SYM(r_info); + } +}; + + +// ElfSectionReader mmaps a section of an ELF file ("section" is ELF +// terminology). The ElfReaderImpl object providing the section header +// must exist for the lifetime of this object. +// +// The motivation for mmaping individual sections of the file is that +// many Google executables are large enough when unstripped that we +// have to worry about running out of virtual address space. +// +// For compressed sections we have no choice but to allocate memory. +template<class ElfArch> +class ElfSectionReader { + public: + ElfSectionReader(const char* name, const string& path, int fd, + const typename ElfArch::Shdr& section_header) + : contents_aligned_(NULL), + contents_(NULL), + header_(section_header) { + // Back up to the beginning of the page we're interested in. + const size_t additional = header_.sh_offset % getpagesize(); + const size_t offset_aligned = header_.sh_offset - additional; + section_size_ = header_.sh_size; + size_aligned_ = section_size_ + additional; + // If the section has been stripped or is empty, do not attempt + // to process its contents. + if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0) + return; + contents_aligned_ = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED, + fd, offset_aligned); + // Set where the offset really should begin. + contents_ = reinterpret_cast<char*>(contents_aligned_) + + (header_.sh_offset - offset_aligned); + + // Check for and handle any compressed contents. + //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0) + // DecompressZlibContents(); + // TODO(saugustine): Add support for proposed elf-section flag + // "SHF_COMPRESS". + } + + ~ElfSectionReader() { + if (contents_aligned_ != NULL) + munmap(contents_aligned_, size_aligned_); + else + delete[] contents_; + } + + // Return the section header for this section. + typename ElfArch::Shdr const& header() const { return header_; } + + // Return memory at the given offset within this section. + const char* GetOffset(typename ElfArch::Word bytes) const { + return contents_ + bytes; + } + + const char* contents() const { return contents_; } + size_t section_size() const { return section_size_; } + + private: + // page-aligned file contents + void* contents_aligned_; + // contents as usable by the client. For non-compressed sections, + // pointer within contents_aligned_ to where the section data + // begins; for compressed sections, pointer to the decompressed + // data. + char* contents_; + // size of contents_aligned_ + size_t size_aligned_; + // size of contents. + size_t section_size_; + const typename ElfArch::Shdr header_; +}; + +// An iterator over symbols in a given section. It handles walking +// through the entries in the specified section and mapping symbol +// entries to their names in the appropriate string table (in +// another section). +template<class ElfArch> +class SymbolIterator { + public: + SymbolIterator(ElfReaderImpl<ElfArch>* reader, + typename ElfArch::Word section_type) + : symbol_section_(reader->GetSectionByType(section_type)), + string_section_(NULL), + num_symbols_in_section_(0), + symbol_within_section_(0) { + + // If this section type doesn't exist, leave + // num_symbols_in_section_ as zero, so this iterator is already + // done(). + if (symbol_section_ != NULL) { + num_symbols_in_section_ = symbol_section_->header().sh_size / + symbol_section_->header().sh_entsize; + + // Symbol sections have sh_link set to the section number of + // the string section containing the symbol names. + string_section_ = reader->GetSection(symbol_section_->header().sh_link); + } + } + + // Return true iff we have passed all symbols in this section. + bool done() const { + return symbol_within_section_ >= num_symbols_in_section_; + } + + // Advance to the next symbol in this section. + // REQUIRES: !done() + void Next() { ++symbol_within_section_; } + + // Return a pointer to the current symbol. + // REQUIRES: !done() + const typename ElfArch::Sym* GetSymbol() const { + return reinterpret_cast<const typename ElfArch::Sym*>( + symbol_section_->GetOffset(symbol_within_section_ * + symbol_section_->header().sh_entsize)); + } + + // Return the name of the current symbol, NULL if it has none. + // REQUIRES: !done() + const char* GetSymbolName() const { + int name_offset = GetSymbol()->st_name; + if (name_offset == 0) + return NULL; + return string_section_->GetOffset(name_offset); + } + + int GetCurrentSymbolIndex() const { + return symbol_within_section_; + } + + private: + const ElfSectionReader<ElfArch>* const symbol_section_; + const ElfSectionReader<ElfArch>* string_section_; + int num_symbols_in_section_; + int symbol_within_section_; +}; + + +// Copied from strings/strutil.h. Per chatham, +// this library should not depend on strings. + +static inline bool MyHasSuffixString(const string& str, const string& suffix) { + int len = str.length(); + int suflen = suffix.length(); + return (suflen <= len) && (str.compare(len-suflen, suflen, suffix) == 0); +} + + +// ElfReader loads an ELF binary and can provide information about its +// contents. It is most useful for matching addresses to function +// names. It does not understand debugging formats (eg dwarf2), so it +// can't print line numbers. It takes a path to an elf file and a +// readable file descriptor for that file, which it does not assume +// ownership of. +template<class ElfArch> +class ElfReaderImpl { + public: + explicit ElfReaderImpl(const string& path, int fd) + : path_(path), + fd_(fd), + section_headers_(NULL), + program_headers_(NULL), + opd_section_(NULL), + base_for_text_(0), + plts_supported_(false), + plt_code_size_(0), + plt0_size_(0), + visited_relocation_entries_(false) { + string error; + is_dwp_ = MyHasSuffixString(path, ".dwp"); + ParseHeaders(fd, path); + // Currently we need some extra information for PowerPC64 binaries + // including a way to read the .opd section for function descriptors and a + // way to find the linked base for function symbols. + if (header_.e_machine == EM_PPC64) { + // "opd_section_" must always be checked for NULL before use. + opd_section_ = GetSectionInfoByName(".opd", &opd_info_); + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + const char* name = GetSectionName(section_headers_[k].sh_name); + if (strncmp(name, ".text", strlen(".text")) == 0) { + base_for_text_ = + section_headers_[k].sh_addr - section_headers_[k].sh_offset; + break; + } + } + } + // Turn on PLTs. + if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) { + plt_code_size_ = kX86PLTCodeSize; + plt0_size_ = kX86PLT0Size; + plts_supported_ = true; + } else if (header_.e_machine == EM_ARM) { + plt_code_size_ = kARMPLTCodeSize; + plt0_size_ = kARMPLT0Size; + plts_supported_ = true; + } else if (header_.e_machine == EM_AARCH64) { + plt_code_size_ = kAARCH64PLTCodeSize; + plt0_size_ = kAARCH64PLT0Size; + plts_supported_ = true; + } + } + + ~ElfReaderImpl() { + for (unsigned int i = 0u; i < sections_.size(); ++i) + delete sections_[i]; + delete [] section_headers_; + delete [] program_headers_; + } + + // Examine the headers of the file and return whether the file looks + // like an ELF file for this architecture. Takes an already-open + // file descriptor for the candidate file, reading in the prologue + // to see if the ELF file appears to match the current + // architecture. If error is non-NULL, it will be set with a reason + // in case of failure. + static bool IsArchElfFile(int fd, string* error) { + unsigned char header[EI_NIDENT]; + if (pread(fd, header, sizeof(header), 0) != sizeof(header)) { + if (error != NULL) *error = "Could not read header"; + return false; + } + + if (memcmp(header, ELFMAG, SELFMAG) != 0) { + if (error != NULL) *error = "Missing ELF magic"; + return false; + } + + if (header[EI_CLASS] != ElfArch::kElfClass) { + if (error != NULL) *error = "Different word size"; + return false; + } + + int endian = 0; + if (header[EI_DATA] == ELFDATA2LSB) + endian = __LITTLE_ENDIAN; + else if (header[EI_DATA] == ELFDATA2MSB) + endian = __BIG_ENDIAN; + if (endian != __BYTE_ORDER) { + if (error != NULL) *error = "Different byte order"; + return false; + } + + return true; + } + + // Return true if we can use this symbol in Address-to-Symbol map. + bool CanUseSymbol(const char* name, const typename ElfArch::Sym* sym) { + // For now we only save FUNC and NOTYPE symbols. For now we just + // care about functions, but some functions written in assembler + // don't have a proper ELF type attached to them, so we store + // NOTYPE symbols as well. The remaining significant type is + // OBJECT (eg global variables), which represent about 25% of + // the symbols in a typical google3 binary. + if (ElfArch::Type(sym) != STT_FUNC && + ElfArch::Type(sym) != STT_NOTYPE) { + return false; + } + + // Target specific filtering. + switch (header_.e_machine) { + case EM_AARCH64: + case EM_ARM: + // Filter out '$x' special local symbols used by tools + return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL; + case EM_X86_64: + // Filter out read-only constants like .LC123. + return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL; + default: + return true; + } + } + + // Iterate over the symbols in a section, either SHT_DYNSYM or + // SHT_SYMTAB. Add all symbols to the given SymbolMap. + /* + void GetSymbolPositions(SymbolMap* symbols, + typename ElfArch::Word section_type, + uint64_t mem_offset, + uint64_t file_offset) { + // This map is used to filter out "nested" functions. + // See comment below. + AddrToSymMap addr_to_sym_map; + for (SymbolIterator<ElfArch> it(this, section_type); + !it.done(); it.Next()) { + const char* name = it.GetSymbolName(); + if (name == NULL) + continue; + const typename ElfArch::Sym* sym = it.GetSymbol(); + if (CanUseSymbol(name, sym)) { + const int sec = sym->st_shndx; + + // We don't support special section indices. The most common + // is SHN_ABS, for absolute symbols used deep in the bowels of + // glibc. Also ignore any undefined symbols. + if (sec == SHN_UNDEF || + (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) { + continue; + } + + const typename ElfArch::Shdr& hdr = section_headers_[sec]; + + // Adjust for difference between where we expected to mmap + // this section, and where it was actually mmapped. + const int64_t expected_base = hdr.sh_addr - hdr.sh_offset; + const int64_t real_base = mem_offset - file_offset; + const int64_t adjust = real_base - expected_base; + + uint64_t start = sym->st_value + adjust; + + // Adjust function symbols for PowerPC64 by dereferencing and adjusting + // the function descriptor to get the function address. + if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) { + const uint64_t opd_addr = + AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); + // Only adjust the returned value if the function address was found. + if (opd_addr != sym->st_value) { + const int64_t adjust_function_symbols = + real_base - base_for_text_; + start = opd_addr + adjust_function_symbols; + } + } + + addr_to_sym_map.push_back(std::make_pair(start, sym)); + } + } + std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter); + addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(), + addr_to_sym_map.end(), &AddrToSymEquals), + addr_to_sym_map.end()); + + // Squeeze out any "nested functions". + // Nested functions are not allowed in C, but libc plays tricks. + // + // For example, here is disassembly of /lib64/tls/libc-2.3.5.so: + // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip) + // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27> + // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax + // 0x00000000000aa390 <__read_nocancel+7>: syscall + // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax + // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111> + // 0x00000000000aa39a <__read_nocancel+17>: retq + // 0x00000000000aa39b <read+27>: sub $0x28,%rsp + // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp) + // ... + // Without removing __read_nocancel, symbolizer will return NULL + // given e.g. 0xaa39f (because the lower bound is __read_nocancel, + // but 0xaa39f is beyond its end. + if (addr_to_sym_map.empty()) { + return; + } + const ElfSectionReader<ElfArch>* const symbol_section = + this->GetSectionByType(section_type); + const ElfSectionReader<ElfArch>* const string_section = + this->GetSection(symbol_section->header().sh_link); + + typename AddrToSymMap::iterator curr = addr_to_sym_map.begin(); + // Always insert the first symbol. + symbols->AddSymbol(string_section->GetOffset(curr->second->st_name), + curr->first, curr->second->st_size); + typename AddrToSymMap::iterator prev = curr++; + for (; curr != addr_to_sym_map.end(); ++curr) { + const uint64_t prev_addr = prev->first; + const uint64_t curr_addr = curr->first; + const typename ElfArch::Sym* const prev_sym = prev->second; + const typename ElfArch::Sym* const curr_sym = curr->second; + if (prev_addr + prev_sym->st_size <= curr_addr || + // The next condition is true if two symbols overlap like this: + // + // Previous symbol |----------------------------| + // Current symbol |-------------------------------| + // + // These symbols are not found in google3 codebase, but in + // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so. + // + // 0619e040 00000046 t CardTableModRefBS::write_region_work() + // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work() + // + // We allow overlapped symbols rather than ignore these. + // Due to the way SymbolMap::GetSymbolAtPosition() works, + // lookup for any address in [curr_addr, curr_addr + its size) + // (e.g. 0619e071) will produce the current symbol, + // which is the desired outcome. + prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) { + const char* name = string_section->GetOffset(curr_sym->st_name); + symbols->AddSymbol(name, curr_addr, curr_sym->st_size); + prev = curr; + } else { + // Current symbol is "nested" inside previous one like this: + // + // Previous symbol |----------------------------| + // Current symbol |---------------------| + // + // This happens within glibc, e.g. __read_nocancel is nested + // "inside" __read. Ignore "inner" symbol. + //DCHECK_LE(curr_addr + curr_sym->st_size, + // prev_addr + prev_sym->st_size); + ; + } + } + } +*/ + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink* sink) { + VisitSymbols(section_type, sink, -1, -1, false); + } + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink* sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + for (SymbolIterator<ElfArch> it(this, section_type); + !it.done(); it.Next()) { + const char* name = it.GetSymbolName(); + if (!name) continue; + const typename ElfArch::Sym* sym = it.GetSymbol(); + if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) && + (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) { + typename ElfArch::Sym symbol = *sym; + // Add a PLT symbol in addition to the main undefined symbol. + // Only do this for SHT_DYNSYM, because PLT symbols are dynamic. + int symbol_index = it.GetCurrentSymbolIndex(); + // TODO(dthomson): Can be removed once all Java code is using the + // Google3 launcher. + if (section_type == SHT_DYNSYM && + static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() && + symbols_plt_offsets_[symbol_index] != 0) { + string plt_name = string(name) + kPLTFunctionSuffix; + if (plt_function_names_[symbol_index].empty()) { + plt_function_names_[symbol_index] = plt_name; + } else if (plt_function_names_[symbol_index] != plt_name) { + ; + } + sink->AddSymbol(plt_function_names_[symbol_index].c_str(), + symbols_plt_offsets_[it.GetCurrentSymbolIndex()], + plt_code_size_); + } + if (!get_raw_symbol_values) + AdjustSymbolValue(&symbol); + sink->AddSymbol(name, symbol.st_value, symbol.st_size); + } + } + } + + void VisitRelocationEntries() { + if (visited_relocation_entries_) { + return; + } + visited_relocation_entries_ = true; + + if (!plts_supported_) { + return; + } + // First determine if PLTs exist. If not, then there is nothing to do. + ElfReader::SectionInfo plt_section_info; + const char* plt_section = + GetSectionInfoByName(kElfPLTSectionName, &plt_section_info); + if (!plt_section) { + return; + } + if (plt_section_info.size == 0) { + return; + } + + // The PLTs could be referenced by either a Rel or Rela (Rel with Addend) + // section. + ElfReader::SectionInfo rel_section_info; + ElfReader::SectionInfo rela_section_info; + const char* rel_section = + GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info); + const char* rela_section = + GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info); + + const typename ElfArch::Rel* rel = + reinterpret_cast<const typename ElfArch::Rel*>(rel_section); + const typename ElfArch::Rela* rela = + reinterpret_cast<const typename ElfArch::Rela*>(rela_section); + + if (!rel_section && !rela_section) { + return; + } + + // Use either Rel or Rela section, depending on which one exists. + size_t section_size = rel_section ? rel_section_info.size + : rela_section_info.size; + size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel) + : sizeof(typename ElfArch::Rela); + + // Determine the number of entries in the dynamic symbol table. + ElfReader::SectionInfo dynsym_section_info; + const char* dynsym_section = + GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info); + // The dynsym section might not exist, or it might be empty. In either case + // there is nothing to be done so return. + if (!dynsym_section || dynsym_section_info.size == 0) { + return; + } + size_t num_dynamic_symbols = + dynsym_section_info.size / dynsym_section_info.entsize; + symbols_plt_offsets_.resize(num_dynamic_symbols, 0); + + // TODO(dthomson): Can be removed once all Java code is using the + // Google3 launcher. + // Make storage room for PLT function name strings. + plt_function_names_.resize(num_dynamic_symbols); + + for (size_t i = 0; i < section_size / entry_size; ++i) { + // Determine symbol index from the |r_info| field. + int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info + : rela[i].r_info); + if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) { + continue; + } + symbols_plt_offsets_[sym_index] = + plt_section_info.addr + plt0_size_ + i * plt_code_size_; + } + } + + // Return an ElfSectionReader for the first section of the given + // type by iterating through all section headers. Returns NULL if + // the section type is not found. + const ElfSectionReader<ElfArch>* GetSectionByType( + typename ElfArch::Word section_type) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + if (section_headers_[k].sh_type == section_type) { + return GetSection(k); + } + } + return NULL; + } + + // Return the name of section "shndx". Returns NULL if the section + // is not found. + const char* GetSectionNameByIndex(int shndx) { + return GetSectionName(section_headers_[shndx].sh_name); + } + + // Return a pointer to section "shndx", and store the size in + // "size". Returns NULL if the section is not found. + const char* GetSectionContentsByIndex(int shndx, size_t* size) { + const ElfSectionReader<ElfArch>* section = GetSection(shndx); + if (section != NULL) { + *size = section->section_size(); + return section->contents(); + } + return NULL; + } + + // Return a pointer to the first section of the given name by + // iterating through all section headers, and store the size in + // "size". Returns NULL if the section name is not found. + const char* GetSectionContentsByName(const string& section_name, + size_t* size) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + // When searching for sections in a .dwp file, the sections + // we're looking for will always be at the end of the section + // table, so reverse the direction of iteration. + int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; + const char* name = GetSectionName(section_headers_[shndx].sh_name); + if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { + const ElfSectionReader<ElfArch>* section = GetSection(shndx); + if (section == NULL) { + return NULL; + } else { + *size = section->section_size(); + return section->contents(); + } + } + } + return NULL; + } + + // This is like GetSectionContentsByName() but it returns a lot of extra + // information about the section. + const char* GetSectionInfoByName(const string& section_name, + ElfReader::SectionInfo* info) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + // When searching for sections in a .dwp file, the sections + // we're looking for will always be at the end of the section + // table, so reverse the direction of iteration. + int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; + const char* name = GetSectionName(section_headers_[shndx].sh_name); + if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { + const ElfSectionReader<ElfArch>* section = GetSection(shndx); + if (section == NULL) { + return NULL; + } else { + info->type = section->header().sh_type; + info->flags = section->header().sh_flags; + info->addr = section->header().sh_addr; + info->offset = section->header().sh_offset; + info->size = section->header().sh_size; + info->link = section->header().sh_link; + info->info = section->header().sh_info; + info->addralign = section->header().sh_addralign; + info->entsize = section->header().sh_entsize; + return section->contents(); + } + } + } + return NULL; + } + + // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD + // segments are present. This is the address an ELF image was linked + // (by static linker) to be loaded at. Usually (but not always) 0 for + // shared libraries and position-independent executables. + uint64_t VaddrOfFirstLoadSegment() const { + // Relocatable objects (of type ET_REL) do not have LOAD segments. + if (header_.e_type == ET_REL) { + return 0; + } + for (int i = 0; i < GetNumProgramHeaders(); ++i) { + if (program_headers_[i].p_type == PT_LOAD) { + return program_headers_[i].p_vaddr; + } + } + return 0; + } + + // According to the LSB ("ELF special sections"), sections with debug + // info are prefixed by ".debug". The names are not specified, but they + // look like ".debug_line", ".debug_info", etc. + bool HasDebugSections() { + // Debug sections are likely to be near the end, so reverse the + // direction of iteration. + for (int k = GetNumSections() - 1; k >= 0; --k) { + const char* name = GetSectionName(section_headers_[k].sh_name); + if (strncmp(name, ".debug", strlen(".debug")) == 0) return true; + if (strncmp(name, ".zdebug", strlen(".zdebug")) == 0) return true; + } + return false; + } + + bool IsDynamicSharedObject() const { + return header_.e_type == ET_DYN; + } + + // Return the number of sections. + uint64_t GetNumSections() const { + if (HasManySections()) + return first_section_header_.sh_size; + return header_.e_shnum; + } + + private: + typedef vector<pair<uint64_t, const typename ElfArch::Sym*> > AddrToSymMap; + + static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs, + const typename AddrToSymMap::value_type& rhs) { + return lhs.first < rhs.first; + } + + static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs, + const typename AddrToSymMap::value_type& rhs) { + return lhs.first == rhs.first; + } + + // Does this ELF file have too many sections to fit in the program header? + bool HasManySections() const { + return header_.e_shnum == SHN_UNDEF; + } + + // Return the number of program headers. + int GetNumProgramHeaders() const { + if (HasManySections() && header_.e_phnum == 0xffff && + first_section_header_.sh_info != 0) + return first_section_header_.sh_info; + return header_.e_phnum; + } + + // Return the index of the string table. + int GetStringTableIndex() const { + if (HasManySections()) { + if (header_.e_shstrndx == 0xffff) + return first_section_header_.sh_link; + else if (header_.e_shstrndx >= GetNumSections()) + return 0; + } + return header_.e_shstrndx; + } + + // Given an offset into the section header string table, return the + // section name. + const char* GetSectionName(typename ElfArch::Word sh_name) { + const ElfSectionReader<ElfArch>* shstrtab = + GetSection(GetStringTableIndex()); + if (shstrtab != NULL) { + return shstrtab->GetOffset(sh_name); + } + return NULL; + } + + // Return an ElfSectionReader for the given section. The reader will + // be freed when this object is destroyed. + const ElfSectionReader<ElfArch>* GetSection(int num) { + const char* name; + // Hard-coding the name for the section-name string table prevents + // infinite recursion. + if (num == GetStringTableIndex()) + name = ".shstrtab"; + else + name = GetSectionNameByIndex(num); + ElfSectionReader<ElfArch>*& reader = sections_[num]; + if (reader == NULL) + reader = new ElfSectionReader<ElfArch>(name, path_, fd_, + section_headers_[num]); + return reader; + } + + // Parse out the overall header information from the file and assert + // that it looks sane. This contains information like the magic + // number and target architecture. + bool ParseHeaders(int fd, const string& path) { + // Read in the global ELF header. + if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) { + return false; + } + + // Must be an executable, dynamic shared object or relocatable object + if (header_.e_type != ET_EXEC && + header_.e_type != ET_DYN && + header_.e_type != ET_REL) { + return false; + } + // Need a section header. + if (header_.e_shoff == 0) { + return false; + } + + if (header_.e_shnum == SHN_UNDEF) { + // The number of sections in the program header is only a 16-bit value. In + // the event of overflow (greater than SHN_LORESERVE sections), e_shnum + // will read SHN_UNDEF and the true number of section header table entries + // is found in the sh_size field of the first section header. + // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html + if (pread(fd, &first_section_header_, sizeof(first_section_header_), + header_.e_shoff) != sizeof(first_section_header_)) { + return false; + } + } + + // Dynamically allocate enough space to store the section headers + // and read them out of the file. + const int section_headers_size = + GetNumSections() * sizeof(*section_headers_); + section_headers_ = new typename ElfArch::Shdr[section_headers_size]; + if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) != + section_headers_size) { + return false; + } + + // Dynamically allocate enough space to store the program headers + // and read them out of the file. + //const int program_headers_size = + // GetNumProgramHeaders() * sizeof(*program_headers_); + program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()]; + + // Presize the sections array for efficiency. + sections_.resize(GetNumSections(), NULL); + return true; + } + + // Given the "value" of a function descriptor return the address of the + // function (i.e. the dereferenced value). Otherwise return "value". + uint64_t AdjustPPC64FunctionDescriptorSymbolValue(uint64_t value) { + if (opd_section_ != NULL && + opd_info_.addr <= value && + value < opd_info_.addr + opd_info_.size) { + uint64_t offset = value - opd_info_.addr; + return (*reinterpret_cast<const uint64_t*>(opd_section_ + offset)); + } + return value; + } + + void AdjustSymbolValue(typename ElfArch::Sym* sym) { + switch (header_.e_machine) { + case EM_ARM: + // For ARM architecture, if the LSB of the function symbol offset is set, + // it indicates a Thumb function. This bit should not be taken literally. + // Clear it. + if (ElfArch::Type(sym) == STT_FUNC) + sym->st_value = AdjustARMThumbSymbolValue(sym->st_value); + break; + case EM_386: + // No adjustment needed for Intel x86 architecture. However, explicitly + // define this case as we use it quite often. + break; + case EM_PPC64: + // PowerPC64 currently has function descriptors as part of the ABI. + // Function symbols need to be adjusted accordingly. + if (ElfArch::Type(sym) == STT_FUNC) + sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); + break; + default: + break; + } + } + + friend class SymbolIterator<ElfArch>; + + // The file we're reading. + const string path_; + // Open file descriptor for path_. Not owned by this object. + const int fd_; + + // The global header of the ELF file. + typename ElfArch::Ehdr header_; + + // The header of the first section. This may be used to supplement the ELF + // file header. + typename ElfArch::Shdr first_section_header_; + + // Array of GetNumSections() section headers, allocated when we read + // in the global header. + typename ElfArch::Shdr* section_headers_; + + // Array of GetNumProgramHeaders() program headers, allocated when we read + // in the global header. + typename ElfArch::Phdr* program_headers_; + + // An array of pointers to ElfSectionReaders. Sections are + // mmaped as they're needed and not released until this object is + // destroyed. + vector<ElfSectionReader<ElfArch>*> sections_; + + // For PowerPC64 we need to keep track of function descriptors when looking up + // values for funtion symbols values. Function descriptors are kept in the + // .opd section and are dereferenced to find the function address. + ElfReader::SectionInfo opd_info_; + const char* opd_section_; // Must be checked for NULL before use. + int64_t base_for_text_; + + // Read PLT-related sections for the current architecture. + bool plts_supported_; + // Code size of each PLT function for the current architecture. + size_t plt_code_size_; + // Size of the special first entry in the .plt section that calls the runtime + // loader resolution routine, and that all other entries jump to when doing + // lazy symbol binding. + size_t plt0_size_; + + // Maps a dynamic symbol index to a PLT offset. + // The vector entry index is the dynamic symbol index. + std::vector<uint64_t> symbols_plt_offsets_; + + // Container for PLT function name strings. These strings are passed by + // reference to SymbolSink::AddSymbol() so they need to be stored somewhere. + std::vector<string> plt_function_names_; + + bool visited_relocation_entries_; + + // True if this is a .dwp file. + bool is_dwp_; +}; + +ElfReader::ElfReader(const string& path) + : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) { + // linux 2.6.XX kernel can show deleted files like this: + // /var/run/nscd/dbYLJYaE (deleted) + // and the kernel-supplied vdso and vsyscall mappings like this: + // [vdso] + // [vsyscall] + if (MyHasSuffixString(path, " (deleted)")) + return; + if (path == "[vdso]") + return; + if (path == "[vsyscall]") + return; + + fd_ = open(path.c_str(), O_RDONLY); +} + +ElfReader::~ElfReader() { + if (fd_ != -1) + close(fd_); + if (impl32_ != NULL) + delete impl32_; + if (impl64_ != NULL) + delete impl64_; +} + + +// The only word-size specific part of this file is IsNativeElfFile(). +#if ULONG_MAX == 0xffffffff +#define NATIVE_ELF_ARCH Elf32 +#elif ULONG_MAX == 0xffffffffffffffff +#define NATIVE_ELF_ARCH Elf64 +#else +#error "Invalid word size" +#endif + +template <typename ElfArch> +static bool IsElfFile(const int fd, const string& path) { + if (fd < 0) + return false; + if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) { + // No error message here. IsElfFile gets called many times. + return false; + } + return true; +} + +bool ElfReader::IsNativeElfFile() const { + return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_); +} + +bool ElfReader::IsElf32File() const { + return IsElfFile<Elf32>(fd_, path_); +} + +bool ElfReader::IsElf64File() const { + return IsElfFile<Elf64>(fd_, path_); +} + +/* +void ElfReader::AddSymbols(SymbolMap* symbols, + uint64_t mem_offset, uint64_t file_offset, + uint64_t length) { + if (fd_ < 0) + return; + // TODO(chatham): Actually use the information about file offset and + // the length of the mapped section. On some machines the data + // section gets mapped as executable, and we'll end up reading the + // file twice and getting some of the offsets wrong. + if (IsElf32File()) { + GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB, + mem_offset, file_offset); + GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM, + mem_offset, file_offset); + } else if (IsElf64File()) { + GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB, + mem_offset, file_offset); + GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM, + mem_offset, file_offset); + } +} +*/ + +void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink) { + VisitSymbols(sink, -1, -1); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink, + int symbol_binding, + int symbol_type) { + VisitSymbols(sink, symbol_binding, symbol_type, false); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + if (IsElf32File()) { + GetImpl32()->VisitRelocationEntries(); + GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } else if (IsElf64File()) { + GetImpl64()->VisitRelocationEntries(); + GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } +} + +uint64_t ElfReader::VaddrOfFirstLoadSegment() { + if (IsElf32File()) { + return GetImpl32()->VaddrOfFirstLoadSegment(); + } else if (IsElf64File()) { + return GetImpl64()->VaddrOfFirstLoadSegment(); + } else { + return 0; + } +} + +const char* ElfReader::GetSectionName(int shndx) { + if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return NULL; + if (IsElf32File()) { + return GetImpl32()->GetSectionNameByIndex(shndx); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionNameByIndex(shndx); + } else { + return NULL; + } +} + +uint64_t ElfReader::GetNumSections() { + if (IsElf32File()) { + return GetImpl32()->GetNumSections(); + } else if (IsElf64File()) { + return GetImpl64()->GetNumSections(); + } else { + return 0; + } +} + +const char* ElfReader::GetSectionByIndex(int shndx, size_t* size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByIndex(shndx, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByIndex(shndx, size); + } else { + return NULL; + } +} + +const char* ElfReader::GetSectionByName(const string& section_name, + size_t* size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByName(section_name, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByName(section_name, size); + } else { + return NULL; + } +} + +const char* ElfReader::GetSectionInfoByName(const string& section_name, + SectionInfo* info) { + if (IsElf32File()) { + return GetImpl32()->GetSectionInfoByName(section_name, info); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionInfoByName(section_name, info); + } else { + return NULL; + } +} + +bool ElfReader::SectionNamesMatch(const string& name, const string& sh_name) { + if ((name.find(".debug_", 0) == 0) && (sh_name.find(".zdebug_", 0) == 0)) { + const string name_suffix(name, strlen(".debug_")); + const string sh_name_suffix(sh_name, strlen(".zdebug_")); + return name_suffix == sh_name_suffix; + } + return name == sh_name; +} + +bool ElfReader::IsDynamicSharedObject() { + if (IsElf32File()) { + return GetImpl32()->IsDynamicSharedObject(); + } else if (IsElf64File()) { + return GetImpl64()->IsDynamicSharedObject(); + } else { + return false; + } +} + +ElfReaderImpl<Elf32>* ElfReader::GetImpl32() { + if (impl32_ == NULL) { + impl32_ = new ElfReaderImpl<Elf32>(path_, fd_); + } + return impl32_; +} + +ElfReaderImpl<Elf64>* ElfReader::GetImpl64() { + if (impl64_ == NULL) { + impl64_ = new ElfReaderImpl<Elf64>(path_, fd_); + } + return impl64_; +} + +// Return true if file is an ELF binary of ElfArch, with unstripped +// debug info (debug_only=true) or symbol table (debug_only=false). +// Otherwise, return false. +template <typename ElfArch> +static bool IsNonStrippedELFBinaryImpl(const string& path, const int fd, + bool debug_only) { + if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false; + ElfReaderImpl<ElfArch> elf_reader(path, fd); + return debug_only ? + elf_reader.HasDebugSections() + : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL); +} + +// Helper for the IsNon[Debug]StrippedELFBinary functions. +static bool IsNonStrippedELFBinaryHelper(const string& path, + bool debug_only) { + const int fd = open(path.c_str(), O_RDONLY); + if (fd == -1) { + return false; + } + + if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) || + IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) { + close(fd); + return true; + } + close(fd); + return false; +} + +bool ElfReader::IsNonStrippedELFBinary(const string& path) { + return IsNonStrippedELFBinaryHelper(path, false); +} + +bool ElfReader::IsNonDebugStrippedELFBinary(const string& path) { + return IsNonStrippedELFBinaryHelper(path, true); +} +} // namespace google_breakpad diff --git a/contrib/libs/breakpad/src/common/dwarf/elf_reader.h b/contrib/libs/breakpad/src/common/dwarf/elf_reader.h new file mode 100644 index 0000000000..13ac39beb8 --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/elf_reader.h @@ -0,0 +1,167 @@ +// Copyright 2005 Google Inc. All Rights Reserved. +// Author: chatham@google.com (Andrew Chatham) +// Author: satorux@google.com (Satoru Takabayashi) +// +// ElfReader handles reading in ELF. It can extract symbols from the +// current process, which may be used to symbolize stack traces +// without having to make a potentially dangerous call to fork(). +// +// ElfReader dynamically allocates memory, so it is not appropriate to +// use once the address space might be corrupted, such as during +// process death. +// +// ElfReader supports both 32-bit and 64-bit ELF binaries. + +#ifndef COMMON_DWARF_ELF_READER_H__ +#define COMMON_DWARF_ELF_READER_H__ + +#include <string> +#include <vector> + +#include "common/dwarf/types.h" +#include "common/using_std_string.h" + +using std::vector; +using std::pair; + +namespace google_breakpad { + +class SymbolMap; +class Elf32; +class Elf64; +template<typename ElfArch> +class ElfReaderImpl; + +class ElfReader { + public: + explicit ElfReader(const string& path); + ~ElfReader(); + + // Parse the ELF prologue of this file and return whether it was + // successfully parsed and matches the word size and byte order of + // the current process. + bool IsNativeElfFile() const; + + // Similar to IsNativeElfFile but checks if it's a 32-bit ELF file. + bool IsElf32File() const; + + // Similar to IsNativeElfFile but checks if it's a 64-bit ELF file. + bool IsElf64File() const; + + // Checks if it's an ELF file of type ET_DYN (shared object file). + bool IsDynamicSharedObject(); + + // Add symbols in the given ELF file into the provided SymbolMap, + // assuming that the file has been loaded into the specified + // offset. + // + // The remaining arguments are typically taken from a + // ProcMapsIterator (base/sysinfo.h) and describe which portions of + // the ELF file are mapped into which parts of memory: + // + // mem_offset - position at which the segment is mapped into memory + // file_offset - offset in the file where the mapping begins + // length - length of the mapped segment + void AddSymbols(SymbolMap* symbols, + uint64_t mem_offset, uint64_t file_offset, + uint64_t length); + + class SymbolSink { + public: + virtual ~SymbolSink() {} + virtual void AddSymbol(const char* name, uint64_t address, + uint64_t size) = 0; + }; + + // Like AddSymbols above, but with no address correction. + // Processes any SHT_SYMTAB section, followed by any SHT_DYNSYM section. + void VisitSymbols(SymbolSink* sink); + + // Like VisitSymbols above, but for a specific symbol binding/type. + // A negative value for the binding and type parameters means any + // binding or type. + void VisitSymbols(SymbolSink* sink, int symbol_binding, int symbol_type); + + // Like VisitSymbols above but can optionally export raw symbol values instead + // of adjusted ones. + void VisitSymbols(SymbolSink* sink, int symbol_binding, int symbol_type, + bool get_raw_symbol_values); + + // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD + // segments are present. This is the address an ELF image was linked + // (by static linker) to be loaded at. Usually (but not always) 0 for + // shared libraries and position-independent executables. + uint64_t VaddrOfFirstLoadSegment(); + + // Return the name of section "shndx". Returns NULL if the section + // is not found. + const char* GetSectionName(int shndx); + + // Return the number of sections in the given ELF file. + uint64_t GetNumSections(); + + // Get section "shndx" from the given ELF file. On success, return + // the pointer to the section and store the size in "size". + // On error, return NULL. The returned section data is only valid + // until the ElfReader gets destroyed. + const char* GetSectionByIndex(int shndx, size_t* size); + + // Get section with "section_name" (ex. ".text", ".symtab") in the + // given ELF file. On success, return the pointer to the section + // and store the size in "size". On error, return NULL. The + // returned section data is only valid until the ElfReader gets + // destroyed. + const char* GetSectionByName(const string& section_name, size_t* size); + + // This is like GetSectionByName() but it returns a lot of extra information + // about the section. The SectionInfo structure is almost identical to + // the typedef struct Elf64_Shdr defined in <elf.h>, but is redefined + // here so that the many short macro names in <elf.h> don't have to be + // added to our already cluttered namespace. + struct SectionInfo { + uint32_t type; // Section type (SHT_xxx constant from elf.h). + uint64_t flags; // Section flags (SHF_xxx constants from elf.h). + uint64_t addr; // Section virtual address at execution. + uint64_t offset; // Section file offset. + uint64_t size; // Section size in bytes. + uint32_t link; // Link to another section. + uint32_t info; // Additional section information. + uint64_t addralign; // Section alignment. + uint64_t entsize; // Entry size if section holds a table. + }; + const char* GetSectionInfoByName(const string& section_name, + SectionInfo* info); + + // Check if "path" is an ELF binary that has not been stripped of symbol + // tables. This function supports both 32-bit and 64-bit ELF binaries. + static bool IsNonStrippedELFBinary(const string& path); + + // Check if "path" is an ELF binary that has not been stripped of debug + // info. Unlike IsNonStrippedELFBinary, this function will return + // false for binaries passed through "strip -S". + static bool IsNonDebugStrippedELFBinary(const string& path); + + // Match a requested section name with the section name as it + // appears in the elf-file, adjusting for compressed debug section + // names. For example, returns true if name == ".debug_abbrev" and + // sh_name == ".zdebug_abbrev" + static bool SectionNamesMatch(const string& name, const string& sh_name); + + private: + // Lazily initialize impl32_ and return it. + ElfReaderImpl<Elf32>* GetImpl32(); + // Ditto for impl64_. + ElfReaderImpl<Elf64>* GetImpl64(); + + // Path of the file we're reading. + const string path_; + // Read-only file descriptor for the file. May be -1 if there was an + // error during open. + int fd_; + ElfReaderImpl<Elf32>* impl32_; + ElfReaderImpl<Elf64>* impl64_; +}; + +} // namespace google_breakpad + +#endif // COMMON_DWARF_ELF_READER_H__ diff --git a/contrib/libs/breakpad/src/common/dwarf/line_state_machine.h b/contrib/libs/breakpad/src/common/dwarf/line_state_machine.h new file mode 100644 index 0000000000..b0f3f4907d --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/line_state_machine.h @@ -0,0 +1,63 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef COMMON_DWARF_LINE_STATE_MACHINE_H__ +#define COMMON_DWARF_LINE_STATE_MACHINE_H__ + +#include <stdint.h> + +namespace google_breakpad { + +// This is the format of a DWARF2/3 line state machine that we process +// opcodes using. There is no need for anything outside the lineinfo +// processor to know how this works. +struct LineStateMachine { + void Reset(bool default_is_stmt) { + file_num = 1; + address = 0; + line_num = 1; + column_num = 0; + is_stmt = default_is_stmt; + basic_block = false; + end_sequence = false; + } + + uint32_t file_num; + uint64_t address; + uint32_t line_num; + uint32_t column_num; + bool is_stmt; // stmt means statement. + bool basic_block; + bool end_sequence; +}; + +} // namespace google_breakpad + + +#endif // COMMON_DWARF_LINE_STATE_MACHINE_H__ diff --git a/contrib/libs/breakpad/src/common/dwarf/types.h b/contrib/libs/breakpad/src/common/dwarf/types.h new file mode 100644 index 0000000000..23412d0eaa --- /dev/null +++ b/contrib/libs/breakpad/src/common/dwarf/types.h @@ -0,0 +1,41 @@ +// Copyright 2008 Google, Inc. All Rights reserved +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// This file contains some typedefs for basic types + + +#ifndef _COMMON_DWARF_TYPES_H__ +#define _COMMON_DWARF_TYPES_H__ + +#include <stdint.h> + +typedef intptr_t intptr; +typedef uintptr_t uintptr; + +#endif // _COMMON_DWARF_TYPES_H__ |