aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/breakpad/src/common/dwarf
diff options
context:
space:
mode:
authoriddqd <iddqd@yandex-team.com>2024-12-19 10:46:06 +0300
committeriddqd <iddqd@yandex-team.com>2024-12-19 10:59:56 +0300
commitbb0840c0025a75dd3b85b746ebcec7deb7d9fe1c (patch)
tree85bc5522e873d9d5c37df278f0300c26fe9e729e /contrib/libs/breakpad/src/common/dwarf
parent1353077f79bb3547792b2fc86c22a695f0bc76f9 (diff)
downloadydb-bb0840c0025a75dd3b85b746ebcec7deb7d9fe1c.tar.gz
Add contib/libs/breakpad to export
commit_hash:9d85255f8d9249f14105e4626bf4484805b8aed4
Diffstat (limited to 'contrib/libs/breakpad/src/common/dwarf')
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h181
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/bytereader.cc250
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/bytereader.h320
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc199
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h368
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h744
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc3435
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h1494
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/elf_reader.cc1274
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/elf_reader.h167
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/line_state_machine.h63
-rw-r--r--contrib/libs/breakpad/src/common/dwarf/types.h41
12 files changed, 8536 insertions, 0 deletions
diff --git a/contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h b/contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h
new file mode 100644
index 0000000000..dec20e6ffe
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/bytereader-inl.h
@@ -0,0 +1,181 @@
+// Copyright 2006 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UTIL_DEBUGINFO_BYTEREADER_INL_H__
+#define UTIL_DEBUGINFO_BYTEREADER_INL_H__
+
+#include "common/dwarf/bytereader.h"
+
+#include <assert.h>
+#include <stdint.h>
+
+namespace google_breakpad {
+
+inline uint8_t ByteReader::ReadOneByte(const uint8_t* buffer) const {
+ return buffer[0];
+}
+
+inline uint16_t ByteReader::ReadTwoBytes(const uint8_t* buffer) const {
+ const uint16_t buffer0 = buffer[0];
+ const uint16_t buffer1 = buffer[1];
+ if (endian_ == ENDIANNESS_LITTLE) {
+ return buffer0 | buffer1 << 8;
+ } else {
+ return buffer1 | buffer0 << 8;
+ }
+}
+
+inline uint64_t ByteReader::ReadThreeBytes(const uint8_t* buffer) const {
+ const uint32_t buffer0 = buffer[0];
+ const uint32_t buffer1 = buffer[1];
+ const uint32_t buffer2 = buffer[2];
+ if (endian_ == ENDIANNESS_LITTLE) {
+ return buffer0 | buffer1 << 8 | buffer2 << 16;
+ } else {
+ return buffer2 | buffer1 << 8 | buffer0 << 16;
+ }
+}
+
+inline uint64_t ByteReader::ReadFourBytes(const uint8_t* buffer) const {
+ const uint32_t buffer0 = buffer[0];
+ const uint32_t buffer1 = buffer[1];
+ const uint32_t buffer2 = buffer[2];
+ const uint32_t buffer3 = buffer[3];
+ if (endian_ == ENDIANNESS_LITTLE) {
+ return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24;
+ } else {
+ return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24;
+ }
+}
+
+inline uint64_t ByteReader::ReadEightBytes(const uint8_t* buffer) const {
+ const uint64_t buffer0 = buffer[0];
+ const uint64_t buffer1 = buffer[1];
+ const uint64_t buffer2 = buffer[2];
+ const uint64_t buffer3 = buffer[3];
+ const uint64_t buffer4 = buffer[4];
+ const uint64_t buffer5 = buffer[5];
+ const uint64_t buffer6 = buffer[6];
+ const uint64_t buffer7 = buffer[7];
+ if (endian_ == ENDIANNESS_LITTLE) {
+ return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 |
+ buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56;
+ } else {
+ return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 |
+ buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56;
+ }
+}
+
+// Read an unsigned LEB128 number. Each byte contains 7 bits of
+// information, plus one bit saying whether the number continues or
+// not.
+
+inline uint64_t ByteReader::ReadUnsignedLEB128(const uint8_t* buffer,
+ size_t* len) const {
+ uint64_t result = 0;
+ size_t num_read = 0;
+ unsigned int shift = 0;
+ uint8_t byte;
+
+ do {
+ byte = *buffer++;
+ num_read++;
+
+ result |= (static_cast<uint64_t>(byte & 0x7f)) << shift;
+
+ shift += 7;
+
+ } while (byte & 0x80);
+
+ *len = num_read;
+
+ return result;
+}
+
+// Read a signed LEB128 number. These are like regular LEB128
+// numbers, except the last byte may have a sign bit set.
+
+inline int64_t ByteReader::ReadSignedLEB128(const uint8_t* buffer,
+ size_t* len) const {
+ int64_t result = 0;
+ unsigned int shift = 0;
+ size_t num_read = 0;
+ uint8_t byte;
+
+ do {
+ byte = *buffer++;
+ num_read++;
+ result |= (static_cast<uint64_t>(byte & 0x7f) << shift);
+ shift += 7;
+ } while (byte & 0x80);
+
+ if ((shift < 8 * sizeof (result)) && (byte & 0x40))
+ result |= -((static_cast<int64_t>(1)) << shift);
+ *len = num_read;
+ return result;
+}
+
+inline uint64_t ByteReader::ReadOffset(const uint8_t* buffer) const {
+ assert(this->offset_reader_);
+ return (this->*offset_reader_)(buffer);
+}
+
+inline uint64_t ByteReader::ReadAddress(const uint8_t* buffer) const {
+ assert(this->address_reader_);
+ return (this->*address_reader_)(buffer);
+}
+
+inline void ByteReader::SetCFIDataBase(uint64_t section_base,
+ const uint8_t* buffer_base) {
+ section_base_ = section_base;
+ buffer_base_ = buffer_base;
+ have_section_base_ = true;
+}
+
+inline void ByteReader::SetTextBase(uint64_t text_base) {
+ text_base_ = text_base;
+ have_text_base_ = true;
+}
+
+inline void ByteReader::SetDataBase(uint64_t data_base) {
+ data_base_ = data_base;
+ have_data_base_ = true;
+}
+
+inline void ByteReader::SetFunctionBase(uint64_t function_base) {
+ function_base_ = function_base;
+ have_function_base_ = true;
+}
+
+inline void ByteReader::ClearFunctionBase() {
+ have_function_base_ = false;
+}
+
+} // namespace google_breakpad
+
+#endif // UTIL_DEBUGINFO_BYTEREADER_INL_H__
diff --git a/contrib/libs/breakpad/src/common/dwarf/bytereader.cc b/contrib/libs/breakpad/src/common/dwarf/bytereader.cc
new file mode 100644
index 0000000000..ea3e4f6bc1
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/bytereader.cc
@@ -0,0 +1,250 @@
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "common/dwarf/bytereader-inl.h"
+#include "common/dwarf/bytereader.h"
+
+namespace google_breakpad {
+
+ByteReader::ByteReader(enum Endianness endian)
+ :offset_reader_(NULL), address_reader_(NULL), endian_(endian),
+ address_size_(0), offset_size_(0),
+ have_section_base_(), have_text_base_(), have_data_base_(),
+ have_function_base_() { }
+
+ByteReader::~ByteReader() { }
+
+void ByteReader::SetOffsetSize(uint8_t size) {
+ offset_size_ = size;
+ assert(size == 4 || size == 8);
+ if (size == 4) {
+ this->offset_reader_ = &ByteReader::ReadFourBytes;
+ } else {
+ this->offset_reader_ = &ByteReader::ReadEightBytes;
+ }
+}
+
+void ByteReader::SetAddressSize(uint8_t size) {
+ address_size_ = size;
+ assert(size == 4 || size == 8);
+ if (size == 4) {
+ this->address_reader_ = &ByteReader::ReadFourBytes;
+ } else {
+ this->address_reader_ = &ByteReader::ReadEightBytes;
+ }
+}
+
+uint64_t ByteReader::ReadInitialLength(const uint8_t* start, size_t* len) {
+ const uint64_t initial_length = ReadFourBytes(start);
+ start += 4;
+
+ // In DWARF2/3, if the initial length is all 1 bits, then the offset
+ // size is 8 and we need to read the next 8 bytes for the real length.
+ if (initial_length == 0xffffffff) {
+ SetOffsetSize(8);
+ *len = 12;
+ return ReadOffset(start);
+ } else {
+ SetOffsetSize(4);
+ *len = 4;
+ }
+ return initial_length;
+}
+
+bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
+ if (encoding == DW_EH_PE_omit) return true;
+ if (encoding == DW_EH_PE_aligned) return true;
+ if ((encoding & 0x7) > DW_EH_PE_udata8)
+ return false;
+ if ((encoding & 0x70) > DW_EH_PE_funcrel)
+ return false;
+ return true;
+}
+
+bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
+ switch (encoding & 0x70) {
+ case DW_EH_PE_absptr: return true;
+ case DW_EH_PE_pcrel: return have_section_base_;
+ case DW_EH_PE_textrel: return have_text_base_;
+ case DW_EH_PE_datarel: return have_data_base_;
+ case DW_EH_PE_funcrel: return have_function_base_;
+ default: return false;
+ }
+}
+
+uint64_t ByteReader::ReadEncodedPointer(const uint8_t* buffer,
+ DwarfPointerEncoding encoding,
+ size_t* len) const {
+ // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
+ // see it here.
+ assert(encoding != DW_EH_PE_omit);
+
+ // The Linux Standards Base 4.0 does not make this clear, but the
+ // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
+ // agree that aligned pointers are always absolute, machine-sized,
+ // machine-signed pointers.
+ if (encoding == DW_EH_PE_aligned) {
+ assert(have_section_base_);
+
+ // We don't need to align BUFFER in *our* address space. Rather, we
+ // need to find the next position in our buffer that would be aligned
+ // when the .eh_frame section the buffer contains is loaded into the
+ // program's memory. So align assuming that buffer_base_ gets loaded at
+ // address section_base_, where section_base_ itself may or may not be
+ // aligned.
+
+ // First, find the offset to START from the closest prior aligned
+ // address.
+ uint64_t skew = section_base_ & (AddressSize() - 1);
+ // Now find the offset from that aligned address to buffer.
+ uint64_t offset = skew + (buffer - buffer_base_);
+ // Round up to the next boundary.
+ uint64_t aligned = (offset + AddressSize() - 1) & -AddressSize();
+ // Convert back to a pointer.
+ const uint8_t* aligned_buffer = buffer_base_ + (aligned - skew);
+ // Finally, store the length and actually fetch the pointer.
+ *len = aligned_buffer - buffer + AddressSize();
+ return ReadAddress(aligned_buffer);
+ }
+
+ // Extract the value first, ignoring whether it's a pointer or an
+ // offset relative to some base.
+ uint64_t offset;
+ switch (encoding & 0x0f) {
+ case DW_EH_PE_absptr:
+ // DW_EH_PE_absptr is weird, as it is used as a meaningful value for
+ // both the high and low nybble of encoding bytes. When it appears in
+ // the high nybble, it means that the pointer is absolute, not an
+ // offset from some base address. When it appears in the low nybble,
+ // as here, it means that the pointer is stored as a normal
+ // machine-sized and machine-signed address. A low nybble of
+ // DW_EH_PE_absptr does not imply that the pointer is absolute; it is
+ // correct for us to treat the value as an offset from a base address
+ // if the upper nybble is not DW_EH_PE_absptr.
+ offset = ReadAddress(buffer);
+ *len = AddressSize();
+ break;
+
+ case DW_EH_PE_uleb128:
+ offset = ReadUnsignedLEB128(buffer, len);
+ break;
+
+ case DW_EH_PE_udata2:
+ offset = ReadTwoBytes(buffer);
+ *len = 2;
+ break;
+
+ case DW_EH_PE_udata4:
+ offset = ReadFourBytes(buffer);
+ *len = 4;
+ break;
+
+ case DW_EH_PE_udata8:
+ offset = ReadEightBytes(buffer);
+ *len = 8;
+ break;
+
+ case DW_EH_PE_sleb128:
+ offset = ReadSignedLEB128(buffer, len);
+ break;
+
+ case DW_EH_PE_sdata2:
+ offset = ReadTwoBytes(buffer);
+ // Sign-extend from 16 bits.
+ offset = (offset ^ 0x8000) - 0x8000;
+ *len = 2;
+ break;
+
+ case DW_EH_PE_sdata4:
+ offset = ReadFourBytes(buffer);
+ // Sign-extend from 32 bits.
+ offset = (offset ^ 0x80000000ULL) - 0x80000000ULL;
+ *len = 4;
+ break;
+
+ case DW_EH_PE_sdata8:
+ // No need to sign-extend; this is the full width of our type.
+ offset = ReadEightBytes(buffer);
+ *len = 8;
+ break;
+
+ default:
+ abort();
+ }
+
+ // Find the appropriate base address.
+ uint64_t base;
+ switch (encoding & 0x70) {
+ case DW_EH_PE_absptr:
+ base = 0;
+ break;
+
+ case DW_EH_PE_pcrel:
+ assert(have_section_base_);
+ base = section_base_ + (buffer - buffer_base_);
+ break;
+
+ case DW_EH_PE_textrel:
+ assert(have_text_base_);
+ base = text_base_;
+ break;
+
+ case DW_EH_PE_datarel:
+ assert(have_data_base_);
+ base = data_base_;
+ break;
+
+ case DW_EH_PE_funcrel:
+ assert(have_function_base_);
+ base = function_base_;
+ break;
+
+ default:
+ abort();
+ }
+
+ uint64_t pointer = base + offset;
+
+ // Remove inappropriate upper bits.
+ if (AddressSize() == 4)
+ pointer = pointer & 0xffffffff;
+ else
+ assert(AddressSize() == sizeof(uint64_t));
+
+ return pointer;
+}
+
+Endianness ByteReader::GetEndianness() const {
+ return endian_;
+}
+
+} // namespace google_breakpad
diff --git a/contrib/libs/breakpad/src/common/dwarf/bytereader.h b/contrib/libs/breakpad/src/common/dwarf/bytereader.h
new file mode 100644
index 0000000000..b0cac43315
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/bytereader.h
@@ -0,0 +1,320 @@
+// -*- mode: C++ -*-
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef COMMON_DWARF_BYTEREADER_H__
+#define COMMON_DWARF_BYTEREADER_H__
+
+#include <stdint.h>
+
+#include <string>
+
+#include "common/dwarf/types.h"
+#include "common/dwarf/dwarf2enums.h"
+
+namespace google_breakpad {
+
+// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN
+// because it conflicts with a macro
+enum Endianness {
+ ENDIANNESS_BIG,
+ ENDIANNESS_LITTLE
+};
+
+// A ByteReader knows how to read single- and multi-byte values of
+// various endiannesses, sizes, and encodings, as used in DWARF
+// debugging information and Linux C++ exception handling data.
+class ByteReader {
+ public:
+ // Construct a ByteReader capable of reading one-, two-, four-, and
+ // eight-byte values according to ENDIANNESS, absolute machine-sized
+ // addresses, DWARF-style "initial length" values, signed and
+ // unsigned LEB128 numbers, and Linux C++ exception handling data's
+ // encoded pointers.
+ explicit ByteReader(enum Endianness endianness);
+ virtual ~ByteReader();
+
+ // Read a single byte from BUFFER and return it as an unsigned 8 bit
+ // number.
+ uint8_t ReadOneByte(const uint8_t* buffer) const;
+
+ // Read two bytes from BUFFER and return them as an unsigned 16 bit
+ // number, using this ByteReader's endianness.
+ uint16_t ReadTwoBytes(const uint8_t* buffer) const;
+
+ // Read three bytes from BUFFER and return them as an unsigned 64 bit
+ // number, using this ByteReader's endianness. DWARF 5 uses this encoding
+ // for various index-related DW_FORMs.
+ uint64_t ReadThreeBytes(const uint8_t* buffer) const;
+
+ // Read four bytes from BUFFER and return them as an unsigned 32 bit
+ // number, using this ByteReader's endianness. This function returns
+ // a uint64_t so that it is compatible with ReadAddress and
+ // ReadOffset. The number it returns will never be outside the range
+ // of an unsigned 32 bit integer.
+ uint64_t ReadFourBytes(const uint8_t* buffer) const;
+
+ // Read eight bytes from BUFFER and return them as an unsigned 64
+ // bit number, using this ByteReader's endianness.
+ uint64_t ReadEightBytes(const uint8_t* buffer) const;
+
+ // Read an unsigned LEB128 (Little Endian Base 128) number from
+ // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
+ // the number of bytes read.
+ //
+ // The unsigned LEB128 representation of an integer N is a variable
+ // number of bytes:
+ //
+ // - If N is between 0 and 0x7f, then its unsigned LEB128
+ // representation is a single byte whose value is N.
+ //
+ // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+ // 0x80, followed by the unsigned LEB128 representation of N /
+ // 128, rounded towards negative infinity.
+ //
+ // In other words, we break VALUE into groups of seven bits, put
+ // them in little-endian order, and then write them as eight-bit
+ // bytes with the high bit on all but the last.
+ uint64_t ReadUnsignedLEB128(const uint8_t* buffer, size_t* len) const;
+
+ // Read a signed LEB128 number from BUFFER and return it as an
+ // signed 64 bit integer. Set LEN to the number of bytes read.
+ //
+ // The signed LEB128 representation of an integer N is a variable
+ // number of bytes:
+ //
+ // - If N is between -0x40 and 0x3f, then its signed LEB128
+ // representation is a single byte whose value is N in two's
+ // complement.
+ //
+ // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+ // 0x80, followed by the signed LEB128 representation of N / 128,
+ // rounded towards negative infinity.
+ //
+ // In other words, we break VALUE into groups of seven bits, put
+ // them in little-endian order, and then write them as eight-bit
+ // bytes with the high bit on all but the last.
+ int64_t ReadSignedLEB128(const uint8_t* buffer, size_t* len) const;
+
+ // Indicate that addresses on this architecture are SIZE bytes long. SIZE
+ // must be either 4 or 8. (DWARF allows addresses to be any number of
+ // bytes in length from 1 to 255, but we only support 32- and 64-bit
+ // addresses at the moment.) You must call this before using the
+ // ReadAddress member function.
+ //
+ // For data in a .debug_info section, or something that .debug_info
+ // refers to like line number or macro data, the compilation unit
+ // header's address_size field indicates the address size to use. Call
+ // frame information doesn't indicate its address size (a shortcoming of
+ // the spec); you must supply the appropriate size based on the
+ // architecture of the target machine.
+ void SetAddressSize(uint8_t size);
+
+ // Return the current address size, in bytes. This is either 4,
+ // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
+ uint8_t AddressSize() const { return address_size_; }
+
+ // Read an address from BUFFER and return it as an unsigned 64 bit
+ // integer, respecting this ByteReader's endianness and address size. You
+ // must call SetAddressSize before calling this function.
+ uint64_t ReadAddress(const uint8_t* buffer) const;
+
+ // DWARF actually defines two slightly different formats: 32-bit DWARF
+ // and 64-bit DWARF. This is *not* related to the size of registers or
+ // addresses on the target machine; it refers only to the size of section
+ // offsets and data lengths appearing in the DWARF data. One only needs
+ // 64-bit DWARF when the debugging data itself is larger than 4GiB.
+ // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
+ // debugging data itself is very large.
+ //
+ // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
+ // compilation unit and call frame information entry begins with an
+ // "initial length" field, which, in addition to giving the length of the
+ // data, also indicates the size of section offsets and lengths appearing
+ // in that data. The ReadInitialLength member function, below, reads an
+ // initial length and sets the ByteReader's offset size as a side effect.
+ // Thus, in the normal process of reading DWARF data, the appropriate
+ // offset size is set automatically. So, you should only need to call
+ // SetOffsetSize if you are using the same ByteReader to jump from the
+ // midst of one block of DWARF data into another.
+
+ // Read a DWARF "initial length" field from START, and return it as
+ // an unsigned 64 bit integer, respecting this ByteReader's
+ // endianness. Set *LEN to the length of the initial length in
+ // bytes, either four or twelve. As a side effect, set this
+ // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
+ // initial length) or 8 (if we see a 64-bit DWARF initial length).
+ //
+ // A DWARF initial length is either:
+ //
+ // - a byte count stored as an unsigned 32-bit value less than
+ // 0xffffff00, indicating that the data whose length is being
+ // measured uses the 32-bit DWARF format, or
+ //
+ // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
+ // indicating that the data whose length is being measured uses
+ // the 64-bit DWARF format.
+ uint64_t ReadInitialLength(const uint8_t* start, size_t* len);
+
+ // Read an offset from BUFFER and return it as an unsigned 64 bit
+ // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
+ // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
+ // long. You must call ReadInitialLength or SetOffsetSize before calling
+ // this function; see the comments above for details.
+ uint64_t ReadOffset(const uint8_t* buffer) const;
+
+ // Return the current offset size, in bytes.
+ // A return value of 4 indicates that we are reading 32-bit DWARF.
+ // A return value of 8 indicates that we are reading 64-bit DWARF.
+ uint8_t OffsetSize() const { return offset_size_; }
+
+ // Indicate that section offsets and lengths are SIZE bytes long. SIZE
+ // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
+ // Usually, you should not call this function yourself; instead, let a
+ // call to ReadInitialLength establish the data's offset size
+ // automatically.
+ void SetOffsetSize(uint8_t size);
+
+ // The Linux C++ ABI uses a variant of DWARF call frame information
+ // for exception handling. This data is included in the program's
+ // address space as the ".eh_frame" section, and intepreted at
+ // runtime to walk the stack, find exception handlers, and run
+ // cleanup code. The format is mostly the same as DWARF CFI, with
+ // some adjustments made to provide the additional
+ // exception-handling data, and to make the data easier to work with
+ // in memory --- for example, to allow it to be placed in read-only
+ // memory even when describing position-independent code.
+ //
+ // In particular, exception handling data can select a number of
+ // different encodings for pointers that appear in the data, as
+ // described by the DwarfPointerEncoding enum. There are actually
+ // four axes(!) to the encoding:
+ //
+ // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
+ // the DWARF LEB128 encoding.
+ //
+ // - The pointer's signedness: pointers can be signed or unsigned.
+ //
+ // - The pointer's base address: the data stored in the exception
+ // handling data can be the actual address (that is, an absolute
+ // pointer), or relative to one of a number of different base
+ // addreses --- including that of the encoded pointer itself, for
+ // a form of "pc-relative" addressing.
+ //
+ // - The pointer may be indirect: it may be the address where the
+ // true pointer is stored. (This is used to refer to things via
+ // global offset table entries, program linkage table entries, or
+ // other tricks used in position-independent code.)
+ //
+ // There are also two options that fall outside that matrix
+ // altogether: the pointer may be omitted, or it may have padding to
+ // align it on an appropriate address boundary. (That last option
+ // may seem like it should be just another axis, but it is not.)
+
+ // Indicate that the exception handling data is loaded starting at
+ // SECTION_BASE, and that the start of its buffer in our own memory
+ // is BUFFER_BASE. This allows us to find the address that a given
+ // byte in our buffer would have when loaded into the program the
+ // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
+ void SetCFIDataBase(uint64_t section_base, const uint8_t* buffer_base);
+
+ // Indicate that the base address of the program's ".text" section
+ // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
+ void SetTextBase(uint64_t text_base);
+
+ // Indicate that the base address for DW_EH_PE_datarel pointers is
+ // DATA_BASE. The proper value depends on the ABI; it is usually the
+ // address of the global offset table, held in a designated register in
+ // position-independent code. You will need to look at the startup code
+ // for the target system to be sure. I tried; my eyes bled.
+ void SetDataBase(uint64_t data_base);
+
+ // Indicate that the base address for the FDE we are processing is
+ // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
+ // pointers. (This encoding does not seem to be used by the GNU
+ // toolchain.)
+ void SetFunctionBase(uint64_t function_base);
+
+ // Indicate that we are no longer processing any FDE, so any use of
+ // a DW_EH_PE_funcrel encoding is an error.
+ void ClearFunctionBase();
+
+ // Return true if ENCODING is a valid pointer encoding.
+ bool ValidEncoding(DwarfPointerEncoding encoding) const;
+
+ // Return true if we have all the information we need to read a
+ // pointer that uses ENCODING. This checks that the appropriate
+ // SetFooBase function for ENCODING has been called.
+ bool UsableEncoding(DwarfPointerEncoding encoding) const;
+
+ // Read an encoded pointer from BUFFER using ENCODING; return the
+ // absolute address it represents, and set *LEN to the pointer's
+ // length in bytes, including any padding for aligned pointers.
+ //
+ // This function calls 'abort' if ENCODING is invalid or refers to a
+ // base address this reader hasn't been given, so you should check
+ // with ValidEncoding and UsableEncoding first if you would rather
+ // die in a more helpful way.
+ uint64_t ReadEncodedPointer(const uint8_t* buffer,
+ DwarfPointerEncoding encoding,
+ size_t* len) const;
+
+ Endianness GetEndianness() const;
+ private:
+
+ // Function pointer type for our address and offset readers.
+ typedef uint64_t (ByteReader::*AddressReader)(const uint8_t*) const;
+
+ // Read an offset from BUFFER and return it as an unsigned 64 bit
+ // integer. DWARF2/3 define offsets as either 4 or 8 bytes,
+ // generally depending on the amount of DWARF2/3 info present.
+ // This function pointer gets set by SetOffsetSize.
+ AddressReader offset_reader_;
+
+ // Read an address from BUFFER and return it as an unsigned 64 bit
+ // integer. DWARF2/3 allow addresses to be any size from 0-255
+ // bytes currently. Internally we support 4 and 8 byte addresses,
+ // and will CHECK on anything else.
+ // This function pointer gets set by SetAddressSize.
+ AddressReader address_reader_;
+
+ Endianness endian_;
+ uint8_t address_size_;
+ uint8_t offset_size_;
+
+ // Base addresses for Linux C++ exception handling data's encoded pointers.
+ bool have_section_base_, have_text_base_, have_data_base_;
+ bool have_function_base_;
+ uint64_t section_base_, text_base_, data_base_, function_base_;
+ const uint8_t* buffer_base_;
+};
+
+} // namespace google_breakpad
+
+#endif // COMMON_DWARF_BYTEREADER_H__
diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc
new file mode 100644
index 0000000000..1393c3d7d8
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.cc
@@ -0,0 +1,199 @@
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// dwarf2diehandler.cc: Implement the dwarf2reader::DieDispatcher class.
+// See dwarf2diehandler.h for details.
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "common/dwarf/dwarf2diehandler.h"
+#include "common/using_std_string.h"
+
+namespace google_breakpad {
+
+DIEDispatcher::~DIEDispatcher() {
+ while (!die_handlers_.empty()) {
+ HandlerStack& entry = die_handlers_.top();
+ if (entry.handler_ != root_handler_)
+ delete entry.handler_;
+ die_handlers_.pop();
+ }
+}
+
+bool DIEDispatcher::StartCompilationUnit(uint64_t offset, uint8_t address_size,
+ uint8_t offset_size, uint64_t cu_length,
+ uint8_t dwarf_version) {
+ return root_handler_->StartCompilationUnit(offset, address_size,
+ offset_size, cu_length,
+ dwarf_version);
+}
+
+bool DIEDispatcher::StartDIE(uint64_t offset, enum DwarfTag tag) {
+ // The stack entry for the parent of this DIE, if there is one.
+ HandlerStack* parent = die_handlers_.empty() ? NULL : &die_handlers_.top();
+
+ // Does this call indicate that we're done receiving the parent's
+ // attributes' values? If so, call its EndAttributes member function.
+ if (parent && parent->handler_ && !parent->reported_attributes_end_) {
+ parent->reported_attributes_end_ = true;
+ if (!parent->handler_->EndAttributes()) {
+ // Finish off this handler now. and edit *PARENT to indicate that
+ // we don't want to visit any of the children.
+ parent->handler_->Finish();
+ if (parent->handler_ != root_handler_)
+ delete parent->handler_;
+ parent->handler_ = NULL;
+ return false;
+ }
+ }
+
+ // Find a handler for this DIE.
+ DIEHandler* handler;
+ if (parent) {
+ if (parent->handler_)
+ // Ask the parent to find a handler.
+ handler = parent->handler_->FindChildHandler(offset, tag);
+ else
+ // No parent handler means we're not interested in any of our
+ // children.
+ handler = NULL;
+ } else {
+ // This is the root DIE. For a non-root DIE, the parent's handler
+ // decides whether to visit it, but the root DIE has no parent
+ // handler, so we have a special method on the root DIE handler
+ // itself to decide.
+ if (root_handler_->StartRootDIE(offset, tag))
+ handler = root_handler_;
+ else
+ handler = NULL;
+ }
+
+ // Push a handler stack entry for this new handler. As an
+ // optimization, we don't push NULL-handler entries on top of other
+ // NULL-handler entries; we just let the oldest such entry stand for
+ // the whole subtree.
+ if (handler || !parent || parent->handler_) {
+ HandlerStack entry;
+ entry.offset_ = offset;
+ entry.handler_ = handler;
+ entry.reported_attributes_end_ = false;
+ die_handlers_.push(entry);
+ }
+
+ return handler != NULL;
+}
+
+void DIEDispatcher::EndDIE(uint64_t offset) {
+ assert(!die_handlers_.empty());
+ HandlerStack* entry = &die_handlers_.top();
+ if (entry->handler_) {
+ // This entry had better be the handler for this DIE.
+ assert(entry->offset_ == offset);
+ // If a DIE has no children, this EndDIE call indicates that we're
+ // done receiving its attributes' values.
+ if (!entry->reported_attributes_end_)
+ entry->handler_->EndAttributes(); // Ignore return value: no children.
+ entry->handler_->Finish();
+ if (entry->handler_ != root_handler_)
+ delete entry->handler_;
+ } else {
+ // If this DIE is within a tree we're ignoring, then don't pop the
+ // handler stack: that entry stands for the whole tree.
+ if (entry->offset_ != offset)
+ return;
+ }
+ die_handlers_.pop();
+}
+
+void DIEDispatcher::ProcessAttributeUnsigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data) {
+ HandlerStack& current = die_handlers_.top();
+ // This had better be an attribute of the DIE we were meant to handle.
+ assert(offset == current.offset_);
+ current.handler_->ProcessAttributeUnsigned(attr, form, data);
+}
+
+void DIEDispatcher::ProcessAttributeSigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ int64_t data) {
+ HandlerStack& current = die_handlers_.top();
+ // This had better be an attribute of the DIE we were meant to handle.
+ assert(offset == current.offset_);
+ current.handler_->ProcessAttributeSigned(attr, form, data);
+}
+
+void DIEDispatcher::ProcessAttributeReference(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data) {
+ HandlerStack& current = die_handlers_.top();
+ // This had better be an attribute of the DIE we were meant to handle.
+ assert(offset == current.offset_);
+ current.handler_->ProcessAttributeReference(attr, form, data);
+}
+
+void DIEDispatcher::ProcessAttributeBuffer(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const uint8_t* data,
+ uint64_t len) {
+ HandlerStack& current = die_handlers_.top();
+ // This had better be an attribute of the DIE we were meant to handle.
+ assert(offset == current.offset_);
+ current.handler_->ProcessAttributeBuffer(attr, form, data, len);
+}
+
+void DIEDispatcher::ProcessAttributeString(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const string& data) {
+ HandlerStack& current = die_handlers_.top();
+ // This had better be an attribute of the DIE we were meant to handle.
+ assert(offset == current.offset_);
+ current.handler_->ProcessAttributeString(attr, form, data);
+}
+
+void DIEDispatcher::ProcessAttributeSignature(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t signature) {
+ HandlerStack& current = die_handlers_.top();
+ // This had better be an attribute of the DIE we were meant to handle.
+ assert(offset == current.offset_);
+ current.handler_->ProcessAttributeSignature(attr, form, signature);
+}
+
+} // namespace google_breakpad
diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h
new file mode 100644
index 0000000000..5e568eb85f
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2diehandler.h
@@ -0,0 +1,368 @@
+// -*- mode: c++ -*-
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// dwarf2reader::CompilationUnit is a simple and direct parser for
+// DWARF data, but its handler interface is not convenient to use. In
+// particular:
+//
+// - CompilationUnit calls Dwarf2Handler's member functions to report
+// every attribute's value, regardless of what sort of DIE it is.
+// As a result, the ProcessAttributeX functions end up looking like
+// this:
+//
+// switch (parent_die_tag) {
+// case DW_TAG_x:
+// switch (attribute_name) {
+// case DW_AT_y:
+// handle attribute y of DIE type x
+// ...
+// } break;
+// ...
+// }
+//
+// In C++ it's much nicer to use virtual function dispatch to find
+// the right code for a given case than to switch on the DIE tag
+// like this.
+//
+// - Processing different kinds of DIEs requires different sets of
+// data: lexical block DIEs have start and end addresses, but struct
+// type DIEs don't. It would be nice to be able to have separate
+// handler classes for separate kinds of DIEs, each with the members
+// appropriate to its role, instead of having one handler class that
+// needs to hold data for every DIE type.
+//
+// - There should be a separate instance of the appropriate handler
+// class for each DIE, instead of a single object with tables
+// tracking all the dies in the compilation unit.
+//
+// - It's not convenient to take some action after all a DIE's
+// attributes have been seen, but before visiting any of its
+// children. The only indication you have that a DIE's attribute
+// list is complete is that you get either a StartDIE or an EndDIE
+// call.
+//
+// - It's not convenient to make use of the tree structure of the
+// DIEs. Skipping all the children of a given die requires
+// maintaining state and returning false from StartDIE until we get
+// an EndDIE call with the appropriate offset.
+//
+// This interface tries to take care of all that. (You're shocked, I'm sure.)
+//
+// Using the classes here, you provide an initial handler for the root
+// DIE of the compilation unit. Each handler receives its DIE's
+// attributes, and provides fresh handler objects for children of
+// interest, if any. The three classes are:
+//
+// - DIEHandler: the base class for your DIE-type-specific handler
+// classes.
+//
+// - RootDIEHandler: derived from DIEHandler, the base class for your
+// root DIE handler class.
+//
+// - DIEDispatcher: derived from Dwarf2Handler, an instance of this
+// invokes your DIE-type-specific handler objects.
+//
+// In detail:
+//
+// - Define handler classes specialized for the DIE types you're
+// interested in. These handler classes must inherit from
+// DIEHandler. Thus:
+//
+// class My_DW_TAG_X_Handler: public DIEHandler { ... };
+// class My_DW_TAG_Y_Handler: public DIEHandler { ... };
+//
+// DIEHandler subclasses needn't correspond exactly to single DIE
+// types, as shown here; the point is that you can have several
+// different classes appropriate to different kinds of DIEs.
+//
+// - In particular, define a handler class for the compilation
+// unit's root DIE, that inherits from RootDIEHandler:
+//
+// class My_DW_TAG_compile_unit_Handler: public RootDIEHandler { ... };
+//
+// RootDIEHandler inherits from DIEHandler, adding a few additional
+// member functions for examining the compilation unit as a whole,
+// and other quirks of rootness.
+//
+// - Then, create a DIEDispatcher instance, passing it an instance of
+// your root DIE handler class, and use that DIEDispatcher as the
+// dwarf2reader::CompilationUnit's handler:
+//
+// My_DW_TAG_compile_unit_Handler root_die_handler(...);
+// DIEDispatcher die_dispatcher(&root_die_handler);
+// CompilationUnit reader(sections, offset, bytereader, &die_dispatcher);
+//
+// Here, 'die_dispatcher' acts as a shim between 'reader' and the
+// various DIE-specific handlers you have defined.
+//
+// - When you call reader.Start(), die_dispatcher behaves as follows,
+// starting with your root die handler and the compilation unit's
+// root DIE:
+//
+// - It calls the handler's ProcessAttributeX member functions for
+// each of the DIE's attributes.
+//
+// - It calls the handler's EndAttributes member function. This
+// should return true if any of the DIE's children should be
+// visited, in which case:
+//
+// - For each of the DIE's children, die_dispatcher calls the
+// DIE's handler's FindChildHandler member function. If that
+// returns a pointer to a DIEHandler instance, then
+// die_dispatcher uses that handler to process the child, using
+// this procedure recursively. Alternatively, if
+// FindChildHandler returns NULL, die_dispatcher ignores that
+// child and its descendants.
+//
+// - When die_dispatcher has finished processing all the DIE's
+// children, it invokes the handler's Finish() member function,
+// and destroys the handler. (As a special case, it doesn't
+// destroy the root DIE handler.)
+//
+// This allows the code for handling a particular kind of DIE to be
+// gathered together in a single class, makes it easy to skip all the
+// children or individual children of a particular DIE, and provides
+// appropriate parental context for each die.
+
+#ifndef COMMON_DWARF_DWARF2DIEHANDLER_H__
+#define COMMON_DWARF_DWARF2DIEHANDLER_H__
+
+#include <stdint.h>
+
+#include <stack>
+#include <string>
+
+#include "common/dwarf/types.h"
+#include "common/dwarf/dwarf2enums.h"
+#include "common/dwarf/dwarf2reader.h"
+#include "common/using_std_string.h"
+
+namespace google_breakpad {
+
+// A base class for handlers for specific DIE types. The series of
+// calls made on a DIE handler is as follows:
+//
+// - for each attribute of the DIE:
+// - ProcessAttributeX()
+// - EndAttributes()
+// - if that returned true, then for each child:
+// - FindChildHandler()
+// - if that returns a non-NULL pointer to a new handler:
+// - recurse, with the new handler and the child die
+// - Finish()
+// - destruction
+class DIEHandler {
+ public:
+ DIEHandler() { }
+ virtual ~DIEHandler() { }
+
+ // When we visit a DIE, we first use these member functions to
+ // report the DIE's attributes and their values. These have the
+ // same restrictions as the corresponding member functions of
+ // dwarf2reader::Dwarf2Handler.
+ //
+ // Since DWARF does not specify in what order attributes must
+ // appear, avoid making decisions in these functions that would be
+ // affected by the presence of other attributes. The EndAttributes
+ // function is a more appropriate place for such work, as all the
+ // DIE's attributes have been seen at that point.
+ //
+ // The default definitions ignore the values they are passed.
+ virtual void ProcessAttributeUnsigned(enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data) { }
+ virtual void ProcessAttributeSigned(enum DwarfAttribute attr,
+ enum DwarfForm form,
+ int64_t data) { }
+ virtual void ProcessAttributeReference(enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data) { }
+ virtual void ProcessAttributeBuffer(enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const uint8_t* data,
+ uint64_t len) { }
+ virtual void ProcessAttributeString(enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const string& data) { }
+ virtual void ProcessAttributeSignature(enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t signture) { }
+
+ // Once we have reported all the DIE's attributes' values, we call
+ // this member function. If it returns false, we skip all the DIE's
+ // children. If it returns true, we call FindChildHandler on each
+ // child. If that returns a handler object, we use that to visit
+ // the child; otherwise, we skip the child.
+ //
+ // This is a good place to make decisions that depend on more than
+ // one attribute. DWARF does not specify in what order attributes
+ // must appear, so only when the EndAttributes function is called
+ // does the handler have a complete picture of the DIE's attributes.
+ //
+ // The default definition elects to ignore the DIE's children.
+ // You'll need to override this if you override FindChildHandler,
+ // but at least the default behavior isn't to pass the children to
+ // FindChildHandler, which then ignores them all.
+ virtual bool EndAttributes() { return false; }
+
+ // If EndAttributes returns true to indicate that some of the DIE's
+ // children might be of interest, then we apply this function to
+ // each of the DIE's children. If it returns a handler object, then
+ // we use that to visit the child DIE. If it returns NULL, we skip
+ // that child DIE (and all its descendants).
+ //
+ // OFFSET is the offset of the child; TAG indicates what kind of DIE
+ // it is.
+ //
+ // The default definition skips all children.
+ virtual DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag) {
+ return NULL;
+ }
+
+ // When we are done processing a DIE, we call this member function.
+ // This happens after the EndAttributes call, all FindChildHandler
+ // calls (if any), and all operations on the children themselves (if
+ // any). We call Finish on every handler --- even if EndAttributes
+ // returns false.
+ virtual void Finish() { };
+};
+
+// A subclass of DIEHandler, with additional kludges for handling the
+// compilation unit's root die.
+class RootDIEHandler : public DIEHandler {
+ public:
+ bool handle_inline;
+
+ explicit RootDIEHandler(bool handle_inline = false)
+ : handle_inline(handle_inline) {}
+ virtual ~RootDIEHandler() {}
+
+ // We pass the values reported via Dwarf2Handler::StartCompilationUnit
+ // to this member function, and skip the entire compilation unit if it
+ // returns false. So the root DIE handler is actually also
+ // responsible for handling the compilation unit metadata.
+ // The default definition always visits the compilation unit.
+ virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
+ uint8_t offset_size, uint64_t cu_length,
+ uint8_t dwarf_version) { return true; }
+
+ // For the root DIE handler only, we pass the offset, tag and
+ // attributes of the compilation unit's root DIE. This is the only
+ // way the root DIE handler can find the root DIE's tag. If this
+ // function returns true, we will visit the root DIE using the usual
+ // DIEHandler methods; otherwise, we skip the entire compilation
+ // unit.
+ //
+ // The default definition elects to visit the root DIE.
+ virtual bool StartRootDIE(uint64_t offset, enum DwarfTag tag) { return true; }
+};
+
+class DIEDispatcher: public Dwarf2Handler {
+ public:
+ // Create a Dwarf2Handler which uses ROOT_HANDLER as the handler for
+ // the compilation unit's root die, as described for the DIEHandler
+ // class.
+ DIEDispatcher(RootDIEHandler* root_handler) : root_handler_(root_handler) { }
+ // Destroying a DIEDispatcher destroys all active handler objects
+ // except the root handler.
+ ~DIEDispatcher();
+ bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
+ uint8_t offset_size, uint64_t cu_length,
+ uint8_t dwarf_version);
+ bool StartDIE(uint64_t offset, enum DwarfTag tag);
+ void ProcessAttributeUnsigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data);
+ void ProcessAttributeSigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ int64_t data);
+ void ProcessAttributeReference(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data);
+ void ProcessAttributeBuffer(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const uint8_t* data,
+ uint64_t len);
+ void ProcessAttributeString(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const string& data);
+ void ProcessAttributeSignature(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t signature);
+ void EndDIE(uint64_t offset);
+
+ private:
+
+ // The type of a handler stack entry. This includes some fields
+ // which don't really need to be on the stack --- they could just be
+ // single data members of DIEDispatcher --- but putting them here
+ // makes it easier to see that the code is correct.
+ struct HandlerStack {
+ // The offset of the DIE for this handler stack entry.
+ uint64_t offset_;
+
+ // The handler object interested in this DIE's attributes and
+ // children. If NULL, we're not interested in either.
+ DIEHandler* handler_;
+
+ // Have we reported the end of this DIE's attributes to the handler?
+ bool reported_attributes_end_;
+ };
+
+ // Stack of DIE attribute handlers. At StartDIE(D), the top of the
+ // stack is the handler of D's parent, whom we may ask for a handler
+ // for D itself. At EndDIE(D), the top of the stack is D's handler.
+ // Special cases:
+ //
+ // - Before we've seen the compilation unit's root DIE, the stack is
+ // empty; we'll call root_handler_'s special member functions, and
+ // perhaps push root_handler_ on the stack to look at the root's
+ // immediate children.
+ //
+ // - When we decide to ignore a subtree, we only push an entry on
+ // the stack for the root of the tree being ignored, rather than
+ // pushing lots of stack entries with handler_ set to NULL.
+ std::stack<HandlerStack> die_handlers_;
+
+ // The root handler. We don't push it on die_handlers_ until we
+ // actually get the StartDIE call for the root.
+ RootDIEHandler* root_handler_;
+};
+
+} // namespace google_breakpad
+#endif // COMMON_DWARF_DWARF2DIEHANDLER_H__
diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h b/contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h
new file mode 100644
index 0000000000..7d84f35e23
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2enums.h
@@ -0,0 +1,744 @@
+// -*- mode: c++ -*-
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef COMMON_DWARF_DWARF2ENUMS_H__
+#define COMMON_DWARF_DWARF2ENUMS_H__
+
+namespace google_breakpad {
+
+// These enums do not follow the google3 style only because they are
+// known universally (specs, other implementations) by the names in
+// exactly this capitalization.
+// Tag names and codes.
+enum DwarfTag {
+ DW_TAG_padding = 0x00,
+ DW_TAG_array_type = 0x01,
+ DW_TAG_class_type = 0x02,
+ DW_TAG_entry_point = 0x03,
+ DW_TAG_enumeration_type = 0x04,
+ DW_TAG_formal_parameter = 0x05,
+ DW_TAG_imported_declaration = 0x08,
+ DW_TAG_label = 0x0a,
+ DW_TAG_lexical_block = 0x0b,
+ DW_TAG_member = 0x0d,
+ DW_TAG_pointer_type = 0x0f,
+ DW_TAG_reference_type = 0x10,
+ DW_TAG_compile_unit = 0x11,
+ DW_TAG_string_type = 0x12,
+ DW_TAG_structure_type = 0x13,
+ DW_TAG_subroutine_type = 0x15,
+ DW_TAG_typedef = 0x16,
+ DW_TAG_union_type = 0x17,
+ DW_TAG_unspecified_parameters = 0x18,
+ DW_TAG_variant = 0x19,
+ DW_TAG_common_block = 0x1a,
+ DW_TAG_common_inclusion = 0x1b,
+ DW_TAG_inheritance = 0x1c,
+ DW_TAG_inlined_subroutine = 0x1d,
+ DW_TAG_module = 0x1e,
+ DW_TAG_ptr_to_member_type = 0x1f,
+ DW_TAG_set_type = 0x20,
+ DW_TAG_subrange_type = 0x21,
+ DW_TAG_with_stmt = 0x22,
+ DW_TAG_access_declaration = 0x23,
+ DW_TAG_base_type = 0x24,
+ DW_TAG_catch_block = 0x25,
+ DW_TAG_const_type = 0x26,
+ DW_TAG_constant = 0x27,
+ DW_TAG_enumerator = 0x28,
+ DW_TAG_file_type = 0x29,
+ DW_TAG_friend = 0x2a,
+ DW_TAG_namelist = 0x2b,
+ DW_TAG_namelist_item = 0x2c,
+ DW_TAG_packed_type = 0x2d,
+ DW_TAG_subprogram = 0x2e,
+ DW_TAG_template_type_param = 0x2f,
+ DW_TAG_template_value_param = 0x30,
+ DW_TAG_thrown_type = 0x31,
+ DW_TAG_try_block = 0x32,
+ DW_TAG_variant_part = 0x33,
+ DW_TAG_variable = 0x34,
+ DW_TAG_volatile_type = 0x35,
+ // DWARF 3.
+ DW_TAG_dwarf_procedure = 0x36,
+ DW_TAG_restrict_type = 0x37,
+ DW_TAG_interface_type = 0x38,
+ DW_TAG_namespace = 0x39,
+ DW_TAG_imported_module = 0x3a,
+ DW_TAG_unspecified_type = 0x3b,
+ DW_TAG_partial_unit = 0x3c,
+ DW_TAG_imported_unit = 0x3d,
+ // DWARF 4.
+ DW_TAG_type_unit = 0x41,
+ // DWARF 5.
+ DW_TAG_skeleton_unit = 0x4a,
+ // SGI/MIPS Extensions.
+ DW_TAG_MIPS_loop = 0x4081,
+ // HP extensions. See:
+ // ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz
+ DW_TAG_HP_array_descriptor = 0x4090,
+ // GNU extensions.
+ DW_TAG_format_label = 0x4101, // For FORTRAN 77 and Fortran 90.
+ DW_TAG_function_template = 0x4102, // For C++.
+ DW_TAG_class_template = 0x4103, // For C++.
+ DW_TAG_GNU_BINCL = 0x4104,
+ DW_TAG_GNU_EINCL = 0x4105,
+ // Extensions for UPC. See: http://upc.gwu.edu/~upc.
+ DW_TAG_upc_shared_type = 0x8765,
+ DW_TAG_upc_strict_type = 0x8766,
+ DW_TAG_upc_relaxed_type = 0x8767,
+ // PGI (STMicroelectronics) extensions. No documentation available.
+ DW_TAG_PGI_kanji_type = 0xA000,
+ DW_TAG_PGI_interface_block = 0xA020
+};
+
+enum DwarfUnitHeader {
+ DW_UT_compile = 0x01,
+ DW_UT_type = 0x02,
+ DW_UT_partial = 0x03,
+ DW_UT_skeleton = 0x04,
+ DW_UT_split_compile = 0x05,
+ DW_UT_split_type = 0x06,
+ DW_UT_lo_user = 0x80,
+ DW_UT_hi_user = 0xFF
+};
+
+enum DwarfHasChild {
+ DW_children_no = 0,
+ DW_children_yes = 1
+};
+
+// Form names and codes.
+enum DwarfForm {
+ DW_FORM_addr = 0x01,
+ DW_FORM_block2 = 0x03,
+ DW_FORM_block4 = 0x04,
+ DW_FORM_data2 = 0x05,
+ DW_FORM_data4 = 0x06,
+ DW_FORM_data8 = 0x07,
+ DW_FORM_string = 0x08,
+ DW_FORM_block = 0x09,
+ DW_FORM_block1 = 0x0a,
+ DW_FORM_data1 = 0x0b,
+ DW_FORM_flag = 0x0c,
+ DW_FORM_sdata = 0x0d,
+ DW_FORM_strp = 0x0e,
+ DW_FORM_udata = 0x0f,
+ DW_FORM_ref_addr = 0x10,
+ DW_FORM_ref1 = 0x11,
+ DW_FORM_ref2 = 0x12,
+ DW_FORM_ref4 = 0x13,
+ DW_FORM_ref8 = 0x14,
+ DW_FORM_ref_udata = 0x15,
+ DW_FORM_indirect = 0x16,
+
+ // Added in DWARF 4:
+ DW_FORM_sec_offset = 0x17,
+ DW_FORM_exprloc = 0x18,
+ DW_FORM_flag_present = 0x19,
+
+ // Added in DWARF 5:
+ DW_FORM_strx = 0x1a,
+ DW_FORM_addrx = 0x1b,
+ DW_FORM_ref_sup4 = 0x1c,
+ DW_FORM_strp_sup = 0x1d,
+ DW_FORM_data16 = 0x1e,
+ DW_FORM_line_strp = 0x1f,
+
+ // DWARF 4, but value out of order.
+ DW_FORM_ref_sig8 = 0x20,
+
+ // Added in DWARF 5:
+ DW_FORM_implicit_const = 0x21,
+ DW_FORM_loclistx = 0x22,
+ DW_FORM_rnglistx = 0x23,
+ DW_FORM_ref_sup8 = 0x24,
+ DW_FORM_strx1 = 0x25,
+ DW_FORM_strx2 = 0x26,
+ DW_FORM_strx3 = 0x27,
+ DW_FORM_strx4 = 0x28,
+
+ DW_FORM_addrx1 = 0x29,
+ DW_FORM_addrx2 = 0x2a,
+ DW_FORM_addrx3 = 0x2b,
+ DW_FORM_addrx4 = 0x2c,
+
+ // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+ DW_FORM_GNU_addr_index = 0x1f01,
+ DW_FORM_GNU_str_index = 0x1f02
+};
+
+// Attribute names and codes
+enum DwarfAttribute {
+ DW_AT_sibling = 0x01,
+ DW_AT_location = 0x02,
+ DW_AT_name = 0x03,
+ DW_AT_ordering = 0x09,
+ DW_AT_subscr_data = 0x0a,
+ DW_AT_byte_size = 0x0b,
+ DW_AT_bit_offset = 0x0c,
+ DW_AT_bit_size = 0x0d,
+ DW_AT_element_list = 0x0f,
+ DW_AT_stmt_list = 0x10,
+ DW_AT_low_pc = 0x11,
+ DW_AT_high_pc = 0x12,
+ DW_AT_language = 0x13,
+ DW_AT_member = 0x14,
+ DW_AT_discr = 0x15,
+ DW_AT_discr_value = 0x16,
+ DW_AT_visibility = 0x17,
+ DW_AT_import = 0x18,
+ DW_AT_string_length = 0x19,
+ DW_AT_common_reference = 0x1a,
+ DW_AT_comp_dir = 0x1b,
+ DW_AT_const_value = 0x1c,
+ DW_AT_containing_type = 0x1d,
+ DW_AT_default_value = 0x1e,
+ DW_AT_inline = 0x20,
+ DW_AT_is_optional = 0x21,
+ DW_AT_lower_bound = 0x22,
+ DW_AT_producer = 0x25,
+ DW_AT_prototyped = 0x27,
+ DW_AT_return_addr = 0x2a,
+ DW_AT_start_scope = 0x2c,
+ DW_AT_stride_size = 0x2e,
+ DW_AT_upper_bound = 0x2f,
+ DW_AT_abstract_origin = 0x31,
+ DW_AT_accessibility = 0x32,
+ DW_AT_address_class = 0x33,
+ DW_AT_artificial = 0x34,
+ DW_AT_base_types = 0x35,
+ DW_AT_calling_convention = 0x36,
+ DW_AT_count = 0x37,
+ DW_AT_data_member_location = 0x38,
+ DW_AT_decl_column = 0x39,
+ DW_AT_decl_file = 0x3a,
+ DW_AT_decl_line = 0x3b,
+ DW_AT_declaration = 0x3c,
+ DW_AT_discr_list = 0x3d,
+ DW_AT_encoding = 0x3e,
+ DW_AT_external = 0x3f,
+ DW_AT_frame_base = 0x40,
+ DW_AT_friend = 0x41,
+ DW_AT_identifier_case = 0x42,
+ DW_AT_macro_info = 0x43,
+ DW_AT_namelist_items = 0x44,
+ DW_AT_priority = 0x45,
+ DW_AT_segment = 0x46,
+ DW_AT_specification = 0x47,
+ DW_AT_static_link = 0x48,
+ DW_AT_type = 0x49,
+ DW_AT_use_location = 0x4a,
+ DW_AT_variable_parameter = 0x4b,
+ DW_AT_virtuality = 0x4c,
+ DW_AT_vtable_elem_location = 0x4d,
+ // DWARF 3 values.
+ DW_AT_allocated = 0x4e,
+ DW_AT_associated = 0x4f,
+ DW_AT_data_location = 0x50,
+ DW_AT_stride = 0x51,
+ DW_AT_entry_pc = 0x52,
+ DW_AT_use_UTF8 = 0x53,
+ DW_AT_extension = 0x54,
+ DW_AT_ranges = 0x55,
+ DW_AT_trampoline = 0x56,
+ DW_AT_call_column = 0x57,
+ DW_AT_call_file = 0x58,
+ DW_AT_call_line = 0x59,
+ // DWARF 4
+ DW_AT_linkage_name = 0x6e,
+ // DWARF 5
+ DW_AT_str_offsets_base = 0x72,
+ DW_AT_addr_base = 0x73,
+ DW_AT_rnglists_base = 0x74,
+ DW_AT_dwo_name = 0x76,
+ // SGI/MIPS extensions.
+ DW_AT_MIPS_fde = 0x2001,
+ DW_AT_MIPS_loop_begin = 0x2002,
+ DW_AT_MIPS_tail_loop_begin = 0x2003,
+ DW_AT_MIPS_epilog_begin = 0x2004,
+ DW_AT_MIPS_loop_unroll_factor = 0x2005,
+ DW_AT_MIPS_software_pipeline_depth = 0x2006,
+ DW_AT_MIPS_linkage_name = 0x2007,
+ DW_AT_MIPS_stride = 0x2008,
+ DW_AT_MIPS_abstract_name = 0x2009,
+ DW_AT_MIPS_clone_origin = 0x200a,
+ DW_AT_MIPS_has_inlines = 0x200b,
+ // HP extensions.
+ DW_AT_HP_block_index = 0x2000,
+ DW_AT_HP_unmodifiable = 0x2001, // Same as DW_AT_MIPS_fde.
+ DW_AT_HP_actuals_stmt_list = 0x2010,
+ DW_AT_HP_proc_per_section = 0x2011,
+ DW_AT_HP_raw_data_ptr = 0x2012,
+ DW_AT_HP_pass_by_reference = 0x2013,
+ DW_AT_HP_opt_level = 0x2014,
+ DW_AT_HP_prof_version_id = 0x2015,
+ DW_AT_HP_opt_flags = 0x2016,
+ DW_AT_HP_cold_region_low_pc = 0x2017,
+ DW_AT_HP_cold_region_high_pc = 0x2018,
+ DW_AT_HP_all_variables_modifiable = 0x2019,
+ DW_AT_HP_linkage_name = 0x201a,
+ DW_AT_HP_prof_flags = 0x201b, // In comp unit of procs_info for -g.
+ // GNU extensions.
+ DW_AT_sf_names = 0x2101,
+ DW_AT_src_info = 0x2102,
+ DW_AT_mac_info = 0x2103,
+ DW_AT_src_coords = 0x2104,
+ DW_AT_body_begin = 0x2105,
+ DW_AT_body_end = 0x2106,
+ DW_AT_GNU_vector = 0x2107,
+ // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+ DW_AT_GNU_dwo_name = 0x2130,
+ DW_AT_GNU_dwo_id = 0x2131,
+ DW_AT_GNU_ranges_base = 0x2132,
+ DW_AT_GNU_addr_base = 0x2133,
+ DW_AT_GNU_pubnames = 0x2134,
+ DW_AT_GNU_pubtypes = 0x2135,
+ // VMS extensions.
+ DW_AT_VMS_rtnbeg_pd_address = 0x2201,
+ // UPC extension.
+ DW_AT_upc_threads_scaled = 0x3210,
+ // PGI (STMicroelectronics) extensions.
+ DW_AT_PGI_lbase = 0x3a00,
+ DW_AT_PGI_soffset = 0x3a01,
+ DW_AT_PGI_lstride = 0x3a02
+};
+
+// .debug_rngslist entry types
+enum DwarfRngListEntry {
+ DW_RLE_end_of_list = 0,
+ DW_RLE_base_addressx = 1,
+ DW_RLE_startx_endx = 2,
+ DW_RLE_startx_length = 3,
+ DW_RLE_offset_pair = 4,
+ DW_RLE_base_address = 5,
+ DW_RLE_start_end = 6,
+ DW_RLE_start_length = 7,
+};
+
+// Line number content type codes (DWARF 5).
+enum DwarfLineNumberContentType {
+ DW_LNCT_path = 1,
+ DW_LNCT_directory_index = 2,
+ DW_LNCT_timestamp = 3,
+ DW_LNCT_size = 4,
+ DW_LNCT_MD5 = 5,
+};
+
+// Line number opcodes.
+enum DwarfLineNumberOps {
+ DW_LNS_extended_op = 0,
+ DW_LNS_copy = 1,
+ DW_LNS_advance_pc = 2,
+ DW_LNS_advance_line = 3,
+ DW_LNS_set_file = 4,
+ DW_LNS_set_column = 5,
+ DW_LNS_negate_stmt = 6,
+ DW_LNS_set_basic_block = 7,
+ DW_LNS_const_add_pc = 8,
+ DW_LNS_fixed_advance_pc = 9,
+ // DWARF 3.
+ DW_LNS_set_prologue_end = 10,
+ DW_LNS_set_epilogue_begin = 11,
+ DW_LNS_set_isa = 12
+};
+
+// Line number extended opcodes.
+enum DwarfLineNumberExtendedOps {
+ DW_LNE_end_sequence = 1,
+ DW_LNE_set_address = 2,
+ DW_LNE_define_file = 3,
+ // HP extensions.
+ DW_LNE_HP_negate_is_UV_update = 0x11,
+ DW_LNE_HP_push_context = 0x12,
+ DW_LNE_HP_pop_context = 0x13,
+ DW_LNE_HP_set_file_line_column = 0x14,
+ DW_LNE_HP_set_routine_name = 0x15,
+ DW_LNE_HP_set_sequence = 0x16,
+ DW_LNE_HP_negate_post_semantics = 0x17,
+ DW_LNE_HP_negate_function_exit = 0x18,
+ DW_LNE_HP_negate_front_end_logical = 0x19,
+ DW_LNE_HP_define_proc = 0x20
+};
+
+// Type encoding names and codes
+enum DwarfEncoding {
+ DW_ATE_address =0x1,
+ DW_ATE_boolean =0x2,
+ DW_ATE_complex_float =0x3,
+ DW_ATE_float =0x4,
+ DW_ATE_signed =0x5,
+ DW_ATE_signed_char =0x6,
+ DW_ATE_unsigned =0x7,
+ DW_ATE_unsigned_char =0x8,
+ // DWARF3/DWARF3f
+ DW_ATE_imaginary_float =0x9,
+ DW_ATE_packed_decimal =0xa,
+ DW_ATE_numeric_string =0xb,
+ DW_ATE_edited =0xc,
+ DW_ATE_signed_fixed =0xd,
+ DW_ATE_unsigned_fixed =0xe,
+ DW_ATE_decimal_float =0xf,
+ DW_ATE_lo_user =0x80,
+ DW_ATE_hi_user =0xff
+};
+
+// Location virtual machine opcodes
+enum DwarfOpcode {
+ DW_OP_addr =0x03,
+ DW_OP_deref =0x06,
+ DW_OP_const1u =0x08,
+ DW_OP_const1s =0x09,
+ DW_OP_const2u =0x0a,
+ DW_OP_const2s =0x0b,
+ DW_OP_const4u =0x0c,
+ DW_OP_const4s =0x0d,
+ DW_OP_const8u =0x0e,
+ DW_OP_const8s =0x0f,
+ DW_OP_constu =0x10,
+ DW_OP_consts =0x11,
+ DW_OP_dup =0x12,
+ DW_OP_drop =0x13,
+ DW_OP_over =0x14,
+ DW_OP_pick =0x15,
+ DW_OP_swap =0x16,
+ DW_OP_rot =0x17,
+ DW_OP_xderef =0x18,
+ DW_OP_abs =0x19,
+ DW_OP_and =0x1a,
+ DW_OP_div =0x1b,
+ DW_OP_minus =0x1c,
+ DW_OP_mod =0x1d,
+ DW_OP_mul =0x1e,
+ DW_OP_neg =0x1f,
+ DW_OP_not =0x20,
+ DW_OP_or =0x21,
+ DW_OP_plus =0x22,
+ DW_OP_plus_uconst =0x23,
+ DW_OP_shl =0x24,
+ DW_OP_shr =0x25,
+ DW_OP_shra =0x26,
+ DW_OP_xor =0x27,
+ DW_OP_bra =0x28,
+ DW_OP_eq =0x29,
+ DW_OP_ge =0x2a,
+ DW_OP_gt =0x2b,
+ DW_OP_le =0x2c,
+ DW_OP_lt =0x2d,
+ DW_OP_ne =0x2e,
+ DW_OP_skip =0x2f,
+ DW_OP_lit0 =0x30,
+ DW_OP_lit1 =0x31,
+ DW_OP_lit2 =0x32,
+ DW_OP_lit3 =0x33,
+ DW_OP_lit4 =0x34,
+ DW_OP_lit5 =0x35,
+ DW_OP_lit6 =0x36,
+ DW_OP_lit7 =0x37,
+ DW_OP_lit8 =0x38,
+ DW_OP_lit9 =0x39,
+ DW_OP_lit10 =0x3a,
+ DW_OP_lit11 =0x3b,
+ DW_OP_lit12 =0x3c,
+ DW_OP_lit13 =0x3d,
+ DW_OP_lit14 =0x3e,
+ DW_OP_lit15 =0x3f,
+ DW_OP_lit16 =0x40,
+ DW_OP_lit17 =0x41,
+ DW_OP_lit18 =0x42,
+ DW_OP_lit19 =0x43,
+ DW_OP_lit20 =0x44,
+ DW_OP_lit21 =0x45,
+ DW_OP_lit22 =0x46,
+ DW_OP_lit23 =0x47,
+ DW_OP_lit24 =0x48,
+ DW_OP_lit25 =0x49,
+ DW_OP_lit26 =0x4a,
+ DW_OP_lit27 =0x4b,
+ DW_OP_lit28 =0x4c,
+ DW_OP_lit29 =0x4d,
+ DW_OP_lit30 =0x4e,
+ DW_OP_lit31 =0x4f,
+ DW_OP_reg0 =0x50,
+ DW_OP_reg1 =0x51,
+ DW_OP_reg2 =0x52,
+ DW_OP_reg3 =0x53,
+ DW_OP_reg4 =0x54,
+ DW_OP_reg5 =0x55,
+ DW_OP_reg6 =0x56,
+ DW_OP_reg7 =0x57,
+ DW_OP_reg8 =0x58,
+ DW_OP_reg9 =0x59,
+ DW_OP_reg10 =0x5a,
+ DW_OP_reg11 =0x5b,
+ DW_OP_reg12 =0x5c,
+ DW_OP_reg13 =0x5d,
+ DW_OP_reg14 =0x5e,
+ DW_OP_reg15 =0x5f,
+ DW_OP_reg16 =0x60,
+ DW_OP_reg17 =0x61,
+ DW_OP_reg18 =0x62,
+ DW_OP_reg19 =0x63,
+ DW_OP_reg20 =0x64,
+ DW_OP_reg21 =0x65,
+ DW_OP_reg22 =0x66,
+ DW_OP_reg23 =0x67,
+ DW_OP_reg24 =0x68,
+ DW_OP_reg25 =0x69,
+ DW_OP_reg26 =0x6a,
+ DW_OP_reg27 =0x6b,
+ DW_OP_reg28 =0x6c,
+ DW_OP_reg29 =0x6d,
+ DW_OP_reg30 =0x6e,
+ DW_OP_reg31 =0x6f,
+ DW_OP_breg0 =0x70,
+ DW_OP_breg1 =0x71,
+ DW_OP_breg2 =0x72,
+ DW_OP_breg3 =0x73,
+ DW_OP_breg4 =0x74,
+ DW_OP_breg5 =0x75,
+ DW_OP_breg6 =0x76,
+ DW_OP_breg7 =0x77,
+ DW_OP_breg8 =0x78,
+ DW_OP_breg9 =0x79,
+ DW_OP_breg10 =0x7a,
+ DW_OP_breg11 =0x7b,
+ DW_OP_breg12 =0x7c,
+ DW_OP_breg13 =0x7d,
+ DW_OP_breg14 =0x7e,
+ DW_OP_breg15 =0x7f,
+ DW_OP_breg16 =0x80,
+ DW_OP_breg17 =0x81,
+ DW_OP_breg18 =0x82,
+ DW_OP_breg19 =0x83,
+ DW_OP_breg20 =0x84,
+ DW_OP_breg21 =0x85,
+ DW_OP_breg22 =0x86,
+ DW_OP_breg23 =0x87,
+ DW_OP_breg24 =0x88,
+ DW_OP_breg25 =0x89,
+ DW_OP_breg26 =0x8a,
+ DW_OP_breg27 =0x8b,
+ DW_OP_breg28 =0x8c,
+ DW_OP_breg29 =0x8d,
+ DW_OP_breg30 =0x8e,
+ DW_OP_breg31 =0x8f,
+ DW_OP_regX =0x90,
+ DW_OP_fbreg =0x91,
+ DW_OP_bregX =0x92,
+ DW_OP_piece =0x93,
+ DW_OP_deref_size =0x94,
+ DW_OP_xderef_size =0x95,
+ DW_OP_nop =0x96,
+ // DWARF3/DWARF3f
+ DW_OP_push_object_address =0x97,
+ DW_OP_call2 =0x98,
+ DW_OP_call4 =0x99,
+ DW_OP_call_ref =0x9a,
+ DW_OP_form_tls_address =0x9b,
+ DW_OP_call_frame_cfa =0x9c,
+ DW_OP_bit_piece =0x9d,
+ DW_OP_lo_user =0xe0,
+ DW_OP_hi_user =0xff,
+ // GNU extensions
+ DW_OP_GNU_push_tls_address =0xe0,
+ // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+ DW_OP_GNU_addr_index =0xfb,
+ DW_OP_GNU_const_index =0xfc
+};
+
+// Section identifiers for DWP files
+enum DwarfSectionId {
+ DW_SECT_INFO = 1,
+ DW_SECT_TYPES = 2,
+ DW_SECT_ABBREV = 3,
+ DW_SECT_LINE = 4,
+ DW_SECT_LOC = 5,
+ DW_SECT_STR_OFFSETS = 6,
+ DW_SECT_MACINFO = 7,
+ DW_SECT_MACRO = 8
+};
+
+// Source languages. These are values for DW_AT_language.
+enum DwarfLanguage
+ {
+ DW_LANG_none =0x0000,
+ DW_LANG_C89 =0x0001,
+ DW_LANG_C =0x0002,
+ DW_LANG_Ada83 =0x0003,
+ DW_LANG_C_plus_plus =0x0004,
+ DW_LANG_Cobol74 =0x0005,
+ DW_LANG_Cobol85 =0x0006,
+ DW_LANG_Fortran77 =0x0007,
+ DW_LANG_Fortran90 =0x0008,
+ DW_LANG_Pascal83 =0x0009,
+ DW_LANG_Modula2 =0x000a,
+ DW_LANG_Java =0x000b,
+ DW_LANG_C99 =0x000c,
+ DW_LANG_Ada95 =0x000d,
+ DW_LANG_Fortran95 =0x000e,
+ DW_LANG_PLI =0x000f,
+ DW_LANG_ObjC =0x0010,
+ DW_LANG_ObjC_plus_plus =0x0011,
+ DW_LANG_UPC =0x0012,
+ DW_LANG_D =0x0013,
+ DW_LANG_Rust =0x001c,
+ DW_LANG_Swift =0x001e,
+ // Implementation-defined language code range.
+ DW_LANG_lo_user = 0x8000,
+ DW_LANG_hi_user = 0xffff,
+
+ // Extensions.
+
+ // MIPS assembly language. The GNU toolchain uses this for all
+ // assembly languages, since there's no generic DW_LANG_ value for that.
+ // See include/dwarf2.h in the binutils, gdb, or gcc source trees.
+ DW_LANG_Mips_Assembler =0x8001,
+ DW_LANG_Upc =0x8765 // Unified Parallel C
+ };
+
+// Inline codes. These are values for DW_AT_inline.
+enum DwarfInline {
+ DW_INL_not_inlined =0x0,
+ DW_INL_inlined =0x1,
+ DW_INL_declared_not_inlined =0x2,
+ DW_INL_declared_inlined =0x3
+};
+
+// Call Frame Info instructions.
+enum DwarfCFI
+ {
+ DW_CFA_advance_loc = 0x40,
+ DW_CFA_offset = 0x80,
+ DW_CFA_restore = 0xc0,
+ DW_CFA_nop = 0x00,
+ DW_CFA_set_loc = 0x01,
+ DW_CFA_advance_loc1 = 0x02,
+ DW_CFA_advance_loc2 = 0x03,
+ DW_CFA_advance_loc4 = 0x04,
+ DW_CFA_offset_extended = 0x05,
+ DW_CFA_restore_extended = 0x06,
+ DW_CFA_undefined = 0x07,
+ DW_CFA_same_value = 0x08,
+ DW_CFA_register = 0x09,
+ DW_CFA_remember_state = 0x0a,
+ DW_CFA_restore_state = 0x0b,
+ DW_CFA_def_cfa = 0x0c,
+ DW_CFA_def_cfa_register = 0x0d,
+ DW_CFA_def_cfa_offset = 0x0e,
+ DW_CFA_def_cfa_expression = 0x0f,
+ DW_CFA_expression = 0x10,
+ DW_CFA_offset_extended_sf = 0x11,
+ DW_CFA_def_cfa_sf = 0x12,
+ DW_CFA_def_cfa_offset_sf = 0x13,
+ DW_CFA_val_offset = 0x14,
+ DW_CFA_val_offset_sf = 0x15,
+ DW_CFA_val_expression = 0x16,
+
+ // Opcodes in this range are reserved for user extensions.
+ DW_CFA_lo_user = 0x1c,
+ DW_CFA_hi_user = 0x3f,
+
+ // SGI/MIPS specific.
+ DW_CFA_MIPS_advance_loc8 = 0x1d,
+
+ // GNU extensions.
+ DW_CFA_GNU_window_save = 0x2d,
+ DW_CFA_GNU_args_size = 0x2e,
+ DW_CFA_GNU_negative_offset_extended = 0x2f
+ };
+
+// Exception handling 'z' augmentation letters.
+enum DwarfZAugmentationCodes {
+ // If the CFI augmentation string begins with 'z', then the CIE and FDE
+ // have an augmentation data area just before the instructions, whose
+ // contents are determined by the subsequent augmentation letters.
+ DW_Z_augmentation_start = 'z',
+
+ // If this letter is present in a 'z' augmentation string, the CIE
+ // augmentation data includes a pointer encoding, and the FDE
+ // augmentation data includes a language-specific data area pointer,
+ // represented using that encoding.
+ DW_Z_has_LSDA = 'L',
+
+ // If this letter is present in a 'z' augmentation string, the CIE
+ // augmentation data includes a pointer encoding, followed by a pointer
+ // to a personality routine, represented using that encoding.
+ DW_Z_has_personality_routine = 'P',
+
+ // If this letter is present in a 'z' augmentation string, the CIE
+ // augmentation data includes a pointer encoding describing how the FDE's
+ // initial location, address range, and DW_CFA_set_loc operands are
+ // encoded.
+ DW_Z_has_FDE_address_encoding = 'R',
+
+ // If this letter is present in a 'z' augmentation string, then code
+ // addresses covered by FDEs that cite this CIE are signal delivery
+ // trampolines. Return addresses of frames in trampolines should not be
+ // adjusted as described in section 6.4.4 of the DWARF 3 spec.
+ DW_Z_is_signal_trampoline = 'S'
+};
+
+// Exception handling frame description pointer formats, as described
+// by the Linux Standard Base Core Specification 4.0, section 11.5,
+// DWARF Extensions.
+enum DwarfPointerEncoding
+ {
+ DW_EH_PE_absptr = 0x00,
+ DW_EH_PE_omit = 0xff,
+ DW_EH_PE_uleb128 = 0x01,
+ DW_EH_PE_udata2 = 0x02,
+ DW_EH_PE_udata4 = 0x03,
+ DW_EH_PE_udata8 = 0x04,
+ DW_EH_PE_sleb128 = 0x09,
+ DW_EH_PE_sdata2 = 0x0A,
+ DW_EH_PE_sdata4 = 0x0B,
+ DW_EH_PE_sdata8 = 0x0C,
+ DW_EH_PE_pcrel = 0x10,
+ DW_EH_PE_textrel = 0x20,
+ DW_EH_PE_datarel = 0x30,
+ DW_EH_PE_funcrel = 0x40,
+ DW_EH_PE_aligned = 0x50,
+
+ // The GNU toolchain sources define this enum value as well,
+ // simply to help classify the lower nybble values into signed and
+ // unsigned groups.
+ DW_EH_PE_signed = 0x08,
+
+ // This is not documented in LSB 4.0, but it is used in both the
+ // Linux and OS X toolchains. It can be added to any other
+ // encoding (except DW_EH_PE_aligned), and indicates that the
+ // encoded value represents the address at which the true address
+ // is stored, not the true address itself.
+ DW_EH_PE_indirect = 0x80
+ };
+
+} // namespace google_breakpad
+#endif // COMMON_DWARF_DWARF2ENUMS_H__
diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc
new file mode 100644
index 0000000000..6bd2614b42
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.cc
@@ -0,0 +1,3435 @@
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Implementation of LineInfo, CompilationUnit,
+// and CallFrameInfo. See dwarf2reader.h for details.
+
+#include "common/dwarf/dwarf2reader.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <stack>
+#include <string>
+#include <utility>
+
+#include <sys/stat.h>
+
+#include "common/dwarf/bytereader-inl.h"
+#include "common/dwarf/bytereader.h"
+#include "common/dwarf/line_state_machine.h"
+#include "common/using_std_string.h"
+#include "google_breakpad/common/breakpad_types.h"
+
+namespace google_breakpad {
+
+const SectionMap::const_iterator GetSectionByName(const SectionMap&
+ sections, const char *name) {
+ assert(name[0] == '.');
+ auto iter = sections.find(name);
+ if (iter != sections.end())
+ return iter;
+ std::string macho_name("__");
+ macho_name += name + 1;
+ iter = sections.find(macho_name);
+ return iter;
+}
+
+CompilationUnit::CompilationUnit(const string& path,
+ const SectionMap& sections, uint64_t offset,
+ ByteReader* reader, Dwarf2Handler* handler)
+ : path_(path), offset_from_section_start_(offset), reader_(reader),
+ sections_(sections), handler_(handler), abbrevs_(),
+ string_buffer_(NULL), string_buffer_length_(0),
+ line_string_buffer_(NULL), line_string_buffer_length_(0),
+ str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
+ addr_buffer_(NULL), addr_buffer_length_(0),
+ is_split_dwarf_(false), is_type_unit_(false), dwo_id_(0), dwo_name_(),
+ skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
+ str_offsets_base_(0), have_checked_for_dwp_(false), dwp_path_(),
+ dwp_byte_reader_(), dwp_reader_() {}
+
+// Initialize a compilation unit from a .dwo or .dwp file.
+// In this case, we need the .debug_addr section from the
+// executable file that contains the corresponding skeleton
+// compilation unit. We also inherit the Dwarf2Handler from
+// the executable file, and call it as if we were still
+// processing the original compilation unit.
+
+void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
+ uint64_t addr_buffer_length,
+ uint64_t addr_base,
+ uint64_t ranges_base,
+ uint64_t dwo_id) {
+ is_split_dwarf_ = true;
+ addr_buffer_ = addr_buffer;
+ addr_buffer_length_ = addr_buffer_length;
+ addr_base_ = addr_base;
+ ranges_base_ = ranges_base;
+ skeleton_dwo_id_ = dwo_id;
+}
+
+// Read a DWARF2/3 abbreviation section.
+// Each abbrev consists of a abbreviation number, a tag, a byte
+// specifying whether the tag has children, and a list of
+// attribute/form pairs.
+// The list of forms is terminated by a 0 for the attribute, and a
+// zero for the form. The entire abbreviation section is terminated
+// by a zero for the code.
+
+void CompilationUnit::ReadAbbrevs() {
+ if (abbrevs_)
+ return;
+
+ // First get the debug_abbrev section.
+ SectionMap::const_iterator iter =
+ GetSectionByName(sections_, ".debug_abbrev");
+ assert(iter != sections_.end());
+
+ abbrevs_ = new std::vector<Abbrev>;
+ abbrevs_->resize(1);
+
+ // The only way to check whether we are reading over the end of the
+ // buffer would be to first compute the size of the leb128 data by
+ // reading it, then go back and read it again.
+ const uint8_t* abbrev_start = iter->second.first +
+ header_.abbrev_offset;
+ const uint8_t* abbrevptr = abbrev_start;
+#ifndef NDEBUG
+ const uint64_t abbrev_length = iter->second.second - header_.abbrev_offset;
+#endif
+
+ uint64_t highest_number = 0;
+
+ while (1) {
+ CompilationUnit::Abbrev abbrev;
+ size_t len;
+ const uint64_t number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
+ highest_number = std::max(highest_number, number);
+
+ if (number == 0)
+ break;
+ abbrev.number = number;
+ abbrevptr += len;
+
+ assert(abbrevptr < abbrev_start + abbrev_length);
+ const uint64_t tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
+ abbrevptr += len;
+ abbrev.tag = static_cast<enum DwarfTag>(tag);
+
+ assert(abbrevptr < abbrev_start + abbrev_length);
+ abbrev.has_children = reader_->ReadOneByte(abbrevptr);
+ abbrevptr += 1;
+
+ assert(abbrevptr < abbrev_start + abbrev_length);
+
+ while (1) {
+ const uint64_t nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
+ abbrevptr += len;
+
+ assert(abbrevptr < abbrev_start + abbrev_length);
+ const uint64_t formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
+ abbrevptr += len;
+ if (nametemp == 0 && formtemp == 0)
+ break;
+
+ uint64_t value = 0;
+ if (formtemp == DW_FORM_implicit_const) {
+ value = reader_->ReadUnsignedLEB128(abbrevptr, &len);
+ abbrevptr += len;
+ }
+ AttrForm abbrev_attr(static_cast<enum DwarfAttribute>(nametemp),
+ static_cast<enum DwarfForm>(formtemp),
+ value);
+ abbrev.attributes.push_back(abbrev_attr);
+ }
+ abbrevs_->push_back(abbrev);
+ }
+
+ // Account of cases where entries are out of order.
+ std::sort(abbrevs_->begin(), abbrevs_->end(),
+ [](const CompilationUnit::Abbrev& lhs, const CompilationUnit::Abbrev& rhs) {
+ return lhs.number < rhs.number;
+ });
+
+ // Ensure that there are no missing sections.
+ assert(abbrevs_->size() == highest_number + 1);
+}
+
+// Skips a single DIE's attributes.
+const uint8_t* CompilationUnit::SkipDIE(const uint8_t* start,
+ const Abbrev& abbrev) {
+ for (AttributeList::const_iterator i = abbrev.attributes.begin();
+ i != abbrev.attributes.end();
+ i++) {
+ start = SkipAttribute(start, i->form_);
+ }
+ return start;
+}
+
+// Skips a single attribute form's data.
+const uint8_t* CompilationUnit::SkipAttribute(const uint8_t* start,
+ enum DwarfForm form) {
+ size_t len;
+
+ switch (form) {
+ case DW_FORM_indirect:
+ form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
+ &len));
+ start += len;
+ return SkipAttribute(start, form);
+
+ case DW_FORM_flag_present:
+ case DW_FORM_implicit_const:
+ return start;
+ case DW_FORM_addrx1:
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ case DW_FORM_strx1:
+ return start + 1;
+ case DW_FORM_addrx2:
+ case DW_FORM_ref2:
+ case DW_FORM_data2:
+ case DW_FORM_strx2:
+ return start + 2;
+ case DW_FORM_addrx3:
+ case DW_FORM_strx3:
+ return start + 3;
+ case DW_FORM_addrx4:
+ case DW_FORM_ref4:
+ case DW_FORM_data4:
+ case DW_FORM_strx4:
+ case DW_FORM_ref_sup4:
+ return start + 4;
+ case DW_FORM_ref8:
+ case DW_FORM_data8:
+ case DW_FORM_ref_sig8:
+ case DW_FORM_ref_sup8:
+ return start + 8;
+ case DW_FORM_data16:
+ return start + 16;
+ case DW_FORM_string:
+ return start + strlen(reinterpret_cast<const char*>(start)) + 1;
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ case DW_FORM_strx:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
+ case DW_FORM_addrx:
+ case DW_FORM_rnglistx:
+ case DW_FORM_loclistx:
+ reader_->ReadUnsignedLEB128(start, &len);
+ return start + len;
+
+ case DW_FORM_sdata:
+ reader_->ReadSignedLEB128(start, &len);
+ return start + len;
+ case DW_FORM_addr:
+ return start + reader_->AddressSize();
+ case DW_FORM_ref_addr:
+ // DWARF2 and 3/4 differ on whether ref_addr is address size or
+ // offset size.
+ assert(header_.version >= 2);
+ if (header_.version == 2) {
+ return start + reader_->AddressSize();
+ } else if (header_.version >= 3) {
+ return start + reader_->OffsetSize();
+ }
+ break;
+
+ case DW_FORM_block1:
+ return start + 1 + reader_->ReadOneByte(start);
+ case DW_FORM_block2:
+ return start + 2 + reader_->ReadTwoBytes(start);
+ case DW_FORM_block4:
+ return start + 4 + reader_->ReadFourBytes(start);
+ case DW_FORM_block:
+ case DW_FORM_exprloc: {
+ uint64_t size = reader_->ReadUnsignedLEB128(start, &len);
+ return start + size + len;
+ }
+ case DW_FORM_strp:
+ case DW_FORM_line_strp:
+ case DW_FORM_strp_sup:
+ case DW_FORM_sec_offset:
+ return start + reader_->OffsetSize();
+ }
+ fprintf(stderr,"Unhandled form type");
+ return NULL;
+}
+
+// Read the abbreviation offset from a compilation unit header.
+size_t CompilationUnit::ReadAbbrevOffset(const uint8_t* headerptr) {
+ assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
+ header_.abbrev_offset = reader_->ReadOffset(headerptr);
+ return reader_->OffsetSize();
+}
+
+// Read the address size from a compilation unit header.
+size_t CompilationUnit::ReadAddressSize(const uint8_t* headerptr) {
+ // Compare against less than or equal because this may be the last
+ // section in the file.
+ assert(headerptr + 1 <= buffer_ + buffer_length_);
+ header_.address_size = reader_->ReadOneByte(headerptr);
+ reader_->SetAddressSize(header_.address_size);
+ return 1;
+}
+
+// Read the DWO id from a split or skeleton compilation unit header.
+size_t CompilationUnit::ReadDwoId(const uint8_t* headerptr) {
+ assert(headerptr + 8 <= buffer_ + buffer_length_);
+ dwo_id_ = reader_->ReadEightBytes(headerptr);
+ return 8;
+}
+
+// Read the type signature from a type or split type compilation unit header.
+size_t CompilationUnit::ReadTypeSignature(const uint8_t* headerptr) {
+ assert(headerptr + 8 <= buffer_ + buffer_length_);
+ type_signature_ = reader_->ReadEightBytes(headerptr);
+ return 8;
+}
+
+// Read the DWO id from a split or skeleton compilation unit header.
+size_t CompilationUnit::ReadTypeOffset(const uint8_t* headerptr) {
+ assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
+ type_offset_ = reader_->ReadOffset(headerptr);
+ return reader_->OffsetSize();
+}
+
+
+// Read a DWARF header. The header is variable length in DWARF3 and DWARF4
+// (and DWARF2 as extended by most compilers), and consists of an length
+// field, a version number, the offset in the .debug_abbrev section for our
+// abbrevs, and an address size. DWARF5 adds a unit_type to distinguish
+// between partial-, full-, skeleton-, split-, and type- compilation units.
+void CompilationUnit::ReadHeader() {
+ const uint8_t* headerptr = buffer_;
+ size_t initial_length_size;
+
+ assert(headerptr + 4 < buffer_ + buffer_length_);
+ const uint64_t initial_length
+ = reader_->ReadInitialLength(headerptr, &initial_length_size);
+ headerptr += initial_length_size;
+ header_.length = initial_length;
+
+ assert(headerptr + 2 < buffer_ + buffer_length_);
+ header_.version = reader_->ReadTwoBytes(headerptr);
+ headerptr += 2;
+
+ if (header_.version <= 4) {
+ // Older versions of dwarf have a relatively simple structure.
+ headerptr += ReadAbbrevOffset(headerptr);
+ headerptr += ReadAddressSize(headerptr);
+ } else {
+ // DWARF5 adds a unit_type field, and various fields based on unit_type.
+ assert(headerptr + 1 < buffer_ + buffer_length_);
+ uint8_t unit_type = reader_->ReadOneByte(headerptr);
+ headerptr += 1;
+ headerptr += ReadAddressSize(headerptr);
+ headerptr += ReadAbbrevOffset(headerptr);
+ switch (unit_type) {
+ case DW_UT_compile:
+ case DW_UT_partial:
+ // nothing else to read
+ break;
+ case DW_UT_skeleton:
+ case DW_UT_split_compile:
+ headerptr += ReadDwoId(headerptr);
+ break;
+ case DW_UT_type:
+ case DW_UT_split_type:
+ is_type_unit_ = true;
+ headerptr += ReadTypeSignature(headerptr);
+ headerptr += ReadTypeOffset(headerptr);
+ break;
+ default:
+ fprintf(stderr, "Unhandled compilation unit type 0x%x", unit_type);
+ break;
+ }
+ }
+ after_header_ = headerptr;
+
+ // This check ensures that we don't have to do checking during the
+ // reading of DIEs. header_.length does not include the size of the
+ // initial length.
+ assert(buffer_ + initial_length_size + header_.length <=
+ buffer_ + buffer_length_);
+}
+
+uint64_t CompilationUnit::Start() {
+ // First get the debug_info section.
+ SectionMap::const_iterator iter =
+ GetSectionByName(sections_, ".debug_info");
+ assert(iter != sections_.end());
+
+ // Set up our buffer
+ buffer_ = iter->second.first + offset_from_section_start_;
+ buffer_length_ = iter->second.second - offset_from_section_start_;
+
+ // Read the header
+ ReadHeader();
+
+ // Figure out the real length from the end of the initial length to
+ // the end of the compilation unit, since that is the value we
+ // return.
+ uint64_t ourlength = header_.length;
+ if (reader_->OffsetSize() == 8)
+ ourlength += 12;
+ else
+ ourlength += 4;
+
+ // See if the user wants this compilation unit, and if not, just return.
+ if (!handler_->StartCompilationUnit(offset_from_section_start_,
+ reader_->AddressSize(),
+ reader_->OffsetSize(),
+ header_.length,
+ header_.version))
+ return ourlength;
+ else if (header_.version == 5 && is_type_unit_)
+ return ourlength;
+
+ // Otherwise, continue by reading our abbreviation entries.
+ ReadAbbrevs();
+
+ // Set the string section if we have one.
+ iter = GetSectionByName(sections_, ".debug_str");
+ if (iter != sections_.end()) {
+ string_buffer_ = iter->second.first;
+ string_buffer_length_ = iter->second.second;
+ }
+
+ // Set the line string section if we have one.
+ iter = GetSectionByName(sections_, ".debug_line_str");
+ if (iter != sections_.end()) {
+ line_string_buffer_ = iter->second.first;
+ line_string_buffer_length_ = iter->second.second;
+ }
+
+ // Set the string offsets section if we have one.
+ iter = GetSectionByName(sections_, ".debug_str_offsets");
+ if (iter != sections_.end()) {
+ str_offsets_buffer_ = iter->second.first;
+ str_offsets_buffer_length_ = iter->second.second;
+ }
+
+ // Set the address section if we have one.
+ iter = GetSectionByName(sections_, ".debug_addr");
+ if (iter != sections_.end()) {
+ addr_buffer_ = iter->second.first;
+ addr_buffer_length_ = iter->second.second;
+ }
+
+ // Now that we have our abbreviations, start processing DIE's.
+ ProcessDIEs();
+
+ // If this is a skeleton compilation unit generated with split DWARF,
+ // and the client needs the full debug info, we need to find the full
+ // compilation unit in a .dwo or .dwp file.
+ if (!is_split_dwarf_
+ && dwo_name_ != NULL
+ && handler_->NeedSplitDebugInfo())
+ ProcessSplitDwarf();
+
+ return ourlength;
+}
+
+void CompilationUnit::ProcessFormStringIndex(
+ uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form,
+ uint64_t str_index) {
+ const size_t kStringOffsetsTableHeaderSize =
+ header_.version >= 5 ? (reader_->OffsetSize() == 8 ? 16 : 8) : 0;
+ const uint8_t* str_offsets_table_after_header = str_offsets_base_ ?
+ str_offsets_buffer_ + str_offsets_base_ :
+ str_offsets_buffer_ + kStringOffsetsTableHeaderSize;
+ const uint8_t* offset_ptr =
+ str_offsets_table_after_header + str_index * reader_->OffsetSize();
+
+ const uint64_t offset = reader_->ReadOffset(offset_ptr);
+ if (offset >= string_buffer_length_) {
+ return;
+ }
+
+ const char* str = reinterpret_cast<const char*>(string_buffer_) + offset;
+ ProcessAttributeString(dieoffset, attr, form, str);
+}
+
+// Special function for pre-processing the
+// DW_AT_str_offsets_base and DW_AT_addr_base in a DW_TAG_compile_unit die (for
+// DWARF v5). We must make sure to find and process the
+// DW_AT_str_offsets_base and DW_AT_addr_base attributes before attempting to
+// read any string and address attribute in the compile unit.
+const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute(
+ uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
+ enum DwarfForm form, uint64_t implicit_const) {
+ size_t len;
+
+ switch (form) {
+ // DW_FORM_indirect is never used because it is such a space
+ // waster.
+ case DW_FORM_indirect:
+ form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
+ &len));
+ start += len;
+ return ProcessOffsetBaseAttribute(dieoffset, start, attr, form,
+ implicit_const);
+
+ case DW_FORM_flag_present:
+ return start;
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ return start + 1;
+ case DW_FORM_data2:
+ return start + 2;
+ case DW_FORM_data4:
+ return start + 4;
+ case DW_FORM_data8:
+ return start + 8;
+ case DW_FORM_data16:
+ // This form is designed for an md5 checksum inside line tables.
+ return start + 16;
+ case DW_FORM_string: {
+ const char* str = reinterpret_cast<const char*>(start);
+ return start + strlen(str) + 1;
+ }
+ case DW_FORM_udata:
+ reader_->ReadUnsignedLEB128(start, &len);
+ return start + len;
+ case DW_FORM_sdata:
+ reader_->ReadSignedLEB128(start, &len);
+ return start + len;
+ case DW_FORM_addr:
+ reader_->ReadAddress(start);
+ return start + reader_->AddressSize();
+
+ // This is the important one here!
+ case DW_FORM_sec_offset:
+ if (attr == DW_AT_str_offsets_base ||
+ attr == DW_AT_addr_base)
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadOffset(start));
+ else
+ reader_->ReadOffset(start);
+ return start + reader_->OffsetSize();
+
+ case DW_FORM_ref1:
+ return start + 1;
+ case DW_FORM_ref2:
+ return start + 2;
+ case DW_FORM_ref4:
+ return start + 4;
+ case DW_FORM_ref8:
+ return start + 8;
+ case DW_FORM_ref_udata:
+ reader_->ReadUnsignedLEB128(start, &len);
+ return start + len;
+ case DW_FORM_ref_addr:
+ // DWARF2 and 3/4 differ on whether ref_addr is address size or
+ // offset size.
+ assert(header_.version >= 2);
+ if (header_.version == 2) {
+ reader_->ReadAddress(start);
+ return start + reader_->AddressSize();
+ } else if (header_.version >= 3) {
+ reader_->ReadOffset(start);
+ return start + reader_->OffsetSize();
+ }
+ break;
+ case DW_FORM_ref_sig8:
+ return start + 8;
+ case DW_FORM_implicit_const:
+ return start;
+ case DW_FORM_block1: {
+ uint64_t datalen = reader_->ReadOneByte(start);
+ return start + 1 + datalen;
+ }
+ case DW_FORM_block2: {
+ uint64_t datalen = reader_->ReadTwoBytes(start);
+ return start + 2 + datalen;
+ }
+ case DW_FORM_block4: {
+ uint64_t datalen = reader_->ReadFourBytes(start);
+ return start + 4 + datalen;
+ }
+ case DW_FORM_block:
+ case DW_FORM_exprloc: {
+ uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
+ return start + datalen + len;
+ }
+ case DW_FORM_strp: {
+ reader_->ReadOffset(start);
+ return start + reader_->OffsetSize();
+ }
+ case DW_FORM_line_strp: {
+ reader_->ReadOffset(start);
+ return start + reader_->OffsetSize();
+ }
+ case DW_FORM_strp_sup:
+ return start + 4;
+ case DW_FORM_ref_sup4:
+ return start + 4;
+ case DW_FORM_ref_sup8:
+ return start + 8;
+ case DW_FORM_loclistx:
+ reader_->ReadUnsignedLEB128(start, &len);
+ return start + len;
+ case DW_FORM_strx:
+ case DW_FORM_GNU_str_index: {
+ reader_->ReadUnsignedLEB128(start, &len);
+ return start + len;
+ }
+ case DW_FORM_strx1: {
+ return start + 1;
+ }
+ case DW_FORM_strx2: {
+ return start + 2;
+ }
+ case DW_FORM_strx3: {
+ return start + 3;
+ }
+ case DW_FORM_strx4: {
+ return start + 4;
+ }
+
+ case DW_FORM_addrx:
+ case DW_FORM_GNU_addr_index:
+ reader_->ReadUnsignedLEB128(start, &len);
+ return start + len;
+ case DW_FORM_addrx1:
+ return start + 1;
+ case DW_FORM_addrx2:
+ return start + 2;
+ case DW_FORM_addrx3:
+ return start + 3;
+ case DW_FORM_addrx4:
+ return start + 4;
+ case DW_FORM_rnglistx:
+ reader_->ReadUnsignedLEB128(start, &len);
+ return start + len;
+ }
+ fprintf(stderr, "Unhandled form type\n");
+ return NULL;
+}
+
+// If one really wanted, you could merge SkipAttribute and
+// ProcessAttribute
+// This is all boring data manipulation and calling of the handler.
+const uint8_t* CompilationUnit::ProcessAttribute(
+ uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
+ enum DwarfForm form, uint64_t implicit_const) {
+ size_t len;
+
+ switch (form) {
+ // DW_FORM_indirect is never used because it is such a space
+ // waster.
+ case DW_FORM_indirect:
+ form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
+ &len));
+ start += len;
+ return ProcessAttribute(dieoffset, start, attr, form, implicit_const);
+
+ case DW_FORM_flag_present:
+ ProcessAttributeUnsigned(dieoffset, attr, form, 1);
+ return start;
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadOneByte(start));
+ return start + 1;
+ case DW_FORM_data2:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadTwoBytes(start));
+ return start + 2;
+ case DW_FORM_data4:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadFourBytes(start));
+ return start + 4;
+ case DW_FORM_data8:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadEightBytes(start));
+ return start + 8;
+ case DW_FORM_data16:
+ // This form is designed for an md5 checksum inside line tables.
+ fprintf(stderr, "Unhandled form type: DW_FORM_data16\n");
+ return start + 16;
+ case DW_FORM_string: {
+ const char* str = reinterpret_cast<const char*>(start);
+ ProcessAttributeString(dieoffset, attr, form, str);
+ return start + strlen(str) + 1;
+ }
+ case DW_FORM_udata:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadUnsignedLEB128(start, &len));
+ return start + len;
+
+ case DW_FORM_sdata:
+ ProcessAttributeSigned(dieoffset, attr, form,
+ reader_->ReadSignedLEB128(start, &len));
+ return start + len;
+ case DW_FORM_addr:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadAddress(start));
+ return start + reader_->AddressSize();
+ case DW_FORM_sec_offset:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadOffset(start));
+ return start + reader_->OffsetSize();
+
+ case DW_FORM_ref1:
+ handler_->ProcessAttributeReference(dieoffset, attr, form,
+ reader_->ReadOneByte(start)
+ + offset_from_section_start_);
+ return start + 1;
+ case DW_FORM_ref2:
+ handler_->ProcessAttributeReference(dieoffset, attr, form,
+ reader_->ReadTwoBytes(start)
+ + offset_from_section_start_);
+ return start + 2;
+ case DW_FORM_ref4:
+ handler_->ProcessAttributeReference(dieoffset, attr, form,
+ reader_->ReadFourBytes(start)
+ + offset_from_section_start_);
+ return start + 4;
+ case DW_FORM_ref8:
+ handler_->ProcessAttributeReference(dieoffset, attr, form,
+ reader_->ReadEightBytes(start)
+ + offset_from_section_start_);
+ return start + 8;
+ case DW_FORM_ref_udata:
+ handler_->ProcessAttributeReference(dieoffset, attr, form,
+ reader_->ReadUnsignedLEB128(start,
+ &len)
+ + offset_from_section_start_);
+ return start + len;
+ case DW_FORM_ref_addr:
+ // DWARF2 and 3/4 differ on whether ref_addr is address size or
+ // offset size.
+ assert(header_.version >= 2);
+ if (header_.version == 2) {
+ handler_->ProcessAttributeReference(dieoffset, attr, form,
+ reader_->ReadAddress(start));
+ return start + reader_->AddressSize();
+ } else if (header_.version >= 3) {
+ handler_->ProcessAttributeReference(dieoffset, attr, form,
+ reader_->ReadOffset(start));
+ return start + reader_->OffsetSize();
+ }
+ break;
+ case DW_FORM_ref_sig8:
+ handler_->ProcessAttributeSignature(dieoffset, attr, form,
+ reader_->ReadEightBytes(start));
+ return start + 8;
+ case DW_FORM_implicit_const:
+ handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
+ implicit_const);
+ return start;
+ case DW_FORM_block1: {
+ uint64_t datalen = reader_->ReadOneByte(start);
+ handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
+ datalen);
+ return start + 1 + datalen;
+ }
+ case DW_FORM_block2: {
+ uint64_t datalen = reader_->ReadTwoBytes(start);
+ handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
+ datalen);
+ return start + 2 + datalen;
+ }
+ case DW_FORM_block4: {
+ uint64_t datalen = reader_->ReadFourBytes(start);
+ handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
+ datalen);
+ return start + 4 + datalen;
+ }
+ case DW_FORM_block:
+ case DW_FORM_exprloc: {
+ uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
+ handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
+ datalen);
+ return start + datalen + len;
+ }
+ case DW_FORM_strp: {
+ assert(string_buffer_ != NULL);
+
+ const uint64_t offset = reader_->ReadOffset(start);
+ assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
+
+ const char* str = reinterpret_cast<const char*>(string_buffer_ + offset);
+ ProcessAttributeString(dieoffset, attr, form, str);
+ return start + reader_->OffsetSize();
+ }
+ case DW_FORM_line_strp: {
+ assert(line_string_buffer_ != NULL);
+
+ const uint64_t offset = reader_->ReadOffset(start);
+ assert(line_string_buffer_ + offset <
+ line_string_buffer_ + line_string_buffer_length_);
+
+ const char* str =
+ reinterpret_cast<const char*>(line_string_buffer_ + offset);
+ ProcessAttributeString(dieoffset, attr, form, str);
+ return start + reader_->OffsetSize();
+ }
+ case DW_FORM_strp_sup:
+ // No support currently for suplementary object files.
+ fprintf(stderr, "Unhandled form type: DW_FORM_strp_sup\n");
+ return start + 4;
+ case DW_FORM_ref_sup4:
+ // No support currently for suplementary object files.
+ fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup4\n");
+ return start + 4;
+ case DW_FORM_ref_sup8:
+ // No support currently for suplementary object files.
+ fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup8\n");
+ return start + 8;
+ case DW_FORM_loclistx:
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadUnsignedLEB128(start, &len));
+ return start + len;
+ case DW_FORM_strx:
+ case DW_FORM_GNU_str_index: {
+ uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len);
+ ProcessFormStringIndex(dieoffset, attr, form, str_index);
+ return start + len;
+ }
+ case DW_FORM_strx1: {
+ uint64_t str_index = reader_->ReadOneByte(start);
+ ProcessFormStringIndex(dieoffset, attr, form, str_index);
+ return start + 1;
+ }
+ case DW_FORM_strx2: {
+ uint64_t str_index = reader_->ReadTwoBytes(start);
+ ProcessFormStringIndex(dieoffset, attr, form, str_index);
+ return start + 2;
+ }
+ case DW_FORM_strx3: {
+ uint64_t str_index = reader_->ReadThreeBytes(start);
+ ProcessFormStringIndex(dieoffset, attr, form, str_index);
+ return start + 3;
+ }
+ case DW_FORM_strx4: {
+ uint64_t str_index = reader_->ReadFourBytes(start);
+ ProcessFormStringIndex(dieoffset, attr, form, str_index);
+ return start + 4;
+ }
+
+ case DW_FORM_addrx:
+ case DW_FORM_GNU_addr_index:
+ ProcessAttributeAddrIndex(
+ dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
+ return start + len;
+ case DW_FORM_addrx1:
+ ProcessAttributeAddrIndex(
+ dieoffset, attr, form, reader_->ReadOneByte(start));
+ return start + 1;
+ case DW_FORM_addrx2:
+ ProcessAttributeAddrIndex(
+ dieoffset, attr, form, reader_->ReadTwoBytes(start));
+ return start + 2;
+ case DW_FORM_addrx3:
+ ProcessAttributeAddrIndex(
+ dieoffset, attr, form, reader_->ReadThreeBytes(start));
+ return start + 3;
+ case DW_FORM_addrx4:
+ ProcessAttributeAddrIndex(
+ dieoffset, attr, form, reader_->ReadFourBytes(start));
+ return start + 4;
+ case DW_FORM_rnglistx:
+ ProcessAttributeUnsigned(
+ dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
+ return start + len;
+ }
+ fprintf(stderr, "Unhandled form type\n");
+ return NULL;
+}
+
+const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset,
+ const uint8_t* start,
+ const Abbrev& abbrev) {
+ // With DWARF v5, the compile_unit die may contain a
+ // DW_AT_str_offsets_base or DW_AT_addr_base. If it does, that attribute must
+ // be found and processed before trying to process the other attributes;
+ // otherwise the string or address values will all come out incorrect.
+ if (abbrev.tag == DW_TAG_compile_unit && header_.version == 5) {
+ uint64_t dieoffset_copy = dieoffset;
+ const uint8_t* start_copy = start;
+ for (AttributeList::const_iterator i = abbrev.attributes.begin();
+ i != abbrev.attributes.end();
+ i++) {
+ start_copy = ProcessOffsetBaseAttribute(dieoffset_copy, start_copy,
+ i->attr_, i->form_,
+ i->value_);
+ }
+ }
+
+ for (AttributeList::const_iterator i = abbrev.attributes.begin();
+ i != abbrev.attributes.end();
+ i++) {
+ start = ProcessAttribute(dieoffset, start, i->attr_, i->form_, i->value_);
+ }
+
+ // If this is a compilation unit in a split DWARF object, verify that
+ // the dwo_id matches. If it does not match, we will ignore this
+ // compilation unit.
+ if (abbrev.tag == DW_TAG_compile_unit
+ && is_split_dwarf_
+ && dwo_id_ != skeleton_dwo_id_) {
+ return NULL;
+ }
+
+ return start;
+}
+
+void CompilationUnit::ProcessDIEs() {
+ const uint8_t* dieptr = after_header_;
+ size_t len;
+
+ // lengthstart is the place the length field is based on.
+ // It is the point in the header after the initial length field
+ const uint8_t* lengthstart = buffer_;
+
+ // In 64 bit dwarf, the initial length is 12 bytes, because of the
+ // 0xffffffff at the start.
+ if (reader_->OffsetSize() == 8)
+ lengthstart += 12;
+ else
+ lengthstart += 4;
+
+ std::stack<uint64_t> die_stack;
+
+ while (dieptr < (lengthstart + header_.length)) {
+ // We give the user the absolute offset from the beginning of
+ // debug_info, since they need it to deal with ref_addr forms.
+ uint64_t absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
+
+ uint64_t abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
+
+ dieptr += len;
+
+ // Abbrev == 0 represents the end of a list of children, or padding
+ // at the end of the compilation unit.
+ if (abbrev_num == 0) {
+ if (die_stack.size() == 0)
+ // If it is padding, then we are done with the compilation unit's DIEs.
+ return;
+ const uint64_t offset = die_stack.top();
+ die_stack.pop();
+ handler_->EndDIE(offset);
+ continue;
+ }
+
+ const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
+ const enum DwarfTag tag = abbrev.tag;
+ if (!handler_->StartDIE(absolute_offset, tag)) {
+ dieptr = SkipDIE(dieptr, abbrev);
+ } else {
+ dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
+ }
+
+ if (abbrev.has_children) {
+ die_stack.push(absolute_offset);
+ } else {
+ handler_->EndDIE(absolute_offset);
+ }
+ }
+}
+
+// Check for a valid ELF file and return the Address size.
+// Returns 0 if not a valid ELF file.
+inline int GetElfWidth(const ElfReader& elf) {
+ if (elf.IsElf32File())
+ return 4;
+ if (elf.IsElf64File())
+ return 8;
+ return 0;
+}
+
+void CompilationUnit::ProcessSplitDwarf() {
+ struct stat statbuf;
+ if (!have_checked_for_dwp_) {
+ // Look for a .dwp file in the same directory as the executable.
+ have_checked_for_dwp_ = true;
+ string dwp_suffix(".dwp");
+ dwp_path_ = path_ + dwp_suffix;
+ if (stat(dwp_path_.c_str(), &statbuf) != 0) {
+ // Fall back to a split .debug file in the same directory.
+ string debug_suffix(".debug");
+ dwp_path_ = path_;
+ size_t found = path_.rfind(debug_suffix);
+ if (found + debug_suffix.length() == path_.length())
+ dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
+ }
+ if (stat(dwp_path_.c_str(), &statbuf) == 0) {
+ ElfReader* elf = new ElfReader(dwp_path_);
+ int width = GetElfWidth(*elf);
+ if (width != 0) {
+ dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness()));
+ dwp_byte_reader_->SetAddressSize(width);
+ dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf));
+ dwp_reader_->Initialize();
+ } else {
+ delete elf;
+ }
+ }
+ }
+ bool found_in_dwp = false;
+ if (dwp_reader_) {
+ // If we have a .dwp file, read the debug sections for the requested CU.
+ SectionMap sections;
+ dwp_reader_->ReadDebugSectionsForCU(dwo_id_, &sections);
+ if (!sections.empty()) {
+ found_in_dwp = true;
+ CompilationUnit dwp_comp_unit(dwp_path_, sections, 0,
+ dwp_byte_reader_.get(), handler_);
+ dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
+ ranges_base_, dwo_id_);
+ dwp_comp_unit.Start();
+ }
+ }
+ if (!found_in_dwp) {
+ // If no .dwp file, try to open the .dwo file.
+ if (stat(dwo_name_, &statbuf) == 0) {
+ ElfReader elf(dwo_name_);
+ int width = GetElfWidth(elf);
+ if (width != 0) {
+ ByteReader reader(ENDIANNESS_LITTLE);
+ reader.SetAddressSize(width);
+ SectionMap sections;
+ ReadDebugSectionsFromDwo(&elf, &sections);
+ CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader,
+ handler_);
+ dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
+ addr_base_, ranges_base_, dwo_id_);
+ dwo_comp_unit.Start();
+ }
+ }
+ }
+}
+
+void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
+ SectionMap* sections) {
+ static const char* const section_names[] = {
+ ".debug_abbrev",
+ ".debug_info",
+ ".debug_str_offsets",
+ ".debug_str"
+ };
+ for (unsigned int i = 0u;
+ i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
+ string base_name = section_names[i];
+ string dwo_name = base_name + ".dwo";
+ size_t section_size;
+ const char* section_data = elf_reader->GetSectionByName(dwo_name,
+ &section_size);
+ if (section_data != NULL)
+ sections->insert(std::make_pair(
+ base_name, std::make_pair(
+ reinterpret_cast<const uint8_t*>(section_data),
+ section_size)));
+ }
+}
+
+DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
+ : elf_reader_(elf_reader), byte_reader_(byte_reader),
+ cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
+ string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
+ nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
+ offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
+ abbrev_size_(0), info_data_(NULL), info_size_(0),
+ str_offsets_data_(NULL), str_offsets_size_(0) {}
+
+DwpReader::~DwpReader() {
+ if (elf_reader_) delete elf_reader_;
+}
+
+void DwpReader::Initialize() {
+ cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
+ &cu_index_size_);
+ if (cu_index_ == NULL) {
+ return;
+ }
+ // The .debug_str.dwo section is shared by all CUs in the file.
+ string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
+ &string_buffer_size_);
+
+ version_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(cu_index_));
+
+ if (version_ == 1) {
+ nslots_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(cu_index_)
+ + 3 * sizeof(uint32_t));
+ phash_ = cu_index_ + 4 * sizeof(uint32_t);
+ pindex_ = phash_ + nslots_ * sizeof(uint64_t);
+ shndx_pool_ = pindex_ + nslots_ * sizeof(uint32_t);
+ if (shndx_pool_ >= cu_index_ + cu_index_size_) {
+ version_ = 0;
+ }
+ } else if (version_ == 2 || version_ == 5) {
+ ncolumns_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(cu_index_) + sizeof(uint32_t));
+ nunits_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(cu_index_) + 2 * sizeof(uint32_t));
+ nslots_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(cu_index_) + 3 * sizeof(uint32_t));
+ phash_ = cu_index_ + 4 * sizeof(uint32_t);
+ pindex_ = phash_ + nslots_ * sizeof(uint64_t);
+ offset_table_ = pindex_ + nslots_ * sizeof(uint32_t);
+ size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32_t);
+ abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
+ &abbrev_size_);
+ info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
+ str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
+ &str_offsets_size_);
+ if (size_table_ >= cu_index_ + cu_index_size_) {
+ version_ = 0;
+ }
+ }
+}
+
+void DwpReader::ReadDebugSectionsForCU(uint64_t dwo_id,
+ SectionMap* sections) {
+ if (version_ == 1) {
+ int slot = LookupCU(dwo_id);
+ if (slot == -1) {
+ return;
+ }
+
+ // The index table points to the section index pool, where we
+ // can read a list of section indexes for the debug sections
+ // for the CU whose dwo_id we are looking for.
+ int index = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(pindex_)
+ + slot * sizeof(uint32_t));
+ const char* shndx_list = shndx_pool_ + index * sizeof(uint32_t);
+ for (;;) {
+ if (shndx_list >= cu_index_ + cu_index_size_) {
+ version_ = 0;
+ return;
+ }
+ unsigned int shndx = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(shndx_list));
+ shndx_list += sizeof(uint32_t);
+ if (shndx == 0)
+ break;
+ const char* section_name = elf_reader_->GetSectionName(shndx);
+ size_t section_size;
+ const char* section_data;
+ // We're only interested in these four debug sections.
+ // The section names in the .dwo file end with ".dwo", but we
+ // add them to the sections table with their normal names.
+ if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
+ section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+ sections->insert(std::make_pair(
+ ".debug_abbrev",
+ std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
+ section_size)));
+ } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
+ section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+ sections->insert(std::make_pair(
+ ".debug_info",
+ std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
+ section_size)));
+ } else if (!strncmp(section_name, ".debug_str_offsets",
+ strlen(".debug_str_offsets"))) {
+ section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+ sections->insert(std::make_pair(
+ ".debug_str_offsets",
+ std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
+ section_size)));
+ }
+ }
+ sections->insert(std::make_pair(
+ ".debug_str",
+ std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
+ string_buffer_size_)));
+ } else if (version_ == 2 || version_ == 5) {
+ uint32_t index = LookupCUv2(dwo_id);
+ if (index == 0) {
+ return;
+ }
+
+ // The index points to a row in each of the section offsets table
+ // and the section size table, where we can read the offsets and sizes
+ // of the contributions to each debug section from the CU whose dwo_id
+ // we are looking for. Row 0 of the section offsets table has the
+ // section ids for each column of the table. The size table begins
+ // with row 1.
+ const char* id_row = offset_table_;
+ const char* offset_row = offset_table_
+ + index * ncolumns_ * sizeof(uint32_t);
+ const char* size_row =
+ size_table_ + (index - 1) * ncolumns_ * sizeof(uint32_t);
+ if (size_row + ncolumns_ * sizeof(uint32_t) > cu_index_ + cu_index_size_) {
+ version_ = 0;
+ return;
+ }
+ for (unsigned int col = 0u; col < ncolumns_; ++col) {
+ uint32_t section_id =
+ byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t*>(id_row)
+ + col * sizeof(uint32_t));
+ uint32_t offset = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(offset_row)
+ + col * sizeof(uint32_t));
+ uint32_t size = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(size_row) + col * sizeof(uint32_t));
+ if (section_id == DW_SECT_ABBREV) {
+ sections->insert(std::make_pair(
+ ".debug_abbrev",
+ std::make_pair(reinterpret_cast<const uint8_t*> (abbrev_data_)
+ + offset, size)));
+ } else if (section_id == DW_SECT_INFO) {
+ sections->insert(std::make_pair(
+ ".debug_info",
+ std::make_pair(reinterpret_cast<const uint8_t*> (info_data_)
+ + offset, size)));
+ } else if (section_id == DW_SECT_STR_OFFSETS) {
+ sections->insert(std::make_pair(
+ ".debug_str_offsets",
+ std::make_pair(reinterpret_cast<const uint8_t*> (str_offsets_data_)
+ + offset, size)));
+ }
+ }
+ sections->insert(std::make_pair(
+ ".debug_str",
+ std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
+ string_buffer_size_)));
+ }
+}
+
+int DwpReader::LookupCU(uint64_t dwo_id) {
+ uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
+ uint64_t probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
+ if (probe != 0 && probe != dwo_id) {
+ uint32_t secondary_hash =
+ (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
+ do {
+ slot = (slot + secondary_hash) & (nslots_ - 1);
+ probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
+ } while (probe != 0 && probe != dwo_id);
+ }
+ if (probe == 0)
+ return -1;
+ return slot;
+}
+
+uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) {
+ uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
+ uint64_t probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
+ uint32_t index = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t));
+ if (index != 0 && probe != dwo_id) {
+ uint32_t secondary_hash =
+ (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
+ do {
+ slot = (slot + secondary_hash) & (nslots_ - 1);
+ probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
+ index = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t));
+ } while (index != 0 && probe != dwo_id);
+ }
+ return index;
+}
+
+LineInfo::LineInfo(const uint8_t* buffer, uint64_t buffer_length,
+ ByteReader* reader, const uint8_t* string_buffer,
+ size_t string_buffer_length,
+ const uint8_t* line_string_buffer,
+ size_t line_string_buffer_length, LineInfoHandler* handler):
+ handler_(handler), reader_(reader), buffer_(buffer),
+ string_buffer_(string_buffer),
+ line_string_buffer_(line_string_buffer) {
+#ifndef NDEBUG
+ buffer_length_ = buffer_length;
+ string_buffer_length_ = string_buffer_length;
+ line_string_buffer_length_ = line_string_buffer_length;
+#endif
+ header_.std_opcode_lengths = NULL;
+}
+
+uint64_t LineInfo::Start() {
+ ReadHeader();
+ ReadLines();
+ return after_header_ - buffer_;
+}
+
+void LineInfo::ReadTypesAndForms(const uint8_t** lineptr,
+ uint32_t* content_types,
+ uint32_t* content_forms,
+ uint32_t max_types,
+ uint32_t* format_count) {
+ size_t len;
+
+ uint32_t count = reader_->ReadUnsignedLEB128(*lineptr, &len);
+ *lineptr += len;
+ if (count < 1 || count > max_types) {
+ return;
+ }
+ for (uint32_t col = 0; col < count; ++col) {
+ content_types[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
+ *lineptr += len;
+ content_forms[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
+ *lineptr += len;
+ }
+ *format_count = count;
+}
+
+const char* LineInfo::ReadStringForm(uint32_t form, const uint8_t** lineptr) {
+ const char* name = nullptr;
+ if (form == DW_FORM_string) {
+ name = reinterpret_cast<const char*>(*lineptr);
+ *lineptr += strlen(name) + 1;
+ return name;
+ } else if (form == DW_FORM_strp) {
+ uint64_t offset = reader_->ReadOffset(*lineptr);
+ assert(offset < string_buffer_length_);
+ *lineptr += reader_->OffsetSize();
+ if (string_buffer_ != nullptr) {
+ name = reinterpret_cast<const char*>(string_buffer_) + offset;
+ return name;
+ }
+ } else if (form == DW_FORM_line_strp) {
+ uint64_t offset = reader_->ReadOffset(*lineptr);
+ assert(offset < line_string_buffer_length_);
+ *lineptr += reader_->OffsetSize();
+ if (line_string_buffer_ != nullptr) {
+ name = reinterpret_cast<const char*>(line_string_buffer_) + offset;
+ return name;
+ }
+ }
+ // Shouldn't be called with a non-string-form, and
+ // if there is a string form but no string buffer,
+ // that is a problem too.
+ assert(0);
+ return nullptr;
+}
+
+uint64_t LineInfo::ReadUnsignedData(uint32_t form, const uint8_t** lineptr) {
+ size_t len;
+ uint64_t value;
+
+ switch (form) {
+ case DW_FORM_data1:
+ value = reader_->ReadOneByte(*lineptr);
+ *lineptr += 1;
+ return value;
+ case DW_FORM_data2:
+ value = reader_->ReadTwoBytes(*lineptr);
+ *lineptr += 2;
+ return value;
+ case DW_FORM_data4:
+ value = reader_->ReadFourBytes(*lineptr);
+ *lineptr += 4;
+ return value;
+ case DW_FORM_data8:
+ value = reader_->ReadEightBytes(*lineptr);
+ *lineptr += 8;
+ return value;
+ case DW_FORM_udata:
+ value = reader_->ReadUnsignedLEB128(*lineptr, &len);
+ *lineptr += len;
+ return value;
+ default:
+ fprintf(stderr, "Unrecognized data form.");
+ return 0;
+ }
+}
+
+void LineInfo::ReadFileRow(const uint8_t** lineptr,
+ const uint32_t* content_types,
+ const uint32_t* content_forms, uint32_t row,
+ uint32_t format_count) {
+ const char* filename = nullptr;
+ uint64_t dirindex = 0;
+ uint64_t mod_time = 0;
+ uint64_t filelength = 0;
+
+ for (uint32_t col = 0; col < format_count; ++col) {
+ switch (content_types[col]) {
+ case DW_LNCT_path:
+ filename = ReadStringForm(content_forms[col], lineptr);
+ break;
+ case DW_LNCT_directory_index:
+ dirindex = ReadUnsignedData(content_forms[col], lineptr);
+ break;
+ case DW_LNCT_timestamp:
+ mod_time = ReadUnsignedData(content_forms[col], lineptr);
+ break;
+ case DW_LNCT_size:
+ filelength = ReadUnsignedData(content_forms[col], lineptr);
+ break;
+ case DW_LNCT_MD5:
+ // MD5 entries help a debugger sort different versions of files with
+ // the same name. It is always paired with a DW_FORM_data16 and is
+ // unused in this case.
+ *lineptr += 16;
+ break;
+ default:
+ fprintf(stderr, "Unrecognized form in line table header. %d\n",
+ content_types[col]);
+ assert(false);
+ break;
+ }
+ }
+ assert(filename != nullptr);
+ handler_->DefineFile(filename, row, dirindex, mod_time, filelength);
+}
+
+// The header for a debug_line section is mildly complicated, because
+// the line info is very tightly encoded.
+void LineInfo::ReadHeader() {
+ const uint8_t* lineptr = buffer_;
+ size_t initial_length_size;
+
+ const uint64_t initial_length
+ = reader_->ReadInitialLength(lineptr, &initial_length_size);
+
+ lineptr += initial_length_size;
+ header_.total_length = initial_length;
+ assert(buffer_ + initial_length_size + header_.total_length <=
+ buffer_ + buffer_length_);
+
+
+ header_.version = reader_->ReadTwoBytes(lineptr);
+ lineptr += 2;
+
+ if (header_.version >= 5) {
+ uint8_t address_size = reader_->ReadOneByte(lineptr);
+ reader_->SetAddressSize(address_size);
+ lineptr += 1;
+ uint8_t segment_selector_size = reader_->ReadOneByte(lineptr);
+ if (segment_selector_size != 0) {
+ fprintf(stderr,"No support for segmented memory.");
+ }
+ lineptr += 1;
+ } else {
+ // Address size *must* be set by CU ahead of time.
+ assert(reader_->AddressSize() != 0);
+ }
+
+ header_.prologue_length = reader_->ReadOffset(lineptr);
+ lineptr += reader_->OffsetSize();
+
+ header_.min_insn_length = reader_->ReadOneByte(lineptr);
+ lineptr += 1;
+
+ if (header_.version >= 4) {
+ __attribute__((unused)) uint8_t max_ops_per_insn =
+ reader_->ReadOneByte(lineptr);
+ ++lineptr;
+ assert(max_ops_per_insn == 1);
+ }
+
+ header_.default_is_stmt = reader_->ReadOneByte(lineptr);
+ lineptr += 1;
+
+ header_.line_base = *reinterpret_cast<const int8_t*>(lineptr);
+ lineptr += 1;
+
+ header_.line_range = reader_->ReadOneByte(lineptr);
+ lineptr += 1;
+
+ header_.opcode_base = reader_->ReadOneByte(lineptr);
+ lineptr += 1;
+
+ header_.std_opcode_lengths = new std::vector<unsigned char>;
+ header_.std_opcode_lengths->resize(header_.opcode_base + 1);
+ (*header_.std_opcode_lengths)[0] = 0;
+ for (int i = 1; i < header_.opcode_base; i++) {
+ (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
+ lineptr += 1;
+ }
+
+ if (header_.version <= 4) {
+ // Directory zero is assumed to be the compilation directory and special
+ // cased where used. It is not actually stored in the dwarf data. But an
+ // empty entry here avoids off-by-one errors elsewhere in the code.
+ handler_->DefineDir("", 0);
+ // It is legal for the directory entry table to be empty.
+ if (*lineptr) {
+ uint32_t dirindex = 1;
+ while (*lineptr) {
+ const char* dirname = reinterpret_cast<const char*>(lineptr);
+ handler_->DefineDir(dirname, dirindex);
+ lineptr += strlen(dirname) + 1;
+ dirindex++;
+ }
+ }
+ lineptr++;
+ // It is also legal for the file entry table to be empty.
+
+ // Similarly for file zero.
+ handler_->DefineFile("", 0, 0, 0, 0);
+ if (*lineptr) {
+ uint32_t fileindex = 1;
+ size_t len;
+ while (*lineptr) {
+ const char* filename = ReadStringForm(DW_FORM_string, &lineptr);
+
+ uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
+ lineptr += len;
+
+ uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
+ lineptr += len;
+
+ uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
+ lineptr += len;
+ handler_->DefineFile(filename, fileindex,
+ static_cast<uint32_t>(dirindex), mod_time,
+ filelength);
+ fileindex++;
+ }
+ }
+ lineptr++;
+ } else {
+ // Read the DWARF-5 directory table.
+
+ // Dwarf5 supports five different types and forms per directory- and
+ // file-table entry. Theoretically, there could be duplicate entries
+ // in this table, but that would be quite unusual.
+ static const uint32_t kMaxTypesAndForms = 5;
+ uint32_t content_types[kMaxTypesAndForms];
+ uint32_t content_forms[kMaxTypesAndForms];
+ uint32_t format_count;
+ size_t len;
+
+ ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
+ &format_count);
+ uint32_t entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
+ lineptr += len;
+ for (uint32_t row = 0; row < entry_count; ++row) {
+ const char* dirname = nullptr;
+ for (uint32_t col = 0; col < format_count; ++col) {
+ // The path is the only relevant content type for this implementation.
+ if (content_types[col] == DW_LNCT_path) {
+ dirname = ReadStringForm(content_forms[col], &lineptr);
+ }
+ }
+ handler_->DefineDir(dirname, row);
+ }
+
+ // Read the DWARF-5 filename table.
+ ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
+ &format_count);
+ entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
+ lineptr += len;
+
+ for (uint32_t row = 0; row < entry_count; ++row) {
+ ReadFileRow(&lineptr, content_types, content_forms, row, format_count);
+ }
+ }
+ after_header_ = lineptr;
+}
+
+/* static */
+bool LineInfo::ProcessOneOpcode(ByteReader* reader,
+ LineInfoHandler* handler,
+ const struct LineInfoHeader& header,
+ const uint8_t* start,
+ struct LineStateMachine* lsm,
+ size_t* len,
+ uintptr pc,
+ bool* lsm_passes_pc) {
+ size_t oplen = 0;
+ size_t templen;
+ uint8_t opcode = reader->ReadOneByte(start);
+ oplen++;
+ start++;
+
+ // If the opcode is great than the opcode_base, it is a special
+ // opcode. Most line programs consist mainly of special opcodes.
+ if (opcode >= header.opcode_base) {
+ opcode -= header.opcode_base;
+ const int64_t advance_address = (opcode / header.line_range)
+ * header.min_insn_length;
+ const int32_t advance_line = (opcode % header.line_range)
+ + header.line_base;
+
+ // Check if the lsm passes "pc". If so, mark it as passed.
+ if (lsm_passes_pc &&
+ lsm->address <= pc && pc < lsm->address + advance_address) {
+ *lsm_passes_pc = true;
+ }
+
+ lsm->address += advance_address;
+ lsm->line_num += advance_line;
+ lsm->basic_block = true;
+ *len = oplen;
+ return true;
+ }
+
+ // Otherwise, we have the regular opcodes
+ switch (opcode) {
+ case DW_LNS_copy: {
+ lsm->basic_block = false;
+ *len = oplen;
+ return true;
+ }
+
+ case DW_LNS_advance_pc: {
+ uint64_t advance_address = reader->ReadUnsignedLEB128(start, &templen);
+ oplen += templen;
+
+ // Check if the lsm passes "pc". If so, mark it as passed.
+ if (lsm_passes_pc && lsm->address <= pc &&
+ pc < lsm->address + header.min_insn_length * advance_address) {
+ *lsm_passes_pc = true;
+ }
+
+ lsm->address += header.min_insn_length * advance_address;
+ }
+ break;
+ case DW_LNS_advance_line: {
+ const int64_t advance_line = reader->ReadSignedLEB128(start, &templen);
+ oplen += templen;
+ lsm->line_num += static_cast<int32_t>(advance_line);
+
+ // With gcc 4.2.1, we can get the line_no here for the first time
+ // since DW_LNS_advance_line is called after DW_LNE_set_address is
+ // called. So we check if the lsm passes "pc" here, not in
+ // DW_LNE_set_address.
+ if (lsm_passes_pc && lsm->address == pc) {
+ *lsm_passes_pc = true;
+ }
+ }
+ break;
+ case DW_LNS_set_file: {
+ const uint64_t fileno = reader->ReadUnsignedLEB128(start, &templen);
+ oplen += templen;
+ lsm->file_num = static_cast<uint32_t>(fileno);
+ }
+ break;
+ case DW_LNS_set_column: {
+ const uint64_t colno = reader->ReadUnsignedLEB128(start, &templen);
+ oplen += templen;
+ lsm->column_num = static_cast<uint32_t>(colno);
+ }
+ break;
+ case DW_LNS_negate_stmt: {
+ lsm->is_stmt = !lsm->is_stmt;
+ }
+ break;
+ case DW_LNS_set_basic_block: {
+ lsm->basic_block = true;
+ }
+ break;
+ case DW_LNS_fixed_advance_pc: {
+ const uint16_t advance_address = reader->ReadTwoBytes(start);
+ oplen += 2;
+
+ // Check if the lsm passes "pc". If so, mark it as passed.
+ if (lsm_passes_pc &&
+ lsm->address <= pc && pc < lsm->address + advance_address) {
+ *lsm_passes_pc = true;
+ }
+
+ lsm->address += advance_address;
+ }
+ break;
+ case DW_LNS_const_add_pc: {
+ const int64_t advance_address = header.min_insn_length
+ * ((255 - header.opcode_base)
+ / header.line_range);
+
+ // Check if the lsm passes "pc". If so, mark it as passed.
+ if (lsm_passes_pc &&
+ lsm->address <= pc && pc < lsm->address + advance_address) {
+ *lsm_passes_pc = true;
+ }
+
+ lsm->address += advance_address;
+ }
+ break;
+ case DW_LNS_extended_op: {
+ const uint64_t extended_op_len = reader->ReadUnsignedLEB128(start,
+ &templen);
+ start += templen;
+ oplen += templen + extended_op_len;
+
+ const uint64_t extended_op = reader->ReadOneByte(start);
+ start++;
+
+ switch (extended_op) {
+ case DW_LNE_end_sequence: {
+ lsm->end_sequence = true;
+ *len = oplen;
+ return true;
+ }
+ break;
+ case DW_LNE_set_address: {
+ // With gcc 4.2.1, we cannot tell the line_no here since
+ // DW_LNE_set_address is called before DW_LNS_advance_line is
+ // called. So we do not check if the lsm passes "pc" here. See
+ // also the comment in DW_LNS_advance_line.
+ uint64_t address = reader->ReadAddress(start);
+ lsm->address = address;
+ }
+ break;
+ case DW_LNE_define_file: {
+ const char* filename = reinterpret_cast<const char*>(start);
+
+ templen = strlen(filename) + 1;
+ start += templen;
+
+ uint64_t dirindex = reader->ReadUnsignedLEB128(start, &templen);
+ oplen += templen;
+
+ const uint64_t mod_time = reader->ReadUnsignedLEB128(start,
+ &templen);
+ oplen += templen;
+
+ const uint64_t filelength = reader->ReadUnsignedLEB128(start,
+ &templen);
+ oplen += templen;
+
+ if (handler) {
+ handler->DefineFile(filename, -1, static_cast<uint32_t>(dirindex),
+ mod_time, filelength);
+ }
+ }
+ break;
+ }
+ }
+ break;
+
+ default: {
+ // Ignore unknown opcode silently
+ if (header.std_opcode_lengths) {
+ for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
+ reader->ReadUnsignedLEB128(start, &templen);
+ start += templen;
+ oplen += templen;
+ }
+ }
+ }
+ break;
+ }
+ *len = oplen;
+ return false;
+}
+
+void LineInfo::ReadLines() {
+ struct LineStateMachine lsm;
+
+ // lengthstart is the place the length field is based on.
+ // It is the point in the header after the initial length field
+ const uint8_t* lengthstart = buffer_;
+
+ // In 64 bit dwarf, the initial length is 12 bytes, because of the
+ // 0xffffffff at the start.
+ if (reader_->OffsetSize() == 8)
+ lengthstart += 12;
+ else
+ lengthstart += 4;
+
+ const uint8_t* lineptr = after_header_;
+ lsm.Reset(header_.default_is_stmt);
+
+ // The LineInfoHandler interface expects each line's length along
+ // with its address, but DWARF only provides addresses (sans
+ // length), and an end-of-sequence address; one infers the length
+ // from the next address. So we report a line only when we get the
+ // next line's address, or the end-of-sequence address.
+ bool have_pending_line = false;
+ uint64_t pending_address = 0;
+ uint32_t pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
+
+ while (lineptr < lengthstart + header_.total_length) {
+ size_t oplength;
+ bool add_row = ProcessOneOpcode(reader_, handler_, header_,
+ lineptr, &lsm, &oplength, (uintptr)-1,
+ NULL);
+ if (add_row) {
+ if (have_pending_line)
+ handler_->AddLine(pending_address, lsm.address - pending_address,
+ pending_file_num, pending_line_num,
+ pending_column_num);
+ if (lsm.end_sequence) {
+ lsm.Reset(header_.default_is_stmt);
+ have_pending_line = false;
+ } else {
+ pending_address = lsm.address;
+ pending_file_num = lsm.file_num;
+ pending_line_num = lsm.line_num;
+ pending_column_num = lsm.column_num;
+ have_pending_line = true;
+ }
+ }
+ lineptr += oplength;
+ }
+
+ after_header_ = lengthstart + header_.total_length;
+}
+
+bool RangeListReader::ReadRanges(enum DwarfForm form, uint64_t data) {
+ if (form == DW_FORM_sec_offset) {
+ if (cu_info_->version_ <= 4) {
+ return ReadDebugRanges(data);
+ } else {
+ return ReadDebugRngList(data);
+ }
+ } else if (form == DW_FORM_rnglistx) {
+ offset_array_ = cu_info_->ranges_base_;
+ uint64_t index_offset = reader_->OffsetSize() * data;
+ uint64_t range_list_offset =
+ reader_->ReadOffset(cu_info_->buffer_ + offset_array_ + index_offset);
+
+ return ReadDebugRngList(offset_array_ + range_list_offset);
+ }
+ return false;
+}
+
+bool RangeListReader::ReadDebugRanges(uint64_t offset) {
+ const uint64_t max_address =
+ (reader_->AddressSize() == 4) ? 0xffffffffUL
+ : 0xffffffffffffffffULL;
+ const uint64_t entry_size = reader_->AddressSize() * 2;
+ bool list_end = false;
+
+ do {
+ if (offset > cu_info_->size_ - entry_size) {
+ return false; // Invalid range detected
+ }
+
+ uint64_t start_address = reader_->ReadAddress(cu_info_->buffer_ + offset);
+ uint64_t end_address = reader_->ReadAddress(
+ cu_info_->buffer_ + offset + reader_->AddressSize());
+
+ if (start_address == max_address) { // Base address selection
+ cu_info_->base_address_ = end_address;
+ } else if (start_address == 0 && end_address == 0) { // End-of-list
+ handler_->Finish();
+ list_end = true;
+ } else { // Add a range entry
+ handler_->AddRange(start_address + cu_info_->base_address_,
+ end_address + cu_info_->base_address_);
+ }
+
+ offset += entry_size;
+ } while (!list_end);
+
+ return true;
+}
+
+bool RangeListReader::ReadDebugRngList(uint64_t offset) {
+ uint64_t start = 0;
+ uint64_t end = 0;
+ uint64_t range_len = 0;
+ uint64_t index = 0;
+ // A uleb128's length isn't known until after it has been read, so overruns
+ // are only caught after an entire entry.
+ while (offset < cu_info_->size_) {
+ uint8_t entry_type = reader_->ReadOneByte(cu_info_->buffer_ + offset);
+ offset += 1;
+ // Handle each entry type per Dwarf 5 Standard, section 2.17.3.
+ switch (entry_type) {
+ case DW_RLE_end_of_list:
+ handler_->Finish();
+ return true;
+ case DW_RLE_base_addressx:
+ offset += ReadULEB(offset, &index);
+ cu_info_->base_address_ = GetAddressAtIndex(index);
+ break;
+ case DW_RLE_startx_endx:
+ offset += ReadULEB(offset, &index);
+ start = GetAddressAtIndex(index);
+ offset += ReadULEB(offset, &index);
+ end = GetAddressAtIndex(index);
+ handler_->AddRange(start, end);
+ break;
+ case DW_RLE_startx_length:
+ offset += ReadULEB(offset, &index);
+ start = GetAddressAtIndex(index);
+ offset += ReadULEB(offset, &range_len);
+ handler_->AddRange(start, start + range_len);
+ break;
+ case DW_RLE_offset_pair:
+ offset += ReadULEB(offset, &start);
+ offset += ReadULEB(offset, &end);
+ handler_->AddRange(start + cu_info_->base_address_,
+ end + cu_info_->base_address_);
+ break;
+ case DW_RLE_base_address:
+ offset += ReadAddress(offset, &cu_info_->base_address_);
+ break;
+ case DW_RLE_start_end:
+ offset += ReadAddress(offset, &start);
+ offset += ReadAddress(offset, &end);
+ handler_->AddRange(start, end);
+ break;
+ case DW_RLE_start_length:
+ offset += ReadAddress(offset, &start);
+ offset += ReadULEB(offset, &end);
+ handler_->AddRange(start, start + end);
+ break;
+ }
+ }
+ return false;
+}
+
+// A DWARF rule for recovering the address or value of a register, or
+// computing the canonical frame address. There is one subclass of this for
+// each '*Rule' member function in CallFrameInfo::Handler.
+//
+// It's annoying that we have to handle Rules using pointers (because
+// the concrete instances can have an arbitrary size). They're small,
+// so it would be much nicer if we could just handle them by value
+// instead of fretting about ownership and destruction.
+//
+// It seems like all these could simply be instances of std::tr1::bind,
+// except that we need instances to be EqualityComparable, too.
+//
+// This could logically be nested within State, but then the qualified names
+// get horrendous.
+class CallFrameInfo::Rule {
+ public:
+ virtual ~Rule() { }
+
+ // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
+ // this rule. If REG is kCFARegister, then this rule describes how to compute
+ // the canonical frame address. Return what the HANDLER member function
+ // returned.
+ virtual bool Handle(Handler* handler,
+ uint64_t address, int reg) const = 0;
+
+ // Equality on rules. We use these to decide which rules we need
+ // to report after a DW_CFA_restore_state instruction.
+ virtual bool operator==(const Rule& rhs) const = 0;
+
+ bool operator!=(const Rule& rhs) const { return ! (*this == rhs); }
+
+ // Return a pointer to a copy of this rule.
+ virtual Rule* Copy() const = 0;
+
+ // If this is a base+offset rule, change its base register to REG.
+ // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
+ virtual void SetBaseRegister(unsigned reg) { }
+
+ // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
+ // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
+ virtual void SetOffset(long long offset) { }
+};
+
+// Rule: the value the register had in the caller cannot be recovered.
+class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
+ public:
+ UndefinedRule() { }
+ ~UndefinedRule() { }
+ bool Handle(Handler* handler, uint64_t address, int reg) const {
+ return handler->UndefinedRule(address, reg);
+ }
+ bool operator==(const Rule& rhs) const {
+ // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
+ // been carefully considered; cheap RTTI-like workarounds are forbidden.
+ const UndefinedRule* our_rhs = dynamic_cast<const UndefinedRule*>(&rhs);
+ return (our_rhs != NULL);
+ }
+ Rule* Copy() const { return new UndefinedRule(*this); }
+};
+
+// Rule: the register's value is the same as that it had in the caller.
+class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
+ public:
+ SameValueRule() { }
+ ~SameValueRule() { }
+ bool Handle(Handler* handler, uint64_t address, int reg) const {
+ return handler->SameValueRule(address, reg);
+ }
+ bool operator==(const Rule& rhs) const {
+ // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
+ // been carefully considered; cheap RTTI-like workarounds are forbidden.
+ const SameValueRule* our_rhs = dynamic_cast<const SameValueRule*>(&rhs);
+ return (our_rhs != NULL);
+ }
+ Rule* Copy() const { return new SameValueRule(*this); }
+};
+
+// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
+// may be CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
+ public:
+ OffsetRule(int base_register, long offset)
+ : base_register_(base_register), offset_(offset) { }
+ ~OffsetRule() { }
+ bool Handle(Handler* handler, uint64_t address, int reg) const {
+ return handler->OffsetRule(address, reg, base_register_, offset_);
+ }
+ bool operator==(const Rule& rhs) const {
+ // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
+ // been carefully considered; cheap RTTI-like workarounds are forbidden.
+ const OffsetRule* our_rhs = dynamic_cast<const OffsetRule*>(&rhs);
+ return (our_rhs &&
+ base_register_ == our_rhs->base_register_ &&
+ offset_ == our_rhs->offset_);
+ }
+ Rule* Copy() const { return new OffsetRule(*this); }
+ // We don't actually need SetBaseRegister or SetOffset here, since they
+ // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
+ // doesn't make sense to use OffsetRule for computing the CFA: it
+ // computes the address at which a register is saved, not a value.
+ private:
+ int base_register_;
+ long offset_;
+};
+
+// Rule: the value the register had in the caller is the value of
+// BASE_REGISTER plus offset. BASE_REGISTER may be
+// CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
+ public:
+ ValOffsetRule(int base_register, long offset)
+ : base_register_(base_register), offset_(offset) { }
+ ~ValOffsetRule() { }
+ bool Handle(Handler* handler, uint64_t address, int reg) const {
+ return handler->ValOffsetRule(address, reg, base_register_, offset_);
+ }
+ bool operator==(const Rule& rhs) const {
+ // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
+ // been carefully considered; cheap RTTI-like workarounds are forbidden.
+ const ValOffsetRule* our_rhs = dynamic_cast<const ValOffsetRule*>(&rhs);
+ return (our_rhs &&
+ base_register_ == our_rhs->base_register_ &&
+ offset_ == our_rhs->offset_);
+ }
+ Rule* Copy() const { return new ValOffsetRule(*this); }
+ void SetBaseRegister(unsigned reg) { base_register_ = reg; }
+ void SetOffset(long long offset) { offset_ = offset; }
+ private:
+ int base_register_;
+ long offset_;
+};
+
+// Rule: the register has been saved in another register REGISTER_NUMBER_.
+class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
+ public:
+ explicit RegisterRule(int register_number)
+ : register_number_(register_number) { }
+ ~RegisterRule() { }
+ bool Handle(Handler* handler, uint64_t address, int reg) const {
+ return handler->RegisterRule(address, reg, register_number_);
+ }
+ bool operator==(const Rule& rhs) const {
+ // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
+ // been carefully considered; cheap RTTI-like workarounds are forbidden.
+ const RegisterRule* our_rhs = dynamic_cast<const RegisterRule*>(&rhs);
+ return (our_rhs && register_number_ == our_rhs->register_number_);
+ }
+ Rule* Copy() const { return new RegisterRule(*this); }
+ private:
+ int register_number_;
+};
+
+// Rule: EXPRESSION evaluates to the address at which the register is saved.
+class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
+ public:
+ explicit ExpressionRule(const string& expression)
+ : expression_(expression) { }
+ ~ExpressionRule() { }
+ bool Handle(Handler* handler, uint64_t address, int reg) const {
+ return handler->ExpressionRule(address, reg, expression_);
+ }
+ bool operator==(const Rule& rhs) const {
+ // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
+ // been carefully considered; cheap RTTI-like workarounds are forbidden.
+ const ExpressionRule* our_rhs = dynamic_cast<const ExpressionRule*>(&rhs);
+ return (our_rhs && expression_ == our_rhs->expression_);
+ }
+ Rule* Copy() const { return new ExpressionRule(*this); }
+ private:
+ string expression_;
+};
+
+// Rule: EXPRESSION evaluates to the address at which the register is saved.
+class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
+ public:
+ explicit ValExpressionRule(const string& expression)
+ : expression_(expression) { }
+ ~ValExpressionRule() { }
+ bool Handle(Handler* handler, uint64_t address, int reg) const {
+ return handler->ValExpressionRule(address, reg, expression_);
+ }
+ bool operator==(const Rule& rhs) const {
+ // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
+ // been carefully considered; cheap RTTI-like workarounds are forbidden.
+ const ValExpressionRule* our_rhs =
+ dynamic_cast<const ValExpressionRule*>(&rhs);
+ return (our_rhs && expression_ == our_rhs->expression_);
+ }
+ Rule* Copy() const { return new ValExpressionRule(*this); }
+ private:
+ string expression_;
+};
+
+// A map from register numbers to rules.
+class CallFrameInfo::RuleMap {
+ public:
+ RuleMap() : cfa_rule_(NULL) { }
+ RuleMap(const RuleMap& rhs) : cfa_rule_(NULL) { *this = rhs; }
+ ~RuleMap() { Clear(); }
+
+ RuleMap& operator=(const RuleMap& rhs);
+
+ // Set the rule for computing the CFA to RULE. Take ownership of RULE.
+ void SetCFARule(Rule* rule) { delete cfa_rule_; cfa_rule_ = rule; }
+
+ // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
+ // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
+ // DW_CFA_def_cfa_register, and for detecting references to the CFA before
+ // a rule for it has been established.
+ Rule* CFARule() const { return cfa_rule_; }
+
+ // Return the rule for REG, or NULL if there is none. The caller takes
+ // ownership of the result.
+ Rule* RegisterRule(int reg) const;
+
+ // Set the rule for computing REG to RULE. Take ownership of RULE.
+ void SetRegisterRule(int reg, Rule* rule);
+
+ // Make all the appropriate calls to HANDLER as if we were changing from
+ // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
+ // DW_CFA_restore_state, where lots of rules can change simultaneously.
+ // Return true if all handlers returned true; otherwise, return false.
+ bool HandleTransitionTo(Handler* handler, uint64_t address,
+ const RuleMap& new_rules) const;
+
+ private:
+ // A map from register numbers to Rules.
+ typedef std::map<int, Rule*> RuleByNumber;
+
+ // Remove all register rules and clear cfa_rule_.
+ void Clear();
+
+ // The rule for computing the canonical frame address. This RuleMap owns
+ // this rule.
+ Rule* cfa_rule_;
+
+ // A map from register numbers to postfix expressions to recover
+ // their values. This RuleMap owns the Rules the map refers to.
+ RuleByNumber registers_;
+};
+
+CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) {
+ Clear();
+ // Since each map owns the rules it refers to, assignment must copy them.
+ if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
+ for (RuleByNumber::const_iterator it = rhs.registers_.begin();
+ it != rhs.registers_.end(); it++)
+ registers_[it->first] = it->second->Copy();
+ return *this;
+}
+
+CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const {
+ assert(reg != Handler::kCFARegister);
+ RuleByNumber::const_iterator it = registers_.find(reg);
+ if (it != registers_.end())
+ return it->second->Copy();
+ else
+ return NULL;
+}
+
+void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) {
+ assert(reg != Handler::kCFARegister);
+ assert(rule);
+ Rule** slot = &registers_[reg];
+ delete *slot;
+ *slot = rule;
+}
+
+bool CallFrameInfo::RuleMap::HandleTransitionTo(
+ Handler* handler,
+ uint64_t address,
+ const RuleMap& new_rules) const {
+ // Transition from cfa_rule_ to new_rules.cfa_rule_.
+ if (cfa_rule_ && new_rules.cfa_rule_) {
+ if (*cfa_rule_ != *new_rules.cfa_rule_ &&
+ !new_rules.cfa_rule_->Handle(handler, address,
+ Handler::kCFARegister))
+ return false;
+ } else if (cfa_rule_) {
+ // this RuleMap has a CFA rule but new_rules doesn't.
+ // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
+ // it's garbage input. The instruction interpreter should have
+ // detected this and warned, so take no action here.
+ } else if (new_rules.cfa_rule_) {
+ // This shouldn't be possible: NEW_RULES is some prior state, and
+ // there's no way to remove entries.
+ assert(0);
+ } else {
+ // Both CFA rules are empty. No action needed.
+ }
+
+ // Traverse the two maps in order by register number, and report
+ // whatever differences we find.
+ RuleByNumber::const_iterator old_it = registers_.begin();
+ RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
+ while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
+ if (old_it->first < new_it->first) {
+ // This RuleMap has an entry for old_it->first, but NEW_RULES
+ // doesn't.
+ //
+ // This isn't really the right thing to do, but since CFI generally
+ // only mentions callee-saves registers, and GCC's convention for
+ // callee-saves registers is that they are unchanged, it's a good
+ // approximation.
+ if (!handler->SameValueRule(address, old_it->first))
+ return false;
+ old_it++;
+ } else if (old_it->first > new_it->first) {
+ // NEW_RULES has entry for new_it->first, but this RuleMap
+ // doesn't. This shouldn't be possible: NEW_RULES is some prior
+ // state, and there's no way to remove entries.
+ assert(0);
+ } else {
+ // Both maps have an entry for this register. Report the new
+ // rule if it is different.
+ if (*old_it->second != *new_it->second &&
+ !new_it->second->Handle(handler, address, new_it->first))
+ return false;
+ new_it++, old_it++;
+ }
+ }
+ // Finish off entries from this RuleMap with no counterparts in new_rules.
+ while (old_it != registers_.end()) {
+ if (!handler->SameValueRule(address, old_it->first))
+ return false;
+ old_it++;
+ }
+ // Since we only make transitions from a rule set to some previously
+ // saved rule set, and we can only add rules to the map, NEW_RULES
+ // must have fewer rules than *this.
+ assert(new_it == new_rules.registers_.end());
+
+ return true;
+}
+
+// Remove all register rules and clear cfa_rule_.
+void CallFrameInfo::RuleMap::Clear() {
+ delete cfa_rule_;
+ cfa_rule_ = NULL;
+ for (RuleByNumber::iterator it = registers_.begin();
+ it != registers_.end(); it++)
+ delete it->second;
+ registers_.clear();
+}
+
+// The state of the call frame information interpreter as it processes
+// instructions from a CIE and FDE.
+class CallFrameInfo::State {
+ public:
+ // Create a call frame information interpreter state with the given
+ // reporter, reader, handler, and initial call frame info address.
+ State(ByteReader* reader, Handler* handler, Reporter* reporter,
+ uint64_t address)
+ : reader_(reader), handler_(handler), reporter_(reporter),
+ address_(address), entry_(NULL), cursor_(NULL) { }
+
+ // Interpret instructions from CIE, save the resulting rule set for
+ // DW_CFA_restore instructions, and return true. On error, report
+ // the problem to reporter_ and return false.
+ bool InterpretCIE(const CIE& cie);
+
+ // Interpret instructions from FDE, and return true. On error,
+ // report the problem to reporter_ and return false.
+ bool InterpretFDE(const FDE& fde);
+
+ private:
+ // The operands of a CFI instruction, for ParseOperands.
+ struct Operands {
+ unsigned register_number; // A register number.
+ uint64_t offset; // An offset or address.
+ long signed_offset; // A signed offset.
+ string expression; // A DWARF expression.
+ };
+
+ // Parse CFI instruction operands from STATE's instruction stream as
+ // described by FORMAT. On success, populate OPERANDS with the
+ // results, and return true. On failure, report the problem and
+ // return false.
+ //
+ // Each character of FORMAT should be one of the following:
+ //
+ // 'r' unsigned LEB128 register number (OPERANDS->register_number)
+ // 'o' unsigned LEB128 offset (OPERANDS->offset)
+ // 's' signed LEB128 offset (OPERANDS->signed_offset)
+ // 'a' machine-size address (OPERANDS->offset)
+ // (If the CIE has a 'z' augmentation string, 'a' uses the
+ // encoding specified by the 'R' argument.)
+ // '1' a one-byte offset (OPERANDS->offset)
+ // '2' a two-byte offset (OPERANDS->offset)
+ // '4' a four-byte offset (OPERANDS->offset)
+ // '8' an eight-byte offset (OPERANDS->offset)
+ // 'e' a DW_FORM_block holding a (OPERANDS->expression)
+ // DWARF expression
+ bool ParseOperands(const char* format, Operands* operands);
+
+ // Interpret one CFI instruction from STATE's instruction stream, update
+ // STATE, report any rule changes to handler_, and return true. On
+ // failure, report the problem and return false.
+ bool DoInstruction();
+
+ // The following Do* member functions are subroutines of DoInstruction,
+ // factoring out the actual work of operations that have several
+ // different encodings.
+
+ // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
+ // return true. On failure, report and return false. (Used for
+ // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
+ bool DoDefCFA(unsigned base_register, long offset);
+
+ // Change the offset of the CFA rule to OFFSET, and return true. On
+ // failure, report and return false. (Subroutine for
+ // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
+ bool DoDefCFAOffset(long offset);
+
+ // Specify that REG can be recovered using RULE, and return true. On
+ // failure, report and return false.
+ bool DoRule(unsigned reg, Rule* rule);
+
+ // Specify that REG can be found at OFFSET from the CFA, and return true.
+ // On failure, report and return false. (Subroutine for DW_CFA_offset,
+ // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
+ bool DoOffset(unsigned reg, long offset);
+
+ // Specify that the caller's value for REG is the CFA plus OFFSET,
+ // and return true. On failure, report and return false. (Subroutine
+ // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
+ bool DoValOffset(unsigned reg, long offset);
+
+ // Restore REG to the rule established in the CIE, and return true. On
+ // failure, report and return false. (Subroutine for DW_CFA_restore and
+ // DW_CFA_restore_extended.)
+ bool DoRestore(unsigned reg);
+
+ // Return the section offset of the instruction at cursor. For use
+ // in error messages.
+ uint64_t CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
+
+ // Report that entry_ is incomplete, and return false. For brevity.
+ bool ReportIncomplete() {
+ reporter_->Incomplete(entry_->offset, entry_->kind);
+ return false;
+ }
+
+ // For reading multi-byte values with the appropriate endianness.
+ ByteReader* reader_;
+
+ // The handler to which we should report the data we find.
+ Handler* handler_;
+
+ // For reporting problems in the info we're parsing.
+ Reporter* reporter_;
+
+ // The code address to which the next instruction in the stream applies.
+ uint64_t address_;
+
+ // The entry whose instructions we are currently processing. This is
+ // first a CIE, and then an FDE.
+ const Entry* entry_;
+
+ // The next instruction to process.
+ const uint8_t* cursor_;
+
+ // The current set of rules.
+ RuleMap rules_;
+
+ // The set of rules established by the CIE, used by DW_CFA_restore
+ // and DW_CFA_restore_extended. We set this after interpreting the
+ // CIE's instructions.
+ RuleMap cie_rules_;
+
+ // A stack of saved states, for DW_CFA_remember_state and
+ // DW_CFA_restore_state.
+ std::stack<RuleMap> saved_rules_;
+};
+
+bool CallFrameInfo::State::InterpretCIE(const CIE& cie) {
+ entry_ = &cie;
+ cursor_ = entry_->instructions;
+ while (cursor_ < entry_->end)
+ if (!DoInstruction())
+ return false;
+ // Note the rules established by the CIE, for use by DW_CFA_restore
+ // and DW_CFA_restore_extended.
+ cie_rules_ = rules_;
+ return true;
+}
+
+bool CallFrameInfo::State::InterpretFDE(const FDE& fde) {
+ entry_ = &fde;
+ cursor_ = entry_->instructions;
+ while (cursor_ < entry_->end)
+ if (!DoInstruction())
+ return false;
+ return true;
+}
+
+bool CallFrameInfo::State::ParseOperands(const char* format,
+ Operands* operands) {
+ size_t len;
+ const char* operand;
+
+ for (operand = format; *operand; operand++) {
+ size_t bytes_left = entry_->end - cursor_;
+ switch (*operand) {
+ case 'r':
+ operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
+ if (len > bytes_left) return ReportIncomplete();
+ cursor_ += len;
+ break;
+
+ case 'o':
+ operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
+ if (len > bytes_left) return ReportIncomplete();
+ cursor_ += len;
+ break;
+
+ case 's':
+ operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
+ if (len > bytes_left) return ReportIncomplete();
+ cursor_ += len;
+ break;
+
+ case 'a':
+ operands->offset =
+ reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
+ &len);
+ if (len > bytes_left) return ReportIncomplete();
+ cursor_ += len;
+ break;
+
+ case '1':
+ if (1 > bytes_left) return ReportIncomplete();
+ operands->offset = static_cast<unsigned char>(*cursor_++);
+ break;
+
+ case '2':
+ if (2 > bytes_left) return ReportIncomplete();
+ operands->offset = reader_->ReadTwoBytes(cursor_);
+ cursor_ += 2;
+ break;
+
+ case '4':
+ if (4 > bytes_left) return ReportIncomplete();
+ operands->offset = reader_->ReadFourBytes(cursor_);
+ cursor_ += 4;
+ break;
+
+ case '8':
+ if (8 > bytes_left) return ReportIncomplete();
+ operands->offset = reader_->ReadEightBytes(cursor_);
+ cursor_ += 8;
+ break;
+
+ case 'e': {
+ size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
+ if (len > bytes_left || expression_length > bytes_left - len)
+ return ReportIncomplete();
+ cursor_ += len;
+ operands->expression = string(reinterpret_cast<const char*>(cursor_),
+ expression_length);
+ cursor_ += expression_length;
+ break;
+ }
+
+ default:
+ assert(0);
+ }
+ }
+
+ return true;
+}
+
+bool CallFrameInfo::State::DoInstruction() {
+ CIE* cie = entry_->cie;
+ Operands ops;
+
+ // Our entry's kind should have been set by now.
+ assert(entry_->kind != kUnknown);
+
+ // We shouldn't have been invoked unless there were more
+ // instructions to parse.
+ assert(cursor_ < entry_->end);
+
+ unsigned opcode = *cursor_++;
+ if ((opcode & 0xc0) != 0) {
+ switch (opcode & 0xc0) {
+ // Advance the address.
+ case DW_CFA_advance_loc: {
+ size_t code_offset = opcode & 0x3f;
+ address_ += code_offset * cie->code_alignment_factor;
+ break;
+ }
+
+ // Find a register at an offset from the CFA.
+ case DW_CFA_offset:
+ if (!ParseOperands("o", &ops) ||
+ !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // Restore the rule established for a register by the CIE.
+ case DW_CFA_restore:
+ if (!DoRestore(opcode & 0x3f)) return false;
+ break;
+
+ // The 'if' above should have excluded this possibility.
+ default:
+ assert(0);
+ }
+
+ // Return here, so the big switch below won't be indented.
+ return true;
+ }
+
+ switch (opcode) {
+ // Set the address.
+ case DW_CFA_set_loc:
+ if (!ParseOperands("a", &ops)) return false;
+ address_ = ops.offset;
+ break;
+
+ // Advance the address.
+ case DW_CFA_advance_loc1:
+ if (!ParseOperands("1", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Advance the address.
+ case DW_CFA_advance_loc2:
+ if (!ParseOperands("2", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Advance the address.
+ case DW_CFA_advance_loc4:
+ if (!ParseOperands("4", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Advance the address.
+ case DW_CFA_MIPS_advance_loc8:
+ if (!ParseOperands("8", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Compute the CFA by adding an offset to a register.
+ case DW_CFA_def_cfa:
+ if (!ParseOperands("ro", &ops) ||
+ !DoDefCFA(ops.register_number, ops.offset))
+ return false;
+ break;
+
+ // Compute the CFA by adding an offset to a register.
+ case DW_CFA_def_cfa_sf:
+ if (!ParseOperands("rs", &ops) ||
+ !DoDefCFA(ops.register_number,
+ ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // Change the base register used to compute the CFA.
+ case DW_CFA_def_cfa_register: {
+ if (!ParseOperands("r", &ops)) return false;
+ Rule* cfa_rule = rules_.CFARule();
+ if (!cfa_rule) {
+ if (!DoDefCFA(ops.register_number, ops.offset)) {
+ reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+ return false;
+ }
+ } else {
+ cfa_rule->SetBaseRegister(ops.register_number);
+ if (!cfa_rule->Handle(handler_, address_,
+ Handler::kCFARegister))
+ return false;
+ }
+ break;
+ }
+
+ // Change the offset used to compute the CFA.
+ case DW_CFA_def_cfa_offset:
+ if (!ParseOperands("o", &ops) ||
+ !DoDefCFAOffset(ops.offset))
+ return false;
+ break;
+
+ // Change the offset used to compute the CFA.
+ case DW_CFA_def_cfa_offset_sf:
+ if (!ParseOperands("s", &ops) ||
+ !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // Specify an expression whose value is the CFA.
+ case DW_CFA_def_cfa_expression: {
+ if (!ParseOperands("e", &ops))
+ return false;
+ Rule* rule = new ValExpressionRule(ops.expression);
+ rules_.SetCFARule(rule);
+ if (!rule->Handle(handler_, address_,
+ Handler::kCFARegister))
+ return false;
+ break;
+ }
+
+ // The register's value cannot be recovered.
+ case DW_CFA_undefined: {
+ if (!ParseOperands("r", &ops) ||
+ !DoRule(ops.register_number, new UndefinedRule()))
+ return false;
+ break;
+ }
+
+ // The register's value is unchanged from its value in the caller.
+ case DW_CFA_same_value: {
+ if (!ParseOperands("r", &ops) ||
+ !DoRule(ops.register_number, new SameValueRule()))
+ return false;
+ break;
+ }
+
+ // Find a register at an offset from the CFA.
+ case DW_CFA_offset_extended:
+ if (!ParseOperands("ro", &ops) ||
+ !DoOffset(ops.register_number,
+ ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register is saved at an offset from the CFA.
+ case DW_CFA_offset_extended_sf:
+ if (!ParseOperands("rs", &ops) ||
+ !DoOffset(ops.register_number,
+ ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register is saved at an offset from the CFA.
+ case DW_CFA_GNU_negative_offset_extended:
+ if (!ParseOperands("ro", &ops) ||
+ !DoOffset(ops.register_number,
+ -ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register's value is the sum of the CFA plus an offset.
+ case DW_CFA_val_offset:
+ if (!ParseOperands("ro", &ops) ||
+ !DoValOffset(ops.register_number,
+ ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register's value is the sum of the CFA plus an offset.
+ case DW_CFA_val_offset_sf:
+ if (!ParseOperands("rs", &ops) ||
+ !DoValOffset(ops.register_number,
+ ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register has been saved in another register.
+ case DW_CFA_register: {
+ if (!ParseOperands("ro", &ops) ||
+ !DoRule(ops.register_number, new RegisterRule(ops.offset)))
+ return false;
+ break;
+ }
+
+ // An expression yields the address at which the register is saved.
+ case DW_CFA_expression: {
+ if (!ParseOperands("re", &ops) ||
+ !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
+ return false;
+ break;
+ }
+
+ // An expression yields the caller's value for the register.
+ case DW_CFA_val_expression: {
+ if (!ParseOperands("re", &ops) ||
+ !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
+ return false;
+ break;
+ }
+
+ // Restore the rule established for a register by the CIE.
+ case DW_CFA_restore_extended:
+ if (!ParseOperands("r", &ops) ||
+ !DoRestore( ops.register_number))
+ return false;
+ break;
+
+ // Save the current set of rules on a stack.
+ case DW_CFA_remember_state:
+ saved_rules_.push(rules_);
+ break;
+
+ // Pop the current set of rules off the stack.
+ case DW_CFA_restore_state: {
+ if (saved_rules_.empty()) {
+ reporter_->EmptyStateStack(entry_->offset, entry_->kind,
+ CursorOffset());
+ return false;
+ }
+ const RuleMap& new_rules = saved_rules_.top();
+ if (rules_.CFARule() && !new_rules.CFARule()) {
+ reporter_->ClearingCFARule(entry_->offset, entry_->kind,
+ CursorOffset());
+ return false;
+ }
+ rules_.HandleTransitionTo(handler_, address_, new_rules);
+ rules_ = new_rules;
+ saved_rules_.pop();
+ break;
+ }
+
+ // No operation. (Padding instruction.)
+ case DW_CFA_nop:
+ break;
+
+ // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
+ // are saved in registers 24 through 31 (%i0-%i7), and registers
+ // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
+ // (0-15 * the register size). The register numbers must be
+ // hard-coded. A GNU extension, and not a pretty one.
+ case DW_CFA_GNU_window_save: {
+ // Save %o0-%o7 in %i0-%i7.
+ for (int i = 8; i < 16; i++)
+ if (!DoRule(i, new RegisterRule(i + 16)))
+ return false;
+ // Save %l0-%l7 and %i0-%i7 at the CFA.
+ for (int i = 16; i < 32; i++)
+ // Assume that the byte reader's address size is the same as
+ // the architecture's register size. !@#%*^ hilarious.
+ if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
+ (i - 16) * reader_->AddressSize())))
+ return false;
+ break;
+ }
+
+ // I'm not sure what this is. GDB doesn't use it for unwinding.
+ case DW_CFA_GNU_args_size:
+ if (!ParseOperands("o", &ops)) return false;
+ break;
+
+ // An opcode we don't recognize.
+ default: {
+ reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
+ Rule* rule = new ValOffsetRule(base_register, offset);
+ rules_.SetCFARule(rule);
+ return rule->Handle(handler_, address_,
+ Handler::kCFARegister);
+}
+
+bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
+ Rule* cfa_rule = rules_.CFARule();
+ if (!cfa_rule) {
+ reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+ return false;
+ }
+ cfa_rule->SetOffset(offset);
+ return cfa_rule->Handle(handler_, address_,
+ Handler::kCFARegister);
+}
+
+bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) {
+ rules_.SetRegisterRule(reg, rule);
+ return rule->Handle(handler_, address_, reg);
+}
+
+bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
+ if (!rules_.CFARule()) {
+ reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+ return false;
+ }
+ return DoRule(reg,
+ new OffsetRule(Handler::kCFARegister, offset));
+}
+
+bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
+ if (!rules_.CFARule()) {
+ reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+ return false;
+ }
+ return DoRule(reg,
+ new ValOffsetRule(Handler::kCFARegister, offset));
+}
+
+bool CallFrameInfo::State::DoRestore(unsigned reg) {
+ // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
+ if (entry_->kind == kCIE) {
+ reporter_->RestoreInCIE(entry_->offset, CursorOffset());
+ return false;
+ }
+ Rule* rule = cie_rules_.RegisterRule(reg);
+ if (!rule) {
+ // This isn't really the right thing to do, but since CFI generally
+ // only mentions callee-saves registers, and GCC's convention for
+ // callee-saves registers is that they are unchanged, it's a good
+ // approximation.
+ rule = new SameValueRule();
+ }
+ return DoRule(reg, rule);
+}
+
+bool CallFrameInfo::ReadEntryPrologue(const uint8_t* cursor, Entry* entry) {
+ const uint8_t* buffer_end = buffer_ + buffer_length_;
+
+ // Initialize enough of ENTRY for use in error reporting.
+ entry->offset = cursor - buffer_;
+ entry->start = cursor;
+ entry->kind = kUnknown;
+ entry->end = NULL;
+
+ // Read the initial length. This sets reader_'s offset size.
+ size_t length_size;
+ uint64_t length = reader_->ReadInitialLength(cursor, &length_size);
+ if (length_size > size_t(buffer_end - cursor))
+ return ReportIncomplete(entry);
+ cursor += length_size;
+
+ // In a .eh_frame section, a length of zero marks the end of the series
+ // of entries.
+ if (length == 0 && eh_frame_) {
+ entry->kind = kTerminator;
+ entry->end = cursor;
+ return true;
+ }
+
+ // Validate the length.
+ if (length > size_t(buffer_end - cursor))
+ return ReportIncomplete(entry);
+
+ // The length is the number of bytes after the initial length field;
+ // we have that position handy at this point, so compute the end
+ // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
+ // and the length didn't fit in a size_t, we would have rejected it
+ // above.)
+ entry->end = cursor + length;
+
+ // Parse the next field: either the offset of a CIE or a CIE id.
+ size_t offset_size = reader_->OffsetSize();
+ if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
+ entry->id = reader_->ReadOffset(cursor);
+
+ // Don't advance cursor past id field yet; in .eh_frame data we need
+ // the id's position to compute the section offset of an FDE's CIE.
+
+ // Now we can decide what kind of entry this is.
+ if (eh_frame_) {
+ // In .eh_frame data, an ID of zero marks the entry as a CIE, and
+ // anything else is an offset from the id field of the FDE to the start
+ // of the CIE.
+ if (entry->id == 0) {
+ entry->kind = kCIE;
+ } else {
+ entry->kind = kFDE;
+ // Turn the offset from the id into an offset from the buffer's start.
+ entry->id = (cursor - buffer_) - entry->id;
+ }
+ } else {
+ // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
+ // offset size for the entry) marks the entry as a CIE, and anything
+ // else is the offset of the CIE from the beginning of the section.
+ if (offset_size == 4)
+ entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
+ else {
+ assert(offset_size == 8);
+ entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
+ }
+ }
+
+ // Now advance cursor past the id.
+ cursor += offset_size;
+
+ // The fields specific to this kind of entry start here.
+ entry->fields = cursor;
+
+ entry->cie = NULL;
+
+ return true;
+}
+
+bool CallFrameInfo::ReadCIEFields(CIE* cie) {
+ const uint8_t* cursor = cie->fields;
+ size_t len;
+
+ assert(cie->kind == kCIE);
+
+ // Prepare for early exit.
+ cie->version = 0;
+ cie->augmentation.clear();
+ cie->code_alignment_factor = 0;
+ cie->data_alignment_factor = 0;
+ cie->return_address_register = 0;
+ cie->has_z_augmentation = false;
+ cie->pointer_encoding = DW_EH_PE_absptr;
+ cie->instructions = 0;
+
+ // Parse the version number.
+ if (cie->end - cursor < 1)
+ return ReportIncomplete(cie);
+ cie->version = reader_->ReadOneByte(cursor);
+ cursor++;
+
+ // If we don't recognize the version, we can't parse any more fields of the
+ // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
+ // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
+ // the difference between those versions seems to be the same as for
+ // .debug_frame.
+ if (cie->version < 1 || cie->version > 4) {
+ reporter_->UnrecognizedVersion(cie->offset, cie->version);
+ return false;
+ }
+
+ const uint8_t* augmentation_start = cursor;
+ const uint8_t* augmentation_end =
+ reinterpret_cast<const uint8_t*>(memchr(augmentation_start, '\0',
+ cie->end - augmentation_start));
+ if (! augmentation_end) return ReportIncomplete(cie);
+ cursor = augmentation_end;
+ cie->augmentation = string(reinterpret_cast<const char*>(augmentation_start),
+ cursor - augmentation_start);
+ // Skip the terminating '\0'.
+ cursor++;
+
+ // Is this CFI augmented?
+ if (!cie->augmentation.empty()) {
+ // Is it an augmentation we recognize?
+ if (cie->augmentation[0] == DW_Z_augmentation_start) {
+ // Linux C++ ABI 'z' augmentation, used for exception handling data.
+ cie->has_z_augmentation = true;
+ } else {
+ // Not an augmentation we recognize. Augmentations can have arbitrary
+ // effects on the form of rest of the content, so we have to give up.
+ reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+ return false;
+ }
+ }
+
+ if (cie->version >= 4) {
+ cie->address_size = *cursor++;
+ if (cie->address_size != 8 && cie->address_size != 4) {
+ reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
+ return false;
+ }
+
+ cie->segment_size = *cursor++;
+ if (cie->segment_size != 0) {
+ reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
+ return false;
+ }
+ }
+
+ // Parse the code alignment factor.
+ cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
+ if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+ cursor += len;
+
+ // Parse the data alignment factor.
+ cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
+ if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+ cursor += len;
+
+ // Parse the return address register. This is a ubyte in version 1, and
+ // a ULEB128 in version 3.
+ if (cie->version == 1) {
+ if (cursor >= cie->end) return ReportIncomplete(cie);
+ cie->return_address_register = uint8_t(*cursor++);
+ } else {
+ cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
+ if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+ cursor += len;
+ }
+
+ // If we have a 'z' augmentation string, find the augmentation data and
+ // use the augmentation string to parse it.
+ if (cie->has_z_augmentation) {
+ uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
+ if (size_t(cie->end - cursor) < len + data_size)
+ return ReportIncomplete(cie);
+ cursor += len;
+ const uint8_t* data = cursor;
+ cursor += data_size;
+ const uint8_t* data_end = cursor;
+
+ cie->has_z_lsda = false;
+ cie->has_z_personality = false;
+ cie->has_z_signal_frame = false;
+
+ // Walk the augmentation string, and extract values from the
+ // augmentation data as the string directs.
+ for (size_t i = 1; i < cie->augmentation.size(); i++) {
+ switch (cie->augmentation[i]) {
+ case DW_Z_has_LSDA:
+ // The CIE's augmentation data holds the language-specific data
+ // area pointer's encoding, and the FDE's augmentation data holds
+ // the pointer itself.
+ cie->has_z_lsda = true;
+ // Fetch the LSDA encoding from the augmentation data.
+ if (data >= data_end) return ReportIncomplete(cie);
+ cie->lsda_encoding = DwarfPointerEncoding(*data++);
+ if (!reader_->ValidEncoding(cie->lsda_encoding)) {
+ reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
+ return false;
+ }
+ // Don't check if the encoding is usable here --- we haven't
+ // read the FDE's fields yet, so we're not prepared for
+ // DW_EH_PE_funcrel, although that's a fine encoding for the
+ // LSDA to use, since it appears in the FDE.
+ break;
+
+ case DW_Z_has_personality_routine:
+ // The CIE's augmentation data holds the personality routine
+ // pointer's encoding, followed by the pointer itself.
+ cie->has_z_personality = true;
+ // Fetch the personality routine pointer's encoding from the
+ // augmentation data.
+ if (data >= data_end) return ReportIncomplete(cie);
+ cie->personality_encoding = DwarfPointerEncoding(*data++);
+ if (!reader_->ValidEncoding(cie->personality_encoding)) {
+ reporter_->InvalidPointerEncoding(cie->offset,
+ cie->personality_encoding);
+ return false;
+ }
+ if (!reader_->UsableEncoding(cie->personality_encoding)) {
+ reporter_->UnusablePointerEncoding(cie->offset,
+ cie->personality_encoding);
+ return false;
+ }
+ // Fetch the personality routine's pointer itself from the data.
+ cie->personality_address =
+ reader_->ReadEncodedPointer(data, cie->personality_encoding,
+ &len);
+ if (len > size_t(data_end - data))
+ return ReportIncomplete(cie);
+ data += len;
+ break;
+
+ case DW_Z_has_FDE_address_encoding:
+ // The CIE's augmentation data holds the pointer encoding to use
+ // for addresses in the FDE.
+ if (data >= data_end) return ReportIncomplete(cie);
+ cie->pointer_encoding = DwarfPointerEncoding(*data++);
+ if (!reader_->ValidEncoding(cie->pointer_encoding)) {
+ reporter_->InvalidPointerEncoding(cie->offset,
+ cie->pointer_encoding);
+ return false;
+ }
+ if (!reader_->UsableEncoding(cie->pointer_encoding)) {
+ reporter_->UnusablePointerEncoding(cie->offset,
+ cie->pointer_encoding);
+ return false;
+ }
+ break;
+
+ case DW_Z_is_signal_trampoline:
+ // Frames using this CIE are signal delivery frames.
+ cie->has_z_signal_frame = true;
+ break;
+
+ default:
+ // An augmentation we don't recognize.
+ reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+ return false;
+ }
+ }
+ }
+
+ // The CIE's instructions start here.
+ cie->instructions = cursor;
+
+ return true;
+}
+
+bool CallFrameInfo::ReadFDEFields(FDE* fde) {
+ const uint8_t* cursor = fde->fields;
+ size_t size;
+
+ fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
+ &size);
+ if (size > size_t(fde->end - cursor))
+ return ReportIncomplete(fde);
+ cursor += size;
+ reader_->SetFunctionBase(fde->address);
+
+ // For the length, we strip off the upper nybble of the encoding used for
+ // the starting address.
+ DwarfPointerEncoding length_encoding =
+ DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
+ fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
+ if (size > size_t(fde->end - cursor))
+ return ReportIncomplete(fde);
+ cursor += size;
+
+ // If the CIE has a 'z' augmentation string, then augmentation data
+ // appears here.
+ if (fde->cie->has_z_augmentation) {
+ uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
+ if (size_t(fde->end - cursor) < size + data_size)
+ return ReportIncomplete(fde);
+ cursor += size;
+
+ // In the abstract, we should walk the augmentation string, and extract
+ // items from the FDE's augmentation data as we encounter augmentation
+ // string characters that specify their presence: the ordering of items
+ // in the augmentation string determines the arrangement of values in
+ // the augmentation data.
+ //
+ // In practice, there's only ever one value in FDE augmentation data
+ // that we support --- the LSDA pointer --- and we have to bail if we
+ // see any unrecognized augmentation string characters. So if there is
+ // anything here at all, we know what it is, and where it starts.
+ if (fde->cie->has_z_lsda) {
+ // Check whether the LSDA's pointer encoding is usable now: only once
+ // we've parsed the FDE's starting address do we call reader_->
+ // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
+ // usable.
+ if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
+ reporter_->UnusablePointerEncoding(fde->cie->offset,
+ fde->cie->lsda_encoding);
+ return false;
+ }
+
+ fde->lsda_address =
+ reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
+ if (size > data_size)
+ return ReportIncomplete(fde);
+ // Ideally, we would also complain here if there were unconsumed
+ // augmentation data.
+ }
+
+ cursor += data_size;
+ }
+
+ // The FDE's instructions start after those.
+ fde->instructions = cursor;
+
+ return true;
+}
+
+bool CallFrameInfo::Start() {
+ const uint8_t* buffer_end = buffer_ + buffer_length_;
+ const uint8_t* cursor;
+ bool all_ok = true;
+ const uint8_t* entry_end;
+ bool ok;
+
+ // Traverse all the entries in buffer_, skipping CIEs and offering
+ // FDEs to the handler.
+ for (cursor = buffer_; cursor < buffer_end;
+ cursor = entry_end, all_ok = all_ok && ok) {
+ FDE fde;
+
+ // Make it easy to skip this entry with 'continue': assume that
+ // things are not okay until we've checked all the data, and
+ // prepare the address of the next entry.
+ ok = false;
+
+ // Read the entry's prologue.
+ if (!ReadEntryPrologue(cursor, &fde)) {
+ if (!fde.end) {
+ // If we couldn't even figure out this entry's extent, then we
+ // must stop processing entries altogether.
+ all_ok = false;
+ break;
+ }
+ entry_end = fde.end;
+ continue;
+ }
+
+ // The next iteration picks up after this entry.
+ entry_end = fde.end;
+
+ // Did we see an .eh_frame terminating mark?
+ if (fde.kind == kTerminator) {
+ // If there appears to be more data left in the section after the
+ // terminating mark, warn the user. But this is just a warning;
+ // we leave all_ok true.
+ if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
+ break;
+ }
+
+ // In this loop, we skip CIEs. We only parse them fully when we
+ // parse an FDE that refers to them. This limits our memory
+ // consumption (beyond the buffer itself) to that needed to
+ // process the largest single entry.
+ if (fde.kind != kFDE) {
+ ok = true;
+ continue;
+ }
+
+ // Validate the CIE pointer.
+ if (fde.id > buffer_length_) {
+ reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
+ continue;
+ }
+
+ CIE cie;
+
+ // Parse this FDE's CIE header.
+ if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
+ continue;
+ // This had better be an actual CIE.
+ if (cie.kind != kCIE) {
+ reporter_->BadCIEId(fde.offset, fde.id);
+ continue;
+ }
+ if (!ReadCIEFields(&cie))
+ continue;
+
+ // TODO(nbilling): This could lead to strange behavior if a single buffer
+ // contained a mixture of DWARF versions as well as address sizes. Not
+ // sure if it's worth handling such a case.
+
+ // DWARF4 CIE specifies address_size, so use it for this call frame.
+ if (cie.version >= 4) {
+ reader_->SetAddressSize(cie.address_size);
+ }
+
+ // We now have the values that govern both the CIE and the FDE.
+ cie.cie = &cie;
+ fde.cie = &cie;
+
+ // Parse the FDE's header.
+ if (!ReadFDEFields(&fde))
+ continue;
+
+ // Call Entry to ask the consumer if they're interested.
+ if (!handler_->Entry(fde.offset, fde.address, fde.size,
+ cie.version, cie.augmentation,
+ cie.return_address_register)) {
+ // The handler isn't interested in this entry. That's not an error.
+ ok = true;
+ continue;
+ }
+
+ if (cie.has_z_augmentation) {
+ // Report the personality routine address, if we have one.
+ if (cie.has_z_personality) {
+ if (!handler_
+ ->PersonalityRoutine(cie.personality_address,
+ IsIndirectEncoding(cie.personality_encoding)))
+ continue;
+ }
+
+ // Report the language-specific data area address, if we have one.
+ if (cie.has_z_lsda) {
+ if (!handler_
+ ->LanguageSpecificDataArea(fde.lsda_address,
+ IsIndirectEncoding(cie.lsda_encoding)))
+ continue;
+ }
+
+ // If this is a signal-handling frame, report that.
+ if (cie.has_z_signal_frame) {
+ if (!handler_->SignalHandler())
+ continue;
+ }
+ }
+
+ // Interpret the CIE's instructions, and then the FDE's instructions.
+ State state(reader_, handler_, reporter_, fde.address);
+ ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
+
+ // Tell the ByteReader that the function start address from the
+ // FDE header is no longer valid.
+ reader_->ClearFunctionBase();
+
+ // Report the end of the entry.
+ handler_->End();
+ }
+
+ return all_ok;
+}
+
+const char* CallFrameInfo::KindName(EntryKind kind) {
+ if (kind == CallFrameInfo::kUnknown)
+ return "entry";
+ else if (kind == CallFrameInfo::kCIE)
+ return "common information entry";
+ else if (kind == CallFrameInfo::kFDE)
+ return "frame description entry";
+ else {
+ assert (kind == CallFrameInfo::kTerminator);
+ return ".eh_frame sequence terminator";
+ }
+}
+
+bool CallFrameInfo::ReportIncomplete(Entry* entry) {
+ reporter_->Incomplete(entry->offset, entry->kind);
+ return false;
+}
+
+void CallFrameInfo::Reporter::Incomplete(uint64_t offset,
+ CallFrameInfo::EntryKind kind) {
+ fprintf(stderr,
+ "%s: CFI %s at offset 0x%" PRIx64 " in '%s': entry ends early\n",
+ filename_.c_str(), CallFrameInfo::KindName(kind), offset,
+ section_.c_str());
+}
+
+void CallFrameInfo::Reporter::EarlyEHTerminator(uint64_t offset) {
+ fprintf(stderr,
+ "%s: CFI at offset 0x%" PRIx64 " in '%s': saw end-of-data marker"
+ " before end of section contents\n",
+ filename_.c_str(), offset, section_.c_str());
+}
+
+void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64_t offset,
+ uint64_t cie_offset) {
+ fprintf(stderr,
+ "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
+ " CIE pointer is out of range: 0x%" PRIx64 "\n",
+ filename_.c_str(), offset, section_.c_str(), cie_offset);
+}
+
+void CallFrameInfo::Reporter::BadCIEId(uint64_t offset, uint64_t cie_offset) {
+ fprintf(stderr,
+ "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
+ " CIE pointer does not point to a CIE: 0x%" PRIx64 "\n",
+ filename_.c_str(), offset, section_.c_str(), cie_offset);
+}
+
+void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64_t offset,
+ uint8_t address_size) {
+ fprintf(stderr,
+ "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
+ " CIE specifies unexpected address size: %d\n",
+ filename_.c_str(), offset, section_.c_str(), address_size);
+}
+
+void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64_t offset,
+ uint8_t segment_size) {
+ fprintf(stderr,
+ "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
+ " CIE specifies unexpected segment size: %d\n",
+ filename_.c_str(), offset, section_.c_str(), segment_size);
+}
+
+void CallFrameInfo::Reporter::UnrecognizedVersion(uint64_t offset, int version) {
+ fprintf(stderr,
+ "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
+ " CIE specifies unrecognized version: %d\n",
+ filename_.c_str(), offset, section_.c_str(), version);
+}
+
+void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64_t offset,
+ const string& aug) {
+ fprintf(stderr,
+ "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
+ " CIE specifies unrecognized augmentation: '%s'\n",
+ filename_.c_str(), offset, section_.c_str(), aug.c_str());
+}
+
+void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64_t offset,
+ uint8_t encoding) {
+ fprintf(stderr,
+ "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
+ " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
+ filename_.c_str(), offset, section_.c_str(), encoding);
+}
+
+void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64_t offset,
+ uint8_t encoding) {
+ fprintf(stderr,
+ "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
+ " 'z' augmentation specifies a pointer encoding for which"
+ " we have no base address: 0x%02x\n",
+ filename_.c_str(), offset, section_.c_str(), encoding);
+}
+
+void CallFrameInfo::Reporter::RestoreInCIE(uint64_t offset, uint64_t insn_offset) {
+ fprintf(stderr,
+ "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
+ " the DW_CFA_restore instruction at offset 0x%" PRIx64
+ " cannot be used in a common information entry\n",
+ filename_.c_str(), offset, section_.c_str(), insn_offset);
+}
+
+void CallFrameInfo::Reporter::BadInstruction(uint64_t offset,
+ CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset) {
+ fprintf(stderr,
+ "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
+ " the instruction at offset 0x%" PRIx64 " is unrecognized\n",
+ filename_.c_str(), CallFrameInfo::KindName(kind),
+ offset, section_.c_str(), insn_offset);
+}
+
+void CallFrameInfo::Reporter::NoCFARule(uint64_t offset,
+ CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset) {
+ fprintf(stderr,
+ "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
+ " the instruction at offset 0x%" PRIx64 " assumes that a CFA rule has"
+ " been set, but none has been set\n",
+ filename_.c_str(), CallFrameInfo::KindName(kind), offset,
+ section_.c_str(), insn_offset);
+}
+
+void CallFrameInfo::Reporter::EmptyStateStack(uint64_t offset,
+ CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset) {
+ fprintf(stderr,
+ "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
+ " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
+ " should pop a saved state from the stack, but the stack is empty\n",
+ filename_.c_str(), CallFrameInfo::KindName(kind), offset,
+ section_.c_str(), insn_offset);
+}
+
+void CallFrameInfo::Reporter::ClearingCFARule(uint64_t offset,
+ CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset) {
+ fprintf(stderr,
+ "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
+ " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
+ " would clear the CFA rule in effect\n",
+ filename_.c_str(), CallFrameInfo::KindName(kind), offset,
+ section_.c_str(), insn_offset);
+}
+
+} // namespace google_breakpad
diff --git a/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h
new file mode 100644
index 0000000000..97e5fea90a
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/dwarf2reader.h
@@ -0,0 +1,1494 @@
+// -*- mode: C++ -*-
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file contains definitions related to the DWARF2/3 reader and
+// it's handler interfaces.
+// The DWARF2/3 specification can be found at
+// http://dwarf.freestandards.org and should be considered required
+// reading if you wish to modify the implementation.
+// Only a cursory attempt is made to explain terminology that is
+// used here, as it is much better explained in the standard documents
+#ifndef COMMON_DWARF_DWARF2READER_H__
+#define COMMON_DWARF_DWARF2READER_H__
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <list>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+#include <memory>
+
+#include "common/dwarf/bytereader.h"
+#include "common/dwarf/dwarf2enums.h"
+#include "common/dwarf/types.h"
+#include "common/using_std_string.h"
+#include "common/dwarf/elf_reader.h"
+
+namespace google_breakpad {
+struct LineStateMachine;
+class Dwarf2Handler;
+class LineInfoHandler;
+class DwpReader;
+
+// This maps from a string naming a section to a pair containing a
+// the data for the section, and the size of the section.
+typedef std::map<string, std::pair<const uint8_t*, uint64_t> > SectionMap;
+
+// Abstract away the difference between elf and mach-o section names.
+// Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in
+// the elf form, ".section_name".
+const SectionMap::const_iterator GetSectionByName(const SectionMap&
+ sections, const char* name);
+
+// Most of the time, this struct functions as a simple attribute and form pair.
+// However, Dwarf5 DW_FORM_implicit_const means that a form may have its value
+// in line in the abbrev table, and that value must be associated with the
+// pair until the attr's value is needed.
+struct AttrForm {
+ AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) :
+ attr_(attr), form_(form), value_(value) { }
+
+ enum DwarfAttribute attr_;
+ enum DwarfForm form_;
+ uint64_t value_;
+};
+typedef std::list<AttrForm> AttributeList;
+typedef AttributeList::iterator AttributeIterator;
+typedef AttributeList::const_iterator ConstAttributeIterator;
+
+struct LineInfoHeader {
+ uint64_t total_length;
+ uint16_t version;
+ uint64_t prologue_length;
+ uint8_t min_insn_length; // insn stands for instructin
+ bool default_is_stmt; // stmt stands for statement
+ int8_t line_base;
+ uint8_t line_range;
+ uint8_t opcode_base;
+ // Use a pointer so that signalsafe_addr2line is able to use this structure
+ // without heap allocation problem.
+ std::vector<unsigned char>* std_opcode_lengths;
+};
+
+class LineInfo {
+ public:
+
+ // Initializes a .debug_line reader. Buffer and buffer length point
+ // to the beginning and length of the line information to read.
+ // Reader is a ByteReader class that has the endianness set
+ // properly.
+ LineInfo(const uint8_t* buffer, uint64_t buffer_length,
+ ByteReader* reader, const uint8_t* string_buffer,
+ size_t string_buffer_length, const uint8_t* line_string_buffer,
+ size_t line_string_buffer_length, LineInfoHandler* handler);
+
+ virtual ~LineInfo() {
+ if (header_.std_opcode_lengths) {
+ delete header_.std_opcode_lengths;
+ }
+ }
+
+ // Start processing line info, and calling callbacks in the handler.
+ // Consumes the line number information for a single compilation unit.
+ // Returns the number of bytes processed.
+ uint64_t Start();
+
+ // Process a single line info opcode at START using the state
+ // machine at LSM. Return true if we should define a line using the
+ // current state of the line state machine. Place the length of the
+ // opcode in LEN.
+ // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm
+ // passes the address of PC. In other words, LSM_PASSES_PC will be
+ // set to true, if the following condition is met.
+ //
+ // lsm's old address < PC <= lsm's new address
+ static bool ProcessOneOpcode(ByteReader* reader,
+ LineInfoHandler* handler,
+ const struct LineInfoHeader& header,
+ const uint8_t* start,
+ struct LineStateMachine* lsm,
+ size_t* len,
+ uintptr pc,
+ bool* lsm_passes_pc);
+
+ private:
+ // Reads the DWARF2/3 header for this line info.
+ void ReadHeader();
+
+ // Reads the DWARF2/3 line information
+ void ReadLines();
+
+ // Read the DWARF5 types and forms for the file and directory tables.
+ void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types,
+ uint32_t* content_forms, uint32_t max_types,
+ uint32_t* format_count);
+
+ // Read a row from the dwarf5 LineInfo file table.
+ void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types,
+ const uint32_t* content_forms, uint32_t row,
+ uint32_t format_count);
+
+ // Read and return the data at *lineptr according to form. Advance
+ // *lineptr appropriately.
+ uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr);
+
+ // Read and return the data at *lineptr according to form. Advance
+ // *lineptr appropriately.
+ const char* ReadStringForm(uint32_t form, const uint8_t** lineptr);
+
+ // The associated handler to call processing functions in
+ LineInfoHandler* handler_;
+
+ // The associated ByteReader that handles endianness issues for us
+ ByteReader* reader_;
+
+ // A DWARF line info header. This is not the same size as in the actual file,
+ // as the one in the file may have a 32 bit or 64 bit lengths
+
+ struct LineInfoHeader header_;
+
+ // buffer is the buffer for our line info, starting at exactly where
+ // the line info to read is. after_header is the place right after
+ // the end of the line information header.
+ const uint8_t* buffer_;
+#ifndef NDEBUG
+ uint64_t buffer_length_;
+#endif
+ // Convenience pointers into .debug_str and .debug_line_str. These exactly
+ // correspond to those in the compilation unit.
+ const uint8_t* string_buffer_;
+#ifndef NDEBUG
+ uint64_t string_buffer_length_;
+#endif
+ const uint8_t* line_string_buffer_;
+#ifndef NDEBUG
+ uint64_t line_string_buffer_length_;
+#endif
+
+ const uint8_t* after_header_;
+};
+
+// This class is the main interface between the line info reader and
+// the client. The virtual functions inside this get called for
+// interesting events that happen during line info reading. The
+// default implementation does nothing
+
+class LineInfoHandler {
+ public:
+ LineInfoHandler() { }
+
+ virtual ~LineInfoHandler() { }
+
+ // Called when we define a directory. NAME is the directory name,
+ // DIR_NUM is the directory number
+ virtual void DefineDir(const string& name, uint32_t dir_num) { }
+
+ // Called when we define a filename. NAME is the filename, FILE_NUM
+ // is the file number which is -1 if the file index is the next
+ // index after the last numbered index (this happens when files are
+ // dynamically defined by the line program), DIR_NUM is the
+ // directory index for the directory name of this file, MOD_TIME is
+ // the modification time of the file, and LENGTH is the length of
+ // the file
+ virtual void DefineFile(const string& name, int32_t file_num,
+ uint32_t dir_num, uint64_t mod_time,
+ uint64_t length) { }
+
+ // Called when the line info reader has a new line, address pair
+ // ready for us. ADDRESS is the address of the code, LENGTH is the
+ // length of its machine code in bytes, FILE_NUM is the file number
+ // containing the code, LINE_NUM is the line number in that file for
+ // the code, and COLUMN_NUM is the column number the code starts at,
+ // if we know it (0 otherwise).
+ virtual void AddLine(uint64_t address, uint64_t length,
+ uint32_t file_num, uint32_t line_num, uint32_t column_num) { }
+};
+
+class RangeListHandler {
+ public:
+ RangeListHandler() { }
+
+ virtual ~RangeListHandler() { }
+
+ // Add a range.
+ virtual void AddRange(uint64_t begin, uint64_t end) { };
+
+ // Finish processing the range list.
+ virtual void Finish() { };
+};
+
+class RangeListReader {
+ public:
+ // Reading a range list requires quite a bit of information
+ // from the compilation unit. Package it conveniently.
+ struct CURangesInfo {
+ CURangesInfo() :
+ version_(0), base_address_(0), ranges_base_(0),
+ buffer_(nullptr), size_(0), addr_buffer_(nullptr),
+ addr_buffer_size_(0), addr_base_(0) { }
+
+ uint16_t version_;
+ // Ranges base address. Ordinarily the CU's low_pc.
+ uint64_t base_address_;
+ // Offset into .debug_rnglists for this CU's rangelists.
+ uint64_t ranges_base_;
+ // Contents of either .debug_ranges or .debug_rnglists.
+ const uint8_t* buffer_;
+ uint64_t size_;
+ // Contents of .debug_addr. This cu's contribution starts at
+ // addr_base_
+ const uint8_t* addr_buffer_;
+ uint64_t addr_buffer_size_;
+ uint64_t addr_base_;
+ };
+
+ RangeListReader(ByteReader* reader, CURangesInfo* cu_info,
+ RangeListHandler* handler) :
+ reader_(reader), cu_info_(cu_info), handler_(handler),
+ offset_array_(0) { }
+
+ // Read ranges from cu_info as specified by form and data.
+ bool ReadRanges(enum DwarfForm form, uint64_t data);
+
+ private:
+ // Read dwarf4 .debug_ranges at offset.
+ bool ReadDebugRanges(uint64_t offset);
+ // Read dwarf5 .debug_rngslist at offset.
+ bool ReadDebugRngList(uint64_t offset);
+
+ // Convenience functions to handle the mechanics of reading entries in the
+ // ranges section.
+ uint64_t ReadULEB(uint64_t offset, uint64_t* value) {
+ size_t len;
+ *value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len);
+ return len;
+ }
+
+ uint64_t ReadAddress(uint64_t offset, uint64_t* value) {
+ *value = reader_->ReadAddress(cu_info_->buffer_ + offset);
+ return reader_->AddressSize();
+ }
+
+ // Read the address at this CU's addr_index in the .debug_addr section.
+ uint64_t GetAddressAtIndex(uint64_t addr_index) {
+ assert(cu_info_->addr_buffer_ != nullptr);
+ uint64_t offset =
+ cu_info_->addr_base_ + addr_index * reader_->AddressSize();
+ assert(offset < cu_info_->addr_buffer_size_);
+ return reader_->ReadAddress(cu_info_->addr_buffer_ + offset);
+ }
+
+ ByteReader* reader_;
+ CURangesInfo* cu_info_;
+ RangeListHandler* handler_;
+ uint64_t offset_array_;
+};
+
+// This class is the main interface between the reader and the
+// client. The virtual functions inside this get called for
+// interesting events that happen during DWARF2 reading.
+// The default implementation skips everything.
+class Dwarf2Handler {
+ public:
+ Dwarf2Handler() { }
+
+ virtual ~Dwarf2Handler() { }
+
+ // Start to process a compilation unit at OFFSET from the beginning of the
+ // .debug_info section. Return false if you would like to skip this
+ // compilation unit.
+ virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
+ uint8_t offset_size, uint64_t cu_length,
+ uint8_t dwarf_version) { return false; }
+
+ // When processing a skeleton compilation unit, resulting from a split
+ // DWARF compilation, once the skeleton debug info has been read,
+ // the reader will call this function to ask the client if it needs
+ // the full debug info from the .dwo or .dwp file. Return true if
+ // you need it, or false to skip processing the split debug info.
+ virtual bool NeedSplitDebugInfo() { return true; }
+
+ // Start to process a split compilation unit at OFFSET from the beginning of
+ // the debug_info section in the .dwp/.dwo file. Return false if you would
+ // like to skip this compilation unit.
+ virtual bool StartSplitCompilationUnit(uint64_t offset,
+ uint64_t cu_length) { return false; }
+
+ // Start to process a DIE at OFFSET from the beginning of the .debug_info
+ // section. Return false if you would like to skip this DIE.
+ virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; }
+
+ // Called when we have an attribute with unsigned data to give to our
+ // handler. The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, and its value is
+ // DATA.
+ virtual void ProcessAttributeUnsigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data) { }
+
+ // Called when we have an attribute with signed data to give to our handler.
+ // The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, and its value is
+ // DATA.
+ virtual void ProcessAttributeSigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ int64_t data) { }
+
+ // Called when we have an attribute whose value is a reference to
+ // another DIE. The attribute belongs to the DIE at OFFSET from the
+ // beginning of the .debug_info section. Its name is ATTR, its form
+ // is FORM, and the offset of the DIE being referred to from the
+ // beginning of the .debug_info section is DATA.
+ virtual void ProcessAttributeReference(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data) { }
+
+ // Called when we have an attribute with a buffer of data to give to our
+ // handler. The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, DATA points to
+ // the buffer's contents, and its length in bytes is LENGTH. The buffer is
+ // owned by the caller, not the callee, and may not persist for very long.
+ // If you want the data to be available later, it needs to be copied.
+ virtual void ProcessAttributeBuffer(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const uint8_t* data,
+ uint64_t len) { }
+
+ // Called when we have an attribute with string data to give to our handler.
+ // The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, and its value is
+ // DATA.
+ virtual void ProcessAttributeString(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const string& data) { }
+
+ // Called when we have an attribute whose value is the 64-bit signature
+ // of a type unit in the .debug_types section. OFFSET is the offset of
+ // the DIE whose attribute we're reporting. ATTR and FORM are the
+ // attribute's name and form. SIGNATURE is the type unit's signature.
+ virtual void ProcessAttributeSignature(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t signature) { }
+
+ // Called when finished processing the DIE at OFFSET.
+ // Because DWARF2/3 specifies a tree of DIEs, you may get starts
+ // before ends of the previous DIE, as we process children before
+ // ending the parent.
+ virtual void EndDIE(uint64_t offset) { }
+
+};
+
+// The base of DWARF2/3 debug info is a DIE (Debugging Information
+// Entry.
+// DWARF groups DIE's into a tree and calls the root of this tree a
+// "compilation unit". Most of the time, there is one compilation
+// unit in the .debug_info section for each file that had debug info
+// generated.
+// Each DIE consists of
+
+// 1. a tag specifying a thing that is being described (ie
+// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc
+// 2. attributes (such as DW_AT_location for location in memory,
+// DW_AT_name for name), and data for each attribute.
+// 3. A flag saying whether the DIE has children or not
+
+// In order to gain some amount of compression, the format of
+// each DIE (tag name, attributes and data forms for the attributes)
+// are stored in a separate table called the "abbreviation table".
+// This is done because a large number of DIEs have the exact same tag
+// and list of attributes, but different data for those attributes.
+// As a result, the .debug_info section is just a stream of data, and
+// requires reading of the .debug_abbrev section to say what the data
+// means.
+
+// As a warning to the user, it should be noted that the reason for
+// using absolute offsets from the beginning of .debug_info is that
+// DWARF2/3 supports referencing DIE's from other DIE's by their offset
+// from either the current compilation unit start, *or* the beginning
+// of the .debug_info section. This means it is possible to reference
+// a DIE in one compilation unit from a DIE in another compilation
+// unit. This style of reference is usually used to eliminate
+// duplicated information that occurs across compilation
+// units, such as base types, etc. GCC 3.4+ support this with
+// -feliminate-dwarf2-dups. Other toolchains will sometimes do
+// duplicate elimination in the linker.
+
+class CompilationUnit {
+ public:
+
+ // Initialize a compilation unit. This requires a map of sections,
+ // the offset of this compilation unit in the .debug_info section, a
+ // ByteReader, and a Dwarf2Handler class to call callbacks in.
+ CompilationUnit(const string& path, const SectionMap& sections,
+ uint64_t offset, ByteReader* reader, Dwarf2Handler* handler);
+ virtual ~CompilationUnit() {
+ if (abbrevs_) delete abbrevs_;
+ }
+
+ // Initialize a compilation unit from a .dwo or .dwp file.
+ // In this case, we need the .debug_addr section from the
+ // executable file that contains the corresponding skeleton
+ // compilation unit. We also inherit the Dwarf2Handler from
+ // the executable file, and call it as if we were still
+ // processing the original compilation unit.
+ void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length,
+ uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id);
+
+ // Begin reading a Dwarf2 compilation unit, and calling the
+ // callbacks in the Dwarf2Handler
+
+ // Return the full length of the compilation unit, including
+ // headers. This plus the starting offset passed to the constructor
+ // is the offset of the end of the compilation unit --- and the
+ // start of the next compilation unit, if there is one.
+ uint64_t Start();
+
+ private:
+
+ // This struct represents a single DWARF2/3 abbreviation
+ // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a
+ // tag and a list of attributes, as well as the data form of each attribute.
+ struct Abbrev {
+ uint64_t number;
+ enum DwarfTag tag;
+ bool has_children;
+ AttributeList attributes;
+ };
+
+ // A DWARF2/3 compilation unit header. This is not the same size as
+ // in the actual file, as the one in the file may have a 32 bit or
+ // 64 bit length.
+ struct CompilationUnitHeader {
+ uint64_t length;
+ uint16_t version;
+ uint64_t abbrev_offset;
+ uint8_t address_size;
+ } header_;
+
+ // Reads the DWARF2/3 header for this compilation unit.
+ void ReadHeader();
+
+ // Reads the DWARF2/3 abbreviations for this compilation unit
+ void ReadAbbrevs();
+
+ // Read the abbreviation offset for this compilation unit
+ size_t ReadAbbrevOffset(const uint8_t* headerptr);
+
+ // Read the address size for this compilation unit
+ size_t ReadAddressSize(const uint8_t* headerptr);
+
+ // Read the DWO id from a split or skeleton compilation unit header
+ size_t ReadDwoId(const uint8_t* headerptr);
+
+ // Read the type signature from a type or split type compilation unit header
+ size_t ReadTypeSignature(const uint8_t* headerptr);
+
+ // Read the DWO id from a split or skeleton compilation unit header
+ size_t ReadTypeOffset(const uint8_t* headerptr);
+
+ // Processes a single DIE for this compilation unit and return a new
+ // pointer just past the end of it
+ const uint8_t* ProcessDIE(uint64_t dieoffset,
+ const uint8_t* start,
+ const Abbrev& abbrev);
+
+ // Processes a single attribute and return a new pointer just past the
+ // end of it
+ const uint8_t* ProcessAttribute(uint64_t dieoffset,
+ const uint8_t* start,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t implicit_const);
+
+ // Special version of ProcessAttribute, for finding str_offsets_base and
+ // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5.
+ const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset,
+ const uint8_t* start,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t implicit_const);
+
+ // Called when we have an attribute with unsigned data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA.
+ // If we see a DW_AT_GNU_dwo_id attribute, save the value so that
+ // we can find the debug info in a .dwo or .dwp file.
+ void ProcessAttributeUnsigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t data) {
+ if (attr == DW_AT_GNU_dwo_id) {
+ dwo_id_ = data;
+ }
+ else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) {
+ addr_base_ = data;
+ }
+ else if (attr == DW_AT_str_offsets_base) {
+ str_offsets_base_ = data;
+ }
+ else if (attr == DW_AT_GNU_ranges_base || attr == DW_AT_rnglists_base) {
+ ranges_base_ = data;
+ }
+ // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5,
+ // that base will apply to DW_AT_ranges attributes in the
+ // skeleton CU as well as in the .dwo/.dwp files.
+ else if (attr == DW_AT_ranges && is_split_dwarf_) {
+ data += ranges_base_;
+ }
+ handler_->ProcessAttributeUnsigned(offset, attr, form, data);
+ }
+
+ // Called when we have an attribute with signed data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA.
+ void ProcessAttributeSigned(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ int64_t data) {
+ handler_->ProcessAttributeSigned(offset, attr, form, data);
+ }
+
+ // Called when we have an attribute with a buffer of data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA, and the
+ // length of the buffer is LENGTH.
+ void ProcessAttributeBuffer(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const uint8_t* data,
+ uint64_t len) {
+ handler_->ProcessAttributeBuffer(offset, attr, form, data, len);
+ }
+
+ // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx,
+ // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4.
+ // Retrieves the data and calls through to ProcessAttributeString.
+ void ProcessFormStringIndex(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t str_index);
+
+ // Called when we have an attribute with string data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA.
+ // If we see a DW_AT_GNU_dwo_name attribute, save the value so
+ // that we can find the debug info in a .dwo or .dwp file.
+ void ProcessAttributeString(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const char* data) {
+ if (attr == DW_AT_GNU_dwo_name || attr == DW_AT_dwo_name)
+ dwo_name_ = data;
+ handler_->ProcessAttributeString(offset, attr, form, data);
+ }
+
+ // Called to handle common portions of DW_FORM_addrx and variations, as well
+ // as DW_FORM_GNU_addr_index.
+ void ProcessAttributeAddrIndex(uint64_t offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64_t addr_index) {
+ const uint8_t* addr_ptr =
+ addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
+ ProcessAttributeUnsigned(
+ offset, attr, form, reader_->ReadAddress(addr_ptr));
+ }
+
+ // Processes all DIEs for this compilation unit
+ void ProcessDIEs();
+
+ // Skips the die with attributes specified in ABBREV starting at
+ // START, and return the new place to position the stream to.
+ const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev);
+
+ // Skips the attribute starting at START, with FORM, and return the
+ // new place to position the stream to.
+ const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form);
+
+ // Process the actual debug information in a split DWARF file.
+ void ProcessSplitDwarf();
+
+ // Read the debug sections from a .dwo file.
+ void ReadDebugSectionsFromDwo(ElfReader* elf_reader,
+ SectionMap* sections);
+
+ // Path of the file containing the debug information.
+ const string path_;
+
+ // Offset from section start is the offset of this compilation unit
+ // from the beginning of the .debug_info section.
+ uint64_t offset_from_section_start_;
+
+ // buffer is the buffer for our CU, starting at .debug_info + offset
+ // passed in from constructor.
+ // after_header points to right after the compilation unit header.
+ const uint8_t* buffer_;
+ uint64_t buffer_length_;
+ const uint8_t* after_header_;
+
+ // The associated ByteReader that handles endianness issues for us
+ ByteReader* reader_;
+
+ // The map of sections in our file to buffers containing their data
+ const SectionMap& sections_;
+
+ // The associated handler to call processing functions in
+ Dwarf2Handler* handler_;
+
+ // Set of DWARF2/3 abbreviations for this compilation unit. Indexed
+ // by abbreviation number, which means that abbrevs_[0] is not
+ // valid.
+ std::vector<Abbrev>* abbrevs_;
+
+ // String section buffer and length, if we have a string section.
+ // This is here to avoid doing a section lookup for strings in
+ // ProcessAttribute, which is in the hot path for DWARF2 reading.
+ const uint8_t* string_buffer_;
+ uint64_t string_buffer_length_;
+
+ // Similarly for .debug_line_string.
+ const uint8_t* line_string_buffer_;
+ uint64_t line_string_buffer_length_;
+
+ // String offsets section buffer and length, if we have a string offsets
+ // section (.debug_str_offsets or .debug_str_offsets.dwo).
+ const uint8_t* str_offsets_buffer_;
+ uint64_t str_offsets_buffer_length_;
+
+ // Address section buffer and length, if we have an address section
+ // (.debug_addr).
+ const uint8_t* addr_buffer_;
+ uint64_t addr_buffer_length_;
+
+ // Flag indicating whether this compilation unit is part of a .dwo
+ // or .dwp file. If true, we are reading this unit because a
+ // skeleton compilation unit in an executable file had a
+ // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute.
+ // In a .dwo file, we expect the string offsets section to
+ // have a ".dwo" suffix, and we will use the ".debug_addr" section
+ // associated with the skeleton compilation unit.
+ bool is_split_dwarf_;
+
+ // Flag indicating if it's a Type Unit (only applicable to DWARF v5).
+ bool is_type_unit_;
+
+ // The value of the DW_AT_GNU_dwo_id attribute, if any.
+ uint64_t dwo_id_;
+
+ // The value of the DW_AT_GNU_type_signature attribute, if any.
+ uint64_t type_signature_;
+
+ // The value of the DW_AT_GNU_type_offset attribute, if any.
+ size_t type_offset_;
+
+ // The value of the DW_AT_GNU_dwo_name attribute, if any.
+ const char* dwo_name_;
+
+ // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute
+ // from the skeleton CU.
+ uint64_t skeleton_dwo_id_;
+
+ // The value of the DW_AT_GNU_ranges_base or DW_AT_rnglists_base attribute,
+ // if any.
+ uint64_t ranges_base_;
+
+ // The value of the DW_AT_GNU_addr_base attribute, if any.
+ uint64_t addr_base_;
+
+ // The value of DW_AT_str_offsets_base attribute, if any.
+ uint64_t str_offsets_base_;
+
+ // True if we have already looked for a .dwp file.
+ bool have_checked_for_dwp_;
+
+ // Path to the .dwp file.
+ string dwp_path_;
+
+ // ByteReader for the DWP file.
+ std::unique_ptr<ByteReader> dwp_byte_reader_;
+
+ // DWP reader.
+ std::unique_ptr<DwpReader> dwp_reader_;
+};
+
+// A Reader for a .dwp file. Supports the fetching of DWARF debug
+// info for a given dwo_id.
+//
+// There are two versions of .dwp files. In both versions, the
+// .dwp file is an ELF file containing only debug sections.
+// In Version 1, the file contains many copies of each debug
+// section, one for each .dwo file that is packaged in the .dwp
+// file, and the .debug_cu_index section maps from the dwo_id
+// to a set of section indexes. In Version 2, the file contains
+// one of each debug section, and the .debug_cu_index section
+// maps from the dwo_id to a set of offsets and lengths that
+// identify each .dwo file's contribution to the larger sections.
+
+class DwpReader {
+ public:
+ DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader);
+
+ ~DwpReader();
+
+ // Read the CU index and initialize data members.
+ void Initialize();
+
+ // Read the debug sections for the given dwo_id.
+ void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections);
+
+ private:
+ // Search a v1 hash table for "dwo_id". Returns the slot index
+ // where the dwo_id was found, or -1 if it was not found.
+ int LookupCU(uint64_t dwo_id);
+
+ // Search a v2 hash table for "dwo_id". Returns the row index
+ // in the offsets and sizes tables, or 0 if it was not found.
+ uint32_t LookupCUv2(uint64_t dwo_id);
+
+ // The ELF reader for the .dwp file.
+ ElfReader* elf_reader_;
+
+ // The ByteReader for the .dwp file.
+ const ByteReader& byte_reader_;
+
+ // Pointer to the .debug_cu_index section.
+ const char* cu_index_;
+
+ // Size of the .debug_cu_index section.
+ size_t cu_index_size_;
+
+ // Pointer to the .debug_str.dwo section.
+ const char* string_buffer_;
+
+ // Size of the .debug_str.dwo section.
+ size_t string_buffer_size_;
+
+ // Version of the .dwp file. We support versions 1 and 2 currently.
+ int version_;
+
+ // Number of columns in the section tables (version 2).
+ unsigned int ncolumns_;
+
+ // Number of units in the section tables (version 2).
+ unsigned int nunits_;
+
+ // Number of slots in the hash table.
+ unsigned int nslots_;
+
+ // Pointer to the beginning of the hash table.
+ const char* phash_;
+
+ // Pointer to the beginning of the index table.
+ const char* pindex_;
+
+ // Pointer to the beginning of the section index pool (version 1).
+ const char* shndx_pool_;
+
+ // Pointer to the beginning of the section offset table (version 2).
+ const char* offset_table_;
+
+ // Pointer to the beginning of the section size table (version 2).
+ const char* size_table_;
+
+ // Contents of the sections of interest (version 2).
+ const char* abbrev_data_;
+ size_t abbrev_size_;
+ const char* info_data_;
+ size_t info_size_;
+ const char* str_offsets_data_;
+ size_t str_offsets_size_;
+};
+
+// This class is a reader for DWARF's Call Frame Information. CFI
+// describes how to unwind stack frames --- even for functions that do
+// not follow fixed conventions for saving registers, whose frame size
+// varies as they execute, etc.
+//
+// CFI describes, at each machine instruction, how to compute the
+// stack frame's base address, how to find the return address, and
+// where to find the saved values of the caller's registers (if the
+// callee has stashed them somewhere to free up the registers for its
+// own use).
+//
+// For example, suppose we have a function whose machine code looks
+// like this (imagine an assembly language that looks like C, for a
+// machine with 32-bit registers, and a stack that grows towards lower
+// addresses):
+//
+// func: ; entry point; return address at sp
+// func+0: sp = sp - 16 ; allocate space for stack frame
+// func+1: sp[12] = r0 ; save r0 at sp+12
+// ... ; other code, not frame-related
+// func+10: sp -= 4; *sp = x ; push some x on the stack
+// ... ; other code, not frame-related
+// func+20: r0 = sp[16] ; restore saved r0
+// func+21: sp += 20 ; pop whole stack frame
+// func+22: pc = *sp; sp += 4 ; pop return address and jump to it
+//
+// DWARF CFI is (a very compressed representation of) a table with a
+// row for each machine instruction address and a column for each
+// register showing how to restore it, if possible.
+//
+// A special column named "CFA", for "Canonical Frame Address", tells how
+// to compute the base address of the frame; registers' entries may
+// refer to the CFA in describing where the registers are saved.
+//
+// Another special column, named "RA", represents the return address.
+//
+// For example, here is a complete (uncompressed) table describing the
+// function above:
+//
+// insn cfa r0 r1 ... ra
+// =======================================
+// func+0: sp cfa[0]
+// func+1: sp+16 cfa[0]
+// func+2: sp+16 cfa[-4] cfa[0]
+// func+11: sp+20 cfa[-4] cfa[0]
+// func+21: sp+20 cfa[0]
+// func+22: sp cfa[0]
+//
+// Some things to note here:
+//
+// - Each row describes the state of affairs *before* executing the
+// instruction at the given address. Thus, the row for func+0
+// describes the state before we allocate the stack frame. In the
+// next row, the formula for computing the CFA has changed,
+// reflecting that allocation.
+//
+// - The other entries are written in terms of the CFA; this allows
+// them to remain unchanged as the stack pointer gets bumped around.
+// For example, the rule for recovering the return address (the "ra"
+// column) remains unchanged throughout the function, even as the
+// stack pointer takes on three different offsets from the return
+// address.
+//
+// - Although we haven't shown it, most calling conventions designate
+// "callee-saves" and "caller-saves" registers. The callee must
+// preserve the values of callee-saves registers; if it uses them,
+// it must save their original values somewhere, and restore them
+// before it returns. In contrast, the callee is free to trash
+// caller-saves registers; if the callee uses these, it will
+// probably not bother to save them anywhere, and the CFI will
+// probably mark their values as "unrecoverable".
+//
+// (However, since the caller cannot assume the callee was going to
+// save them, caller-saves registers are probably dead in the caller
+// anyway, so compilers usually don't generate CFA for caller-saves
+// registers.)
+//
+// - Exactly where the CFA points is a matter of convention that
+// depends on the architecture and ABI in use. In the example, the
+// CFA is the value the stack pointer had upon entry to the
+// function, pointing at the saved return address. But on the x86,
+// the call frame information generated by GCC follows the
+// convention that the CFA is the address *after* the saved return
+// address.
+//
+// But by definition, the CFA remains constant throughout the
+// lifetime of the frame. This makes it a useful value for other
+// columns to refer to. It is also gives debuggers a useful handle
+// for identifying a frame.
+//
+// If you look at the table above, you'll notice that a given entry is
+// often the same as the one immediately above it: most instructions
+// change only one or two aspects of the stack frame, if they affect
+// it at all. The DWARF format takes advantage of this fact, and
+// reduces the size of the data by mentioning only the addresses and
+// columns at which changes take place. So for the above, DWARF CFI
+// data would only actually mention the following:
+//
+// insn cfa r0 r1 ... ra
+// =======================================
+// func+0: sp cfa[0]
+// func+1: sp+16
+// func+2: cfa[-4]
+// func+11: sp+20
+// func+21: r0
+// func+22: sp
+//
+// In fact, this is the way the parser reports CFI to the consumer: as
+// a series of statements of the form, "At address X, column Y changed
+// to Z," and related conventions for describing the initial state.
+//
+// Naturally, it would be impractical to have to scan the entire
+// program's CFI, noting changes as we go, just to recover the
+// unwinding rules in effect at one particular instruction. To avoid
+// this, CFI data is grouped into "entries", each of which covers a
+// specified range of addresses and begins with a complete statement
+// of the rules for all recoverable registers at that starting
+// address. Each entry typically covers a single function.
+//
+// Thus, to compute the contents of a given row of the table --- that
+// is, rules for recovering the CFA, RA, and registers at a given
+// instruction --- the consumer should find the entry that covers that
+// instruction's address, start with the initial state supplied at the
+// beginning of the entry, and work forward until it has processed all
+// the changes up to and including those for the present instruction.
+//
+// There are seven kinds of rules that can appear in an entry of the
+// table:
+//
+// - "undefined": The given register is not preserved by the callee;
+// its value cannot be recovered.
+//
+// - "same value": This register has the same value it did in the callee.
+//
+// - offset(N): The register is saved at offset N from the CFA.
+//
+// - val_offset(N): The value the register had in the caller is the
+// CFA plus offset N. (This is usually only useful for describing
+// the stack pointer.)
+//
+// - register(R): The register's value was saved in another register R.
+//
+// - expression(E): Evaluating the DWARF expression E using the
+// current frame's registers' values yields the address at which the
+// register was saved.
+//
+// - val_expression(E): Evaluating the DWARF expression E using the
+// current frame's registers' values yields the value the register
+// had in the caller.
+
+class CallFrameInfo {
+ public:
+ // The different kinds of entries one finds in CFI. Used internally,
+ // and for error reporting.
+ enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
+
+ // The handler class to which the parser hands the parsed call frame
+ // information. Defined below.
+ class Handler;
+
+ // A reporter class, which CallFrameInfo uses to report errors
+ // encountered while parsing call frame information. Defined below.
+ class Reporter;
+
+ // Create a DWARF CFI parser. BUFFER points to the contents of the
+ // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
+ // REPORTER is an error reporter the parser should use to report
+ // problems. READER is a ByteReader instance that has the endianness and
+ // address size set properly. Report the data we find to HANDLER.
+ //
+ // This class can also parse Linux C++ exception handling data, as found
+ // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
+ // placed in loadable segments so that it is present in the program's
+ // address space, and is interpreted by the C++ runtime to search the
+ // call stack for a handler interested in the exception being thrown,
+ // actually pop the frames, and find cleanup code to run.
+ //
+ // There are two differences between the call frame information described
+ // in the DWARF standard and the exception handling data Linux places in
+ // the .eh_frame section:
+ //
+ // - Exception handling data uses uses a different format for call frame
+ // information entry headers. The distinguished CIE id, the way FDEs
+ // refer to their CIEs, and the way the end of the series of entries is
+ // determined are all slightly different.
+ //
+ // If the constructor's EH_FRAME argument is true, then the
+ // CallFrameInfo parses the entry headers as Linux C++ exception
+ // handling data. If EH_FRAME is false or omitted, the CallFrameInfo
+ // parses standard DWARF call frame information.
+ //
+ // - Linux C++ exception handling data uses CIE augmentation strings
+ // beginning with 'z' to specify the presence of additional data after
+ // the CIE and FDE headers and special encodings used for addresses in
+ // frame description entries.
+ //
+ // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
+ // exception handling data if you have supplied READER with the base
+ // addresses needed to interpret the pointer encodings that 'z'
+ // augmentations can specify. See the ByteReader interface for details
+ // about the base addresses. See the CallFrameInfo::Handler interface
+ // for details about the additional information one might find in
+ // 'z'-augmented data.
+ //
+ // Thus:
+ //
+ // - If you are parsing standard DWARF CFI, as found in a .debug_frame
+ // section, you should pass false for the EH_FRAME argument, or omit
+ // it, and you need not worry about providing READER with the
+ // additional base addresses.
+ //
+ // - If you want to parse Linux C++ exception handling data from a
+ // .eh_frame section, you should pass EH_FRAME as true, and call
+ // READER's Set*Base member functions before calling our Start method.
+ //
+ // - If you want to parse DWARF CFI that uses the 'z' augmentations
+ // (although I don't think any toolchain ever emits such data), you
+ // could pass false for EH_FRAME, but call READER's Set*Base members.
+ //
+ // The extensions the Linux C++ ABI makes to DWARF for exception
+ // handling are described here, rather poorly:
+ // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+ // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+ //
+ // The mechanics of C++ exception handling, personality routines,
+ // and language-specific data areas are described here, rather nicely:
+ // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
+ CallFrameInfo(const uint8_t* buffer, size_t buffer_length,
+ ByteReader* reader, Handler* handler, Reporter* reporter,
+ bool eh_frame = false)
+ : buffer_(buffer), buffer_length_(buffer_length),
+ reader_(reader), handler_(handler), reporter_(reporter),
+ eh_frame_(eh_frame) { }
+
+ ~CallFrameInfo() { }
+
+ // Parse the entries in BUFFER, reporting what we find to HANDLER.
+ // Return true if we reach the end of the section successfully, or
+ // false if we encounter an error.
+ bool Start();
+
+ // Return the textual name of KIND. For error reporting.
+ static const char* KindName(EntryKind kind);
+
+ private:
+
+ struct CIE;
+
+ // A CFI entry, either an FDE or a CIE.
+ struct Entry {
+ // The starting offset of the entry in the section, for error
+ // reporting.
+ size_t offset;
+
+ // The start of this entry in the buffer.
+ const uint8_t* start;
+
+ // Which kind of entry this is.
+ //
+ // We want to be able to use this for error reporting even while we're
+ // in the midst of parsing. Error reporting code may assume that kind,
+ // offset, and start fields are valid, although kind may be kUnknown.
+ EntryKind kind;
+
+ // The end of this entry's common prologue (initial length and id), and
+ // the start of this entry's kind-specific fields.
+ const uint8_t* fields;
+
+ // The start of this entry's instructions.
+ const uint8_t* instructions;
+
+ // The address past the entry's last byte in the buffer. (Note that
+ // since offset points to the entry's initial length field, and the
+ // length field is the number of bytes after that field, this is not
+ // simply buffer_ + offset + length.)
+ const uint8_t* end;
+
+ // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
+ // CIE, and the offset of the associated CIE in an FDE.
+ uint64_t id;
+
+ // The CIE that applies to this entry, if we've parsed it. If this is a
+ // CIE, then this field points to this structure.
+ CIE* cie;
+ };
+
+ // A common information entry (CIE).
+ struct CIE: public Entry {
+ uint8_t version; // CFI data version number
+ string augmentation; // vendor format extension markers
+ uint64_t code_alignment_factor; // scale for code address adjustments
+ int data_alignment_factor; // scale for stack pointer adjustments
+ unsigned return_address_register; // which register holds the return addr
+
+ // True if this CIE includes Linux C++ ABI 'z' augmentation data.
+ bool has_z_augmentation;
+
+ // Parsed 'z' augmentation data. These are meaningful only if
+ // has_z_augmentation is true.
+ bool has_z_lsda; // The 'z' augmentation included 'L'.
+ bool has_z_personality; // The 'z' augmentation included 'P'.
+ bool has_z_signal_frame; // The 'z' augmentation included 'S'.
+
+ // If has_z_lsda is true, this is the encoding to be used for language-
+ // specific data area pointers in FDEs.
+ DwarfPointerEncoding lsda_encoding;
+
+ // If has_z_personality is true, this is the encoding used for the
+ // personality routine pointer in the augmentation data.
+ DwarfPointerEncoding personality_encoding;
+
+ // If has_z_personality is true, this is the address of the personality
+ // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
+ // address where the personality routine's address is stored.
+ uint64_t personality_address;
+
+ // This is the encoding used for addresses in the FDE header and
+ // in DW_CFA_set_loc instructions. This is always valid, whether
+ // or not we saw a 'z' augmentation string; its default value is
+ // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
+ DwarfPointerEncoding pointer_encoding;
+
+ // These were only introduced in DWARF4, so will not be set in older
+ // versions.
+ uint8_t address_size;
+ uint8_t segment_size;
+ };
+
+ // A frame description entry (FDE).
+ struct FDE: public Entry {
+ uint64_t address; // start address of described code
+ uint64_t size; // size of described code, in bytes
+
+ // If cie->has_z_lsda is true, then this is the language-specific data
+ // area's address --- or its address's address, if cie->lsda_encoding
+ // has the DW_EH_PE_indirect bit set.
+ uint64_t lsda_address;
+ };
+
+ // Internal use.
+ class Rule;
+ class UndefinedRule;
+ class SameValueRule;
+ class OffsetRule;
+ class ValOffsetRule;
+ class RegisterRule;
+ class ExpressionRule;
+ class ValExpressionRule;
+ class RuleMap;
+ class State;
+
+ // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
+ // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
+ // data to parse. On success, populate ENTRY as appropriate, and return
+ // true. On failure, report the problem, and return false. Even if we
+ // return false, set ENTRY->end to the first byte after the entry if we
+ // were able to figure that out, or NULL if we weren't.
+ bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry);
+
+ // Parse the fields of a CIE after the entry prologue, including any 'z'
+ // augmentation data. Assume that the 'Entry' fields of CIE are
+ // populated; use CIE->fields and CIE->end as the start and limit for
+ // parsing. On success, populate the rest of *CIE, and return true; on
+ // failure, report the problem and return false.
+ bool ReadCIEFields(CIE* cie);
+
+ // Parse the fields of an FDE after the entry prologue, including any 'z'
+ // augmentation data. Assume that the 'Entry' fields of *FDE are
+ // initialized; use FDE->fields and FDE->end as the start and limit for
+ // parsing. Assume that FDE->cie is fully initialized. On success,
+ // populate the rest of *FDE, and return true; on failure, report the
+ // problem and return false.
+ bool ReadFDEFields(FDE* fde);
+
+ // Report that ENTRY is incomplete, and return false. This is just a
+ // trivial wrapper for invoking reporter_->Incomplete; it provides a
+ // little brevity.
+ bool ReportIncomplete(Entry* entry);
+
+ // Return true if ENCODING has the DW_EH_PE_indirect bit set.
+ static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
+ return encoding & DW_EH_PE_indirect;
+ }
+
+ // The contents of the DWARF .debug_info section we're parsing.
+ const uint8_t* buffer_;
+ size_t buffer_length_;
+
+ // For reading multi-byte values with the appropriate endianness.
+ ByteReader* reader_;
+
+ // The handler to which we should report the data we find.
+ Handler* handler_;
+
+ // For reporting problems in the info we're parsing.
+ Reporter* reporter_;
+
+ // True if we are processing .eh_frame-format data.
+ bool eh_frame_;
+};
+
+// The handler class for CallFrameInfo. The a CFI parser calls the
+// member functions of a handler object to report the data it finds.
+class CallFrameInfo::Handler {
+ public:
+ // The pseudo-register number for the canonical frame address.
+ enum { kCFARegister = -1 };
+
+ Handler() { }
+ virtual ~Handler() { }
+
+ // The parser has found CFI for the machine code at ADDRESS,
+ // extending for LENGTH bytes. OFFSET is the offset of the frame
+ // description entry in the section, for use in error messages.
+ // VERSION is the version number of the CFI format. AUGMENTATION is
+ // a string describing any producer-specific extensions present in
+ // the data. RETURN_ADDRESS is the number of the register that holds
+ // the address to which the function should return.
+ //
+ // Entry should return true to process this CFI, or false to skip to
+ // the next entry.
+ //
+ // The parser invokes Entry for each Frame Description Entry (FDE)
+ // it finds. The parser doesn't report Common Information Entries
+ // to the handler explicitly; instead, if the handler elects to
+ // process a given FDE, the parser reiterates the appropriate CIE's
+ // contents at the beginning of the FDE's rules.
+ virtual bool Entry(size_t offset, uint64_t address, uint64_t length,
+ uint8_t version, const string& augmentation,
+ unsigned return_address) = 0;
+
+ // When the Entry function returns true, the parser calls these
+ // handler functions repeatedly to describe the rules for recovering
+ // registers at each instruction in the given range of machine code.
+ // Immediately after a call to Entry, the handler should assume that
+ // the rule for each callee-saves register is "unchanged" --- that
+ // is, that the register still has the value it had in the caller.
+ //
+ // If a *Rule function returns true, we continue processing this entry's
+ // instructions. If a *Rule function returns false, we stop evaluating
+ // instructions, and skip to the next entry. Either way, we call End
+ // before going on to the next entry.
+ //
+ // In all of these functions, if the REG parameter is kCFARegister, then
+ // the rule describes how to find the canonical frame address.
+ // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
+ // the canonical frame address should be used as the base address for the
+ // computation. All other REG values will be positive.
+
+ // At ADDRESS, register REG's value is not recoverable.
+ virtual bool UndefinedRule(uint64_t address, int reg) = 0;
+
+ // At ADDRESS, register REG's value is the same as that it had in
+ // the caller.
+ virtual bool SameValueRule(uint64_t address, int reg) = 0;
+
+ // At ADDRESS, register REG has been saved at offset OFFSET from
+ // BASE_REGISTER.
+ virtual bool OffsetRule(uint64_t address, int reg,
+ int base_register, long offset) = 0;
+
+ // At ADDRESS, the caller's value of register REG is the current
+ // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
+ // address at which the register's value is saved.)
+ virtual bool ValOffsetRule(uint64_t address, int reg,
+ int base_register, long offset) = 0;
+
+ // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
+ // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
+ // BASE_REGISTER is the "home" for REG's saved value: if you want to
+ // assign to a variable whose home is REG in the calling frame, you
+ // should put the value in BASE_REGISTER.
+ virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0;
+
+ // At ADDRESS, the DWARF expression EXPRESSION yields the address at
+ // which REG was saved.
+ virtual bool ExpressionRule(uint64_t address, int reg,
+ const string& expression) = 0;
+
+ // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
+ // value for REG. (This rule doesn't provide an address at which the
+ // register's value is saved.)
+ virtual bool ValExpressionRule(uint64_t address, int reg,
+ const string& expression) = 0;
+
+ // Indicate that the rules for the address range reported by the
+ // last call to Entry are complete. End should return true if
+ // everything is okay, or false if an error has occurred and parsing
+ // should stop.
+ virtual bool End() = 0;
+
+ // Handler functions for Linux C++ exception handling data. These are
+ // only called if the data includes 'z' augmentation strings.
+
+ // The Linux C++ ABI uses an extension of the DWARF CFI format to
+ // walk the stack to propagate exceptions from the throw to the
+ // appropriate catch, and do the appropriate cleanups along the way.
+ // CFI entries used for exception handling have two additional data
+ // associated with them:
+ //
+ // - The "language-specific data area" describes which exception
+ // types the function has 'catch' clauses for, and indicates how
+ // to go about re-entering the function at the appropriate catch
+ // clause. If the exception is not caught, it describes the
+ // destructors that must run before the frame is popped.
+ //
+ // - The "personality routine" is responsible for interpreting the
+ // language-specific data area's contents, and deciding whether
+ // the exception should continue to propagate down the stack,
+ // perhaps after doing some cleanup for this frame, or whether the
+ // exception will be caught here.
+ //
+ // In principle, the language-specific data area is opaque to
+ // everybody but the personality routine. In practice, these values
+ // may be useful or interesting to readers with extra context, and
+ // we have to at least skip them anyway, so we might as well report
+ // them to the handler.
+
+ // This entry's exception handling personality routine's address is
+ // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+ // which the routine's address is stored. The default definition for
+ // this handler function simply returns true, allowing parsing of
+ // the entry to continue.
+ virtual bool PersonalityRoutine(uint64_t address, bool indirect) {
+ return true;
+ }
+
+ // This entry's language-specific data area (LSDA) is located at
+ // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+ // which the area's address is stored. The default definition for
+ // this handler function simply returns true, allowing parsing of
+ // the entry to continue.
+ virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) {
+ return true;
+ }
+
+ // This entry describes a signal trampoline --- this frame is the
+ // caller of a signal handler. The default definition for this
+ // handler function simply returns true, allowing parsing of the
+ // entry to continue.
+ //
+ // The best description of the rationale for and meaning of signal
+ // trampoline CFI entries seems to be in the GCC bug database:
+ // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
+ virtual bool SignalHandler() { return true; }
+};
+
+// The CallFrameInfo class makes calls on an instance of this class to
+// report errors or warn about problems in the data it is parsing. The
+// default definitions of these methods print a message to stderr, but
+// you can make a derived class that overrides them.
+class CallFrameInfo::Reporter {
+ public:
+ // Create an error reporter which attributes troubles to the section
+ // named SECTION in FILENAME.
+ //
+ // Normally SECTION would be .debug_frame, but the Mac puts CFI data
+ // in a Mach-O section named __debug_frame. If we support
+ // Linux-style exception handling data, we could be reading an
+ // .eh_frame section.
+ Reporter(const string& filename,
+ const string& section = ".debug_frame")
+ : filename_(filename), section_(section) { }
+ virtual ~Reporter() { }
+
+ // The CFI entry at OFFSET ends too early to be well-formed. KIND
+ // indicates what kind of entry it is; KIND can be kUnknown if we
+ // haven't parsed enough of the entry to tell yet.
+ virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind);
+
+ // The .eh_frame data has a four-byte zero at OFFSET where the next
+ // entry's length would be; this is a terminator. However, the buffer
+ // length as given to the CallFrameInfo constructor says there should be
+ // more data.
+ virtual void EarlyEHTerminator(uint64_t offset);
+
+ // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
+ // section is not that large.
+ virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset);
+
+ // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
+ // there is not a CIE.
+ virtual void BadCIEId(uint64_t offset, uint64_t cie_offset);
+
+ // The FDE at OFFSET refers to a CIE with an address size we don't know how
+ // to handle.
+ virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size);
+
+ // The FDE at OFFSET refers to a CIE with an segment descriptor size we
+ // don't know how to handle.
+ virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size);
+
+ // The FDE at OFFSET refers to a CIE with version number VERSION,
+ // which we don't recognize. We cannot parse DWARF CFI if it uses
+ // a version number we don't recognize.
+ virtual void UnrecognizedVersion(uint64_t offset, int version);
+
+ // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
+ // which we don't recognize. We cannot parse DWARF CFI if it uses
+ // augmentations we don't recognize.
+ virtual void UnrecognizedAugmentation(uint64_t offset,
+ const string& augmentation);
+
+ // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
+ // a valid encoding.
+ virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding);
+
+ // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
+ // on a base address which has not been supplied.
+ virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding);
+
+ // The CIE at OFFSET contains a DW_CFA_restore instruction at
+ // INSN_OFFSET, which may not appear in a CIE.
+ virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset);
+
+ // The entry at OFFSET, of kind KIND, has an unrecognized
+ // instruction at INSN_OFFSET.
+ virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset);
+
+ // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+ // KIND, establishes a rule that cites the CFA, but we have not
+ // established a CFA rule yet.
+ virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset);
+
+ // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+ // KIND, is a DW_CFA_restore_state instruction, but the stack of
+ // saved states is empty.
+ virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset);
+
+ // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry
+ // at OFFSET, of kind KIND, would restore a state that has no CFA
+ // rule, whereas the current state does have a CFA rule. This is
+ // bogus input, which the CallFrameInfo::Handler interface doesn't
+ // (and shouldn't) have any way to report.
+ virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
+ uint64_t insn_offset);
+
+ protected:
+ // The name of the file whose CFI we're reading.
+ string filename_;
+
+ // The name of the CFI section in that file.
+ string section_;
+};
+
+} // namespace google_breakpad
+
+#endif // UTIL_DEBUGINFO_DWARF2READER_H__
diff --git a/contrib/libs/breakpad/src/common/dwarf/elf_reader.cc b/contrib/libs/breakpad/src/common/dwarf/elf_reader.cc
new file mode 100644
index 0000000000..c6d9f08a93
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/elf_reader.cc
@@ -0,0 +1,1274 @@
+// Copyright 2005 Google Inc. All Rights Reserved.
+// Author: chatham@google.com (Andrew Chatham)
+// Author: satorux@google.com (Satoru Takabayashi)
+//
+// Code for reading in ELF files.
+//
+// For information on the ELF format, see
+// http://www.x86.org/ftp/manuals/tools/elf.pdf
+//
+// I also liked:
+// http://www.caldera.com/developers/gabi/1998-04-29/contents.html
+//
+// A note about types: When dealing with the file format, we use types
+// like Elf32_Word, but in the public interfaces we treat all
+// addresses as uint64. As a result, we should be able to symbolize
+// 64-bit binaries from a 32-bit process (which we don't do,
+// anyway). size_t should therefore be avoided, except where required
+// by things like mmap().
+//
+// Although most of this code can deal with arbitrary ELF files of
+// either word size, the public ElfReader interface only examines
+// files loaded into the current address space, which must all match
+// the machine's native word size. This code cannot handle ELF files
+// with a non-native byte ordering.
+//
+// TODO(chatham): It would be nice if we could accomplish this task
+// without using malloc(), so we could use it as the process is dying.
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE // needed for pread()
+#endif
+
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+// TODO(saugustine): Add support for compressed debug.
+// Also need to add configure tests for zlib.
+//#include "zlib.h"
+
+#include "third_party/musl/include/elf.h"
+#include "elf_reader.h"
+#include "common/using_std_string.h"
+
+// EM_AARCH64 is not defined by elf.h of GRTE v3 on x86.
+// TODO(dougkwan): Remove this when v17 is retired.
+#if !defined(EM_AARCH64)
+#define EM_AARCH64 183 /* ARM AARCH64 */
+#endif
+
+// Map Linux macros to their Apple equivalents.
+#if __APPLE__
+#ifndef __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
+#endif // __LITTLE_ENDIAN
+#ifndef __BIG_ENDIAN
+#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
+#endif // __BIG_ENDIAN
+#ifndef __BYTE_ORDER
+#define __BYTE_ORDER __BYTE_ORDER__
+#endif // __BYTE_ORDER
+#endif // __APPLE__
+
+// TODO(dthomson): Can be removed once all Java code is using the Google3
+// launcher. We need to avoid processing PLT functions as it causes memory
+// fragmentation in malloc, which is fixed in tcmalloc - and if the Google3
+// launcher is used the JVM will then use tcmalloc. b/13735638
+//DEFINE_bool(elfreader_process_dynsyms, true,
+// "Activate PLT function processing");
+
+using std::vector;
+
+namespace {
+
+// The lowest bit of an ARM symbol value is used to indicate a Thumb address.
+const int kARMThumbBitOffset = 0;
+
+// Converts an ARM Thumb symbol value to a true aligned address value.
+template <typename T>
+T AdjustARMThumbSymbolValue(const T& symbol_table_value) {
+ return symbol_table_value & ~(1 << kARMThumbBitOffset);
+}
+
+// Names of PLT-related sections.
+const char kElfPLTRelSectionName[] = ".rel.plt"; // Use Rel struct.
+const char kElfPLTRelaSectionName[] = ".rela.plt"; // Use Rela struct.
+const char kElfPLTSectionName[] = ".plt";
+const char kElfDynSymSectionName[] = ".dynsym";
+
+const int kX86PLTCodeSize = 0x10; // Size of one x86 PLT function in bytes.
+const int kARMPLTCodeSize = 0xc;
+const int kAARCH64PLTCodeSize = 0x10;
+
+const int kX86PLT0Size = 0x10; // Size of the special PLT0 entry.
+const int kARMPLT0Size = 0x14;
+const int kAARCH64PLT0Size = 0x20;
+
+// Suffix for PLT functions when it needs to be explicitly identified as such.
+const char kPLTFunctionSuffix[] = "@plt";
+
+} // namespace
+
+namespace google_breakpad {
+
+template <class ElfArch> class ElfReaderImpl;
+
+// 32-bit and 64-bit ELF files are processed exactly the same, except
+// for various field sizes. Elf32 and Elf64 encompass all of the
+// differences between the two formats, and all format-specific code
+// in this file is templated on one of them.
+class Elf32 {
+ public:
+ typedef Elf32_Ehdr Ehdr;
+ typedef Elf32_Shdr Shdr;
+ typedef Elf32_Phdr Phdr;
+ typedef Elf32_Word Word;
+ typedef Elf32_Sym Sym;
+ typedef Elf32_Rel Rel;
+ typedef Elf32_Rela Rela;
+
+ // What should be in the EI_CLASS header.
+ static const int kElfClass = ELFCLASS32;
+
+ // Given a symbol pointer, return the binding type (eg STB_WEAK).
+ static char Bind(const Elf32_Sym* sym) {
+ return ELF32_ST_BIND(sym->st_info);
+ }
+ // Given a symbol pointer, return the symbol type (eg STT_FUNC).
+ static char Type(const Elf32_Sym* sym) {
+ return ELF32_ST_TYPE(sym->st_info);
+ }
+
+ // Extract the symbol index from the r_info field of a relocation.
+ static int r_sym(const Elf32_Word r_info) {
+ return ELF32_R_SYM(r_info);
+ }
+};
+
+
+class Elf64 {
+ public:
+ typedef Elf64_Ehdr Ehdr;
+ typedef Elf64_Shdr Shdr;
+ typedef Elf64_Phdr Phdr;
+ typedef Elf64_Word Word;
+ typedef Elf64_Sym Sym;
+ typedef Elf64_Rel Rel;
+ typedef Elf64_Rela Rela;
+
+ // What should be in the EI_CLASS header.
+ static const int kElfClass = ELFCLASS64;
+
+ static char Bind(const Elf64_Sym* sym) {
+ return ELF64_ST_BIND(sym->st_info);
+ }
+ static char Type(const Elf64_Sym* sym) {
+ return ELF64_ST_TYPE(sym->st_info);
+ }
+ static int r_sym(const Elf64_Xword r_info) {
+ return ELF64_R_SYM(r_info);
+ }
+};
+
+
+// ElfSectionReader mmaps a section of an ELF file ("section" is ELF
+// terminology). The ElfReaderImpl object providing the section header
+// must exist for the lifetime of this object.
+//
+// The motivation for mmaping individual sections of the file is that
+// many Google executables are large enough when unstripped that we
+// have to worry about running out of virtual address space.
+//
+// For compressed sections we have no choice but to allocate memory.
+template<class ElfArch>
+class ElfSectionReader {
+ public:
+ ElfSectionReader(const char* name, const string& path, int fd,
+ const typename ElfArch::Shdr& section_header)
+ : contents_aligned_(NULL),
+ contents_(NULL),
+ header_(section_header) {
+ // Back up to the beginning of the page we're interested in.
+ const size_t additional = header_.sh_offset % getpagesize();
+ const size_t offset_aligned = header_.sh_offset - additional;
+ section_size_ = header_.sh_size;
+ size_aligned_ = section_size_ + additional;
+ // If the section has been stripped or is empty, do not attempt
+ // to process its contents.
+ if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0)
+ return;
+ contents_aligned_ = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED,
+ fd, offset_aligned);
+ // Set where the offset really should begin.
+ contents_ = reinterpret_cast<char*>(contents_aligned_) +
+ (header_.sh_offset - offset_aligned);
+
+ // Check for and handle any compressed contents.
+ //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0)
+ // DecompressZlibContents();
+ // TODO(saugustine): Add support for proposed elf-section flag
+ // "SHF_COMPRESS".
+ }
+
+ ~ElfSectionReader() {
+ if (contents_aligned_ != NULL)
+ munmap(contents_aligned_, size_aligned_);
+ else
+ delete[] contents_;
+ }
+
+ // Return the section header for this section.
+ typename ElfArch::Shdr const& header() const { return header_; }
+
+ // Return memory at the given offset within this section.
+ const char* GetOffset(typename ElfArch::Word bytes) const {
+ return contents_ + bytes;
+ }
+
+ const char* contents() const { return contents_; }
+ size_t section_size() const { return section_size_; }
+
+ private:
+ // page-aligned file contents
+ void* contents_aligned_;
+ // contents as usable by the client. For non-compressed sections,
+ // pointer within contents_aligned_ to where the section data
+ // begins; for compressed sections, pointer to the decompressed
+ // data.
+ char* contents_;
+ // size of contents_aligned_
+ size_t size_aligned_;
+ // size of contents.
+ size_t section_size_;
+ const typename ElfArch::Shdr header_;
+};
+
+// An iterator over symbols in a given section. It handles walking
+// through the entries in the specified section and mapping symbol
+// entries to their names in the appropriate string table (in
+// another section).
+template<class ElfArch>
+class SymbolIterator {
+ public:
+ SymbolIterator(ElfReaderImpl<ElfArch>* reader,
+ typename ElfArch::Word section_type)
+ : symbol_section_(reader->GetSectionByType(section_type)),
+ string_section_(NULL),
+ num_symbols_in_section_(0),
+ symbol_within_section_(0) {
+
+ // If this section type doesn't exist, leave
+ // num_symbols_in_section_ as zero, so this iterator is already
+ // done().
+ if (symbol_section_ != NULL) {
+ num_symbols_in_section_ = symbol_section_->header().sh_size /
+ symbol_section_->header().sh_entsize;
+
+ // Symbol sections have sh_link set to the section number of
+ // the string section containing the symbol names.
+ string_section_ = reader->GetSection(symbol_section_->header().sh_link);
+ }
+ }
+
+ // Return true iff we have passed all symbols in this section.
+ bool done() const {
+ return symbol_within_section_ >= num_symbols_in_section_;
+ }
+
+ // Advance to the next symbol in this section.
+ // REQUIRES: !done()
+ void Next() { ++symbol_within_section_; }
+
+ // Return a pointer to the current symbol.
+ // REQUIRES: !done()
+ const typename ElfArch::Sym* GetSymbol() const {
+ return reinterpret_cast<const typename ElfArch::Sym*>(
+ symbol_section_->GetOffset(symbol_within_section_ *
+ symbol_section_->header().sh_entsize));
+ }
+
+ // Return the name of the current symbol, NULL if it has none.
+ // REQUIRES: !done()
+ const char* GetSymbolName() const {
+ int name_offset = GetSymbol()->st_name;
+ if (name_offset == 0)
+ return NULL;
+ return string_section_->GetOffset(name_offset);
+ }
+
+ int GetCurrentSymbolIndex() const {
+ return symbol_within_section_;
+ }
+
+ private:
+ const ElfSectionReader<ElfArch>* const symbol_section_;
+ const ElfSectionReader<ElfArch>* string_section_;
+ int num_symbols_in_section_;
+ int symbol_within_section_;
+};
+
+
+// Copied from strings/strutil.h. Per chatham,
+// this library should not depend on strings.
+
+static inline bool MyHasSuffixString(const string& str, const string& suffix) {
+ int len = str.length();
+ int suflen = suffix.length();
+ return (suflen <= len) && (str.compare(len-suflen, suflen, suffix) == 0);
+}
+
+
+// ElfReader loads an ELF binary and can provide information about its
+// contents. It is most useful for matching addresses to function
+// names. It does not understand debugging formats (eg dwarf2), so it
+// can't print line numbers. It takes a path to an elf file and a
+// readable file descriptor for that file, which it does not assume
+// ownership of.
+template<class ElfArch>
+class ElfReaderImpl {
+ public:
+ explicit ElfReaderImpl(const string& path, int fd)
+ : path_(path),
+ fd_(fd),
+ section_headers_(NULL),
+ program_headers_(NULL),
+ opd_section_(NULL),
+ base_for_text_(0),
+ plts_supported_(false),
+ plt_code_size_(0),
+ plt0_size_(0),
+ visited_relocation_entries_(false) {
+ string error;
+ is_dwp_ = MyHasSuffixString(path, ".dwp");
+ ParseHeaders(fd, path);
+ // Currently we need some extra information for PowerPC64 binaries
+ // including a way to read the .opd section for function descriptors and a
+ // way to find the linked base for function symbols.
+ if (header_.e_machine == EM_PPC64) {
+ // "opd_section_" must always be checked for NULL before use.
+ opd_section_ = GetSectionInfoByName(".opd", &opd_info_);
+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {
+ const char* name = GetSectionName(section_headers_[k].sh_name);
+ if (strncmp(name, ".text", strlen(".text")) == 0) {
+ base_for_text_ =
+ section_headers_[k].sh_addr - section_headers_[k].sh_offset;
+ break;
+ }
+ }
+ }
+ // Turn on PLTs.
+ if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) {
+ plt_code_size_ = kX86PLTCodeSize;
+ plt0_size_ = kX86PLT0Size;
+ plts_supported_ = true;
+ } else if (header_.e_machine == EM_ARM) {
+ plt_code_size_ = kARMPLTCodeSize;
+ plt0_size_ = kARMPLT0Size;
+ plts_supported_ = true;
+ } else if (header_.e_machine == EM_AARCH64) {
+ plt_code_size_ = kAARCH64PLTCodeSize;
+ plt0_size_ = kAARCH64PLT0Size;
+ plts_supported_ = true;
+ }
+ }
+
+ ~ElfReaderImpl() {
+ for (unsigned int i = 0u; i < sections_.size(); ++i)
+ delete sections_[i];
+ delete [] section_headers_;
+ delete [] program_headers_;
+ }
+
+ // Examine the headers of the file and return whether the file looks
+ // like an ELF file for this architecture. Takes an already-open
+ // file descriptor for the candidate file, reading in the prologue
+ // to see if the ELF file appears to match the current
+ // architecture. If error is non-NULL, it will be set with a reason
+ // in case of failure.
+ static bool IsArchElfFile(int fd, string* error) {
+ unsigned char header[EI_NIDENT];
+ if (pread(fd, header, sizeof(header), 0) != sizeof(header)) {
+ if (error != NULL) *error = "Could not read header";
+ return false;
+ }
+
+ if (memcmp(header, ELFMAG, SELFMAG) != 0) {
+ if (error != NULL) *error = "Missing ELF magic";
+ return false;
+ }
+
+ if (header[EI_CLASS] != ElfArch::kElfClass) {
+ if (error != NULL) *error = "Different word size";
+ return false;
+ }
+
+ int endian = 0;
+ if (header[EI_DATA] == ELFDATA2LSB)
+ endian = __LITTLE_ENDIAN;
+ else if (header[EI_DATA] == ELFDATA2MSB)
+ endian = __BIG_ENDIAN;
+ if (endian != __BYTE_ORDER) {
+ if (error != NULL) *error = "Different byte order";
+ return false;
+ }
+
+ return true;
+ }
+
+ // Return true if we can use this symbol in Address-to-Symbol map.
+ bool CanUseSymbol(const char* name, const typename ElfArch::Sym* sym) {
+ // For now we only save FUNC and NOTYPE symbols. For now we just
+ // care about functions, but some functions written in assembler
+ // don't have a proper ELF type attached to them, so we store
+ // NOTYPE symbols as well. The remaining significant type is
+ // OBJECT (eg global variables), which represent about 25% of
+ // the symbols in a typical google3 binary.
+ if (ElfArch::Type(sym) != STT_FUNC &&
+ ElfArch::Type(sym) != STT_NOTYPE) {
+ return false;
+ }
+
+ // Target specific filtering.
+ switch (header_.e_machine) {
+ case EM_AARCH64:
+ case EM_ARM:
+ // Filter out '$x' special local symbols used by tools
+ return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL;
+ case EM_X86_64:
+ // Filter out read-only constants like .LC123.
+ return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL;
+ default:
+ return true;
+ }
+ }
+
+ // Iterate over the symbols in a section, either SHT_DYNSYM or
+ // SHT_SYMTAB. Add all symbols to the given SymbolMap.
+ /*
+ void GetSymbolPositions(SymbolMap* symbols,
+ typename ElfArch::Word section_type,
+ uint64_t mem_offset,
+ uint64_t file_offset) {
+ // This map is used to filter out "nested" functions.
+ // See comment below.
+ AddrToSymMap addr_to_sym_map;
+ for (SymbolIterator<ElfArch> it(this, section_type);
+ !it.done(); it.Next()) {
+ const char* name = it.GetSymbolName();
+ if (name == NULL)
+ continue;
+ const typename ElfArch::Sym* sym = it.GetSymbol();
+ if (CanUseSymbol(name, sym)) {
+ const int sec = sym->st_shndx;
+
+ // We don't support special section indices. The most common
+ // is SHN_ABS, for absolute symbols used deep in the bowels of
+ // glibc. Also ignore any undefined symbols.
+ if (sec == SHN_UNDEF ||
+ (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) {
+ continue;
+ }
+
+ const typename ElfArch::Shdr& hdr = section_headers_[sec];
+
+ // Adjust for difference between where we expected to mmap
+ // this section, and where it was actually mmapped.
+ const int64_t expected_base = hdr.sh_addr - hdr.sh_offset;
+ const int64_t real_base = mem_offset - file_offset;
+ const int64_t adjust = real_base - expected_base;
+
+ uint64_t start = sym->st_value + adjust;
+
+ // Adjust function symbols for PowerPC64 by dereferencing and adjusting
+ // the function descriptor to get the function address.
+ if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) {
+ const uint64_t opd_addr =
+ AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
+ // Only adjust the returned value if the function address was found.
+ if (opd_addr != sym->st_value) {
+ const int64_t adjust_function_symbols =
+ real_base - base_for_text_;
+ start = opd_addr + adjust_function_symbols;
+ }
+ }
+
+ addr_to_sym_map.push_back(std::make_pair(start, sym));
+ }
+ }
+ std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter);
+ addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(),
+ addr_to_sym_map.end(), &AddrToSymEquals),
+ addr_to_sym_map.end());
+
+ // Squeeze out any "nested functions".
+ // Nested functions are not allowed in C, but libc plays tricks.
+ //
+ // For example, here is disassembly of /lib64/tls/libc-2.3.5.so:
+ // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip)
+ // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27>
+ // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax
+ // 0x00000000000aa390 <__read_nocancel+7>: syscall
+ // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax
+ // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111>
+ // 0x00000000000aa39a <__read_nocancel+17>: retq
+ // 0x00000000000aa39b <read+27>: sub $0x28,%rsp
+ // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp)
+ // ...
+ // Without removing __read_nocancel, symbolizer will return NULL
+ // given e.g. 0xaa39f (because the lower bound is __read_nocancel,
+ // but 0xaa39f is beyond its end.
+ if (addr_to_sym_map.empty()) {
+ return;
+ }
+ const ElfSectionReader<ElfArch>* const symbol_section =
+ this->GetSectionByType(section_type);
+ const ElfSectionReader<ElfArch>* const string_section =
+ this->GetSection(symbol_section->header().sh_link);
+
+ typename AddrToSymMap::iterator curr = addr_to_sym_map.begin();
+ // Always insert the first symbol.
+ symbols->AddSymbol(string_section->GetOffset(curr->second->st_name),
+ curr->first, curr->second->st_size);
+ typename AddrToSymMap::iterator prev = curr++;
+ for (; curr != addr_to_sym_map.end(); ++curr) {
+ const uint64_t prev_addr = prev->first;
+ const uint64_t curr_addr = curr->first;
+ const typename ElfArch::Sym* const prev_sym = prev->second;
+ const typename ElfArch::Sym* const curr_sym = curr->second;
+ if (prev_addr + prev_sym->st_size <= curr_addr ||
+ // The next condition is true if two symbols overlap like this:
+ //
+ // Previous symbol |----------------------------|
+ // Current symbol |-------------------------------|
+ //
+ // These symbols are not found in google3 codebase, but in
+ // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so.
+ //
+ // 0619e040 00000046 t CardTableModRefBS::write_region_work()
+ // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work()
+ //
+ // We allow overlapped symbols rather than ignore these.
+ // Due to the way SymbolMap::GetSymbolAtPosition() works,
+ // lookup for any address in [curr_addr, curr_addr + its size)
+ // (e.g. 0619e071) will produce the current symbol,
+ // which is the desired outcome.
+ prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) {
+ const char* name = string_section->GetOffset(curr_sym->st_name);
+ symbols->AddSymbol(name, curr_addr, curr_sym->st_size);
+ prev = curr;
+ } else {
+ // Current symbol is "nested" inside previous one like this:
+ //
+ // Previous symbol |----------------------------|
+ // Current symbol |---------------------|
+ //
+ // This happens within glibc, e.g. __read_nocancel is nested
+ // "inside" __read. Ignore "inner" symbol.
+ //DCHECK_LE(curr_addr + curr_sym->st_size,
+ // prev_addr + prev_sym->st_size);
+ ;
+ }
+ }
+ }
+*/
+
+ void VisitSymbols(typename ElfArch::Word section_type,
+ ElfReader::SymbolSink* sink) {
+ VisitSymbols(section_type, sink, -1, -1, false);
+ }
+
+ void VisitSymbols(typename ElfArch::Word section_type,
+ ElfReader::SymbolSink* sink,
+ int symbol_binding,
+ int symbol_type,
+ bool get_raw_symbol_values) {
+ for (SymbolIterator<ElfArch> it(this, section_type);
+ !it.done(); it.Next()) {
+ const char* name = it.GetSymbolName();
+ if (!name) continue;
+ const typename ElfArch::Sym* sym = it.GetSymbol();
+ if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) &&
+ (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) {
+ typename ElfArch::Sym symbol = *sym;
+ // Add a PLT symbol in addition to the main undefined symbol.
+ // Only do this for SHT_DYNSYM, because PLT symbols are dynamic.
+ int symbol_index = it.GetCurrentSymbolIndex();
+ // TODO(dthomson): Can be removed once all Java code is using the
+ // Google3 launcher.
+ if (section_type == SHT_DYNSYM &&
+ static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() &&
+ symbols_plt_offsets_[symbol_index] != 0) {
+ string plt_name = string(name) + kPLTFunctionSuffix;
+ if (plt_function_names_[symbol_index].empty()) {
+ plt_function_names_[symbol_index] = plt_name;
+ } else if (plt_function_names_[symbol_index] != plt_name) {
+ ;
+ }
+ sink->AddSymbol(plt_function_names_[symbol_index].c_str(),
+ symbols_plt_offsets_[it.GetCurrentSymbolIndex()],
+ plt_code_size_);
+ }
+ if (!get_raw_symbol_values)
+ AdjustSymbolValue(&symbol);
+ sink->AddSymbol(name, symbol.st_value, symbol.st_size);
+ }
+ }
+ }
+
+ void VisitRelocationEntries() {
+ if (visited_relocation_entries_) {
+ return;
+ }
+ visited_relocation_entries_ = true;
+
+ if (!plts_supported_) {
+ return;
+ }
+ // First determine if PLTs exist. If not, then there is nothing to do.
+ ElfReader::SectionInfo plt_section_info;
+ const char* plt_section =
+ GetSectionInfoByName(kElfPLTSectionName, &plt_section_info);
+ if (!plt_section) {
+ return;
+ }
+ if (plt_section_info.size == 0) {
+ return;
+ }
+
+ // The PLTs could be referenced by either a Rel or Rela (Rel with Addend)
+ // section.
+ ElfReader::SectionInfo rel_section_info;
+ ElfReader::SectionInfo rela_section_info;
+ const char* rel_section =
+ GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info);
+ const char* rela_section =
+ GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info);
+
+ const typename ElfArch::Rel* rel =
+ reinterpret_cast<const typename ElfArch::Rel*>(rel_section);
+ const typename ElfArch::Rela* rela =
+ reinterpret_cast<const typename ElfArch::Rela*>(rela_section);
+
+ if (!rel_section && !rela_section) {
+ return;
+ }
+
+ // Use either Rel or Rela section, depending on which one exists.
+ size_t section_size = rel_section ? rel_section_info.size
+ : rela_section_info.size;
+ size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel)
+ : sizeof(typename ElfArch::Rela);
+
+ // Determine the number of entries in the dynamic symbol table.
+ ElfReader::SectionInfo dynsym_section_info;
+ const char* dynsym_section =
+ GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info);
+ // The dynsym section might not exist, or it might be empty. In either case
+ // there is nothing to be done so return.
+ if (!dynsym_section || dynsym_section_info.size == 0) {
+ return;
+ }
+ size_t num_dynamic_symbols =
+ dynsym_section_info.size / dynsym_section_info.entsize;
+ symbols_plt_offsets_.resize(num_dynamic_symbols, 0);
+
+ // TODO(dthomson): Can be removed once all Java code is using the
+ // Google3 launcher.
+ // Make storage room for PLT function name strings.
+ plt_function_names_.resize(num_dynamic_symbols);
+
+ for (size_t i = 0; i < section_size / entry_size; ++i) {
+ // Determine symbol index from the |r_info| field.
+ int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info
+ : rela[i].r_info);
+ if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) {
+ continue;
+ }
+ symbols_plt_offsets_[sym_index] =
+ plt_section_info.addr + plt0_size_ + i * plt_code_size_;
+ }
+ }
+
+ // Return an ElfSectionReader for the first section of the given
+ // type by iterating through all section headers. Returns NULL if
+ // the section type is not found.
+ const ElfSectionReader<ElfArch>* GetSectionByType(
+ typename ElfArch::Word section_type) {
+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {
+ if (section_headers_[k].sh_type == section_type) {
+ return GetSection(k);
+ }
+ }
+ return NULL;
+ }
+
+ // Return the name of section "shndx". Returns NULL if the section
+ // is not found.
+ const char* GetSectionNameByIndex(int shndx) {
+ return GetSectionName(section_headers_[shndx].sh_name);
+ }
+
+ // Return a pointer to section "shndx", and store the size in
+ // "size". Returns NULL if the section is not found.
+ const char* GetSectionContentsByIndex(int shndx, size_t* size) {
+ const ElfSectionReader<ElfArch>* section = GetSection(shndx);
+ if (section != NULL) {
+ *size = section->section_size();
+ return section->contents();
+ }
+ return NULL;
+ }
+
+ // Return a pointer to the first section of the given name by
+ // iterating through all section headers, and store the size in
+ // "size". Returns NULL if the section name is not found.
+ const char* GetSectionContentsByName(const string& section_name,
+ size_t* size) {
+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {
+ // When searching for sections in a .dwp file, the sections
+ // we're looking for will always be at the end of the section
+ // table, so reverse the direction of iteration.
+ int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
+ const char* name = GetSectionName(section_headers_[shndx].sh_name);
+ if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {
+ const ElfSectionReader<ElfArch>* section = GetSection(shndx);
+ if (section == NULL) {
+ return NULL;
+ } else {
+ *size = section->section_size();
+ return section->contents();
+ }
+ }
+ }
+ return NULL;
+ }
+
+ // This is like GetSectionContentsByName() but it returns a lot of extra
+ // information about the section.
+ const char* GetSectionInfoByName(const string& section_name,
+ ElfReader::SectionInfo* info) {
+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {
+ // When searching for sections in a .dwp file, the sections
+ // we're looking for will always be at the end of the section
+ // table, so reverse the direction of iteration.
+ int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
+ const char* name = GetSectionName(section_headers_[shndx].sh_name);
+ if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {
+ const ElfSectionReader<ElfArch>* section = GetSection(shndx);
+ if (section == NULL) {
+ return NULL;
+ } else {
+ info->type = section->header().sh_type;
+ info->flags = section->header().sh_flags;
+ info->addr = section->header().sh_addr;
+ info->offset = section->header().sh_offset;
+ info->size = section->header().sh_size;
+ info->link = section->header().sh_link;
+ info->info = section->header().sh_info;
+ info->addralign = section->header().sh_addralign;
+ info->entsize = section->header().sh_entsize;
+ return section->contents();
+ }
+ }
+ }
+ return NULL;
+ }
+
+ // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD
+ // segments are present. This is the address an ELF image was linked
+ // (by static linker) to be loaded at. Usually (but not always) 0 for
+ // shared libraries and position-independent executables.
+ uint64_t VaddrOfFirstLoadSegment() const {
+ // Relocatable objects (of type ET_REL) do not have LOAD segments.
+ if (header_.e_type == ET_REL) {
+ return 0;
+ }
+ for (int i = 0; i < GetNumProgramHeaders(); ++i) {
+ if (program_headers_[i].p_type == PT_LOAD) {
+ return program_headers_[i].p_vaddr;
+ }
+ }
+ return 0;
+ }
+
+ // According to the LSB ("ELF special sections"), sections with debug
+ // info are prefixed by ".debug". The names are not specified, but they
+ // look like ".debug_line", ".debug_info", etc.
+ bool HasDebugSections() {
+ // Debug sections are likely to be near the end, so reverse the
+ // direction of iteration.
+ for (int k = GetNumSections() - 1; k >= 0; --k) {
+ const char* name = GetSectionName(section_headers_[k].sh_name);
+ if (strncmp(name, ".debug", strlen(".debug")) == 0) return true;
+ if (strncmp(name, ".zdebug", strlen(".zdebug")) == 0) return true;
+ }
+ return false;
+ }
+
+ bool IsDynamicSharedObject() const {
+ return header_.e_type == ET_DYN;
+ }
+
+ // Return the number of sections.
+ uint64_t GetNumSections() const {
+ if (HasManySections())
+ return first_section_header_.sh_size;
+ return header_.e_shnum;
+ }
+
+ private:
+ typedef vector<pair<uint64_t, const typename ElfArch::Sym*> > AddrToSymMap;
+
+ static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs,
+ const typename AddrToSymMap::value_type& rhs) {
+ return lhs.first < rhs.first;
+ }
+
+ static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs,
+ const typename AddrToSymMap::value_type& rhs) {
+ return lhs.first == rhs.first;
+ }
+
+ // Does this ELF file have too many sections to fit in the program header?
+ bool HasManySections() const {
+ return header_.e_shnum == SHN_UNDEF;
+ }
+
+ // Return the number of program headers.
+ int GetNumProgramHeaders() const {
+ if (HasManySections() && header_.e_phnum == 0xffff &&
+ first_section_header_.sh_info != 0)
+ return first_section_header_.sh_info;
+ return header_.e_phnum;
+ }
+
+ // Return the index of the string table.
+ int GetStringTableIndex() const {
+ if (HasManySections()) {
+ if (header_.e_shstrndx == 0xffff)
+ return first_section_header_.sh_link;
+ else if (header_.e_shstrndx >= GetNumSections())
+ return 0;
+ }
+ return header_.e_shstrndx;
+ }
+
+ // Given an offset into the section header string table, return the
+ // section name.
+ const char* GetSectionName(typename ElfArch::Word sh_name) {
+ const ElfSectionReader<ElfArch>* shstrtab =
+ GetSection(GetStringTableIndex());
+ if (shstrtab != NULL) {
+ return shstrtab->GetOffset(sh_name);
+ }
+ return NULL;
+ }
+
+ // Return an ElfSectionReader for the given section. The reader will
+ // be freed when this object is destroyed.
+ const ElfSectionReader<ElfArch>* GetSection(int num) {
+ const char* name;
+ // Hard-coding the name for the section-name string table prevents
+ // infinite recursion.
+ if (num == GetStringTableIndex())
+ name = ".shstrtab";
+ else
+ name = GetSectionNameByIndex(num);
+ ElfSectionReader<ElfArch>*& reader = sections_[num];
+ if (reader == NULL)
+ reader = new ElfSectionReader<ElfArch>(name, path_, fd_,
+ section_headers_[num]);
+ return reader;
+ }
+
+ // Parse out the overall header information from the file and assert
+ // that it looks sane. This contains information like the magic
+ // number and target architecture.
+ bool ParseHeaders(int fd, const string& path) {
+ // Read in the global ELF header.
+ if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) {
+ return false;
+ }
+
+ // Must be an executable, dynamic shared object or relocatable object
+ if (header_.e_type != ET_EXEC &&
+ header_.e_type != ET_DYN &&
+ header_.e_type != ET_REL) {
+ return false;
+ }
+ // Need a section header.
+ if (header_.e_shoff == 0) {
+ return false;
+ }
+
+ if (header_.e_shnum == SHN_UNDEF) {
+ // The number of sections in the program header is only a 16-bit value. In
+ // the event of overflow (greater than SHN_LORESERVE sections), e_shnum
+ // will read SHN_UNDEF and the true number of section header table entries
+ // is found in the sh_size field of the first section header.
+ // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html
+ if (pread(fd, &first_section_header_, sizeof(first_section_header_),
+ header_.e_shoff) != sizeof(first_section_header_)) {
+ return false;
+ }
+ }
+
+ // Dynamically allocate enough space to store the section headers
+ // and read them out of the file.
+ const int section_headers_size =
+ GetNumSections() * sizeof(*section_headers_);
+ section_headers_ = new typename ElfArch::Shdr[section_headers_size];
+ if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) !=
+ section_headers_size) {
+ return false;
+ }
+
+ // Dynamically allocate enough space to store the program headers
+ // and read them out of the file.
+ //const int program_headers_size =
+ // GetNumProgramHeaders() * sizeof(*program_headers_);
+ program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()];
+
+ // Presize the sections array for efficiency.
+ sections_.resize(GetNumSections(), NULL);
+ return true;
+ }
+
+ // Given the "value" of a function descriptor return the address of the
+ // function (i.e. the dereferenced value). Otherwise return "value".
+ uint64_t AdjustPPC64FunctionDescriptorSymbolValue(uint64_t value) {
+ if (opd_section_ != NULL &&
+ opd_info_.addr <= value &&
+ value < opd_info_.addr + opd_info_.size) {
+ uint64_t offset = value - opd_info_.addr;
+ return (*reinterpret_cast<const uint64_t*>(opd_section_ + offset));
+ }
+ return value;
+ }
+
+ void AdjustSymbolValue(typename ElfArch::Sym* sym) {
+ switch (header_.e_machine) {
+ case EM_ARM:
+ // For ARM architecture, if the LSB of the function symbol offset is set,
+ // it indicates a Thumb function. This bit should not be taken literally.
+ // Clear it.
+ if (ElfArch::Type(sym) == STT_FUNC)
+ sym->st_value = AdjustARMThumbSymbolValue(sym->st_value);
+ break;
+ case EM_386:
+ // No adjustment needed for Intel x86 architecture. However, explicitly
+ // define this case as we use it quite often.
+ break;
+ case EM_PPC64:
+ // PowerPC64 currently has function descriptors as part of the ABI.
+ // Function symbols need to be adjusted accordingly.
+ if (ElfArch::Type(sym) == STT_FUNC)
+ sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
+ break;
+ default:
+ break;
+ }
+ }
+
+ friend class SymbolIterator<ElfArch>;
+
+ // The file we're reading.
+ const string path_;
+ // Open file descriptor for path_. Not owned by this object.
+ const int fd_;
+
+ // The global header of the ELF file.
+ typename ElfArch::Ehdr header_;
+
+ // The header of the first section. This may be used to supplement the ELF
+ // file header.
+ typename ElfArch::Shdr first_section_header_;
+
+ // Array of GetNumSections() section headers, allocated when we read
+ // in the global header.
+ typename ElfArch::Shdr* section_headers_;
+
+ // Array of GetNumProgramHeaders() program headers, allocated when we read
+ // in the global header.
+ typename ElfArch::Phdr* program_headers_;
+
+ // An array of pointers to ElfSectionReaders. Sections are
+ // mmaped as they're needed and not released until this object is
+ // destroyed.
+ vector<ElfSectionReader<ElfArch>*> sections_;
+
+ // For PowerPC64 we need to keep track of function descriptors when looking up
+ // values for funtion symbols values. Function descriptors are kept in the
+ // .opd section and are dereferenced to find the function address.
+ ElfReader::SectionInfo opd_info_;
+ const char* opd_section_; // Must be checked for NULL before use.
+ int64_t base_for_text_;
+
+ // Read PLT-related sections for the current architecture.
+ bool plts_supported_;
+ // Code size of each PLT function for the current architecture.
+ size_t plt_code_size_;
+ // Size of the special first entry in the .plt section that calls the runtime
+ // loader resolution routine, and that all other entries jump to when doing
+ // lazy symbol binding.
+ size_t plt0_size_;
+
+ // Maps a dynamic symbol index to a PLT offset.
+ // The vector entry index is the dynamic symbol index.
+ std::vector<uint64_t> symbols_plt_offsets_;
+
+ // Container for PLT function name strings. These strings are passed by
+ // reference to SymbolSink::AddSymbol() so they need to be stored somewhere.
+ std::vector<string> plt_function_names_;
+
+ bool visited_relocation_entries_;
+
+ // True if this is a .dwp file.
+ bool is_dwp_;
+};
+
+ElfReader::ElfReader(const string& path)
+ : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) {
+ // linux 2.6.XX kernel can show deleted files like this:
+ // /var/run/nscd/dbYLJYaE (deleted)
+ // and the kernel-supplied vdso and vsyscall mappings like this:
+ // [vdso]
+ // [vsyscall]
+ if (MyHasSuffixString(path, " (deleted)"))
+ return;
+ if (path == "[vdso]")
+ return;
+ if (path == "[vsyscall]")
+ return;
+
+ fd_ = open(path.c_str(), O_RDONLY);
+}
+
+ElfReader::~ElfReader() {
+ if (fd_ != -1)
+ close(fd_);
+ if (impl32_ != NULL)
+ delete impl32_;
+ if (impl64_ != NULL)
+ delete impl64_;
+}
+
+
+// The only word-size specific part of this file is IsNativeElfFile().
+#if ULONG_MAX == 0xffffffff
+#define NATIVE_ELF_ARCH Elf32
+#elif ULONG_MAX == 0xffffffffffffffff
+#define NATIVE_ELF_ARCH Elf64
+#else
+#error "Invalid word size"
+#endif
+
+template <typename ElfArch>
+static bool IsElfFile(const int fd, const string& path) {
+ if (fd < 0)
+ return false;
+ if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) {
+ // No error message here. IsElfFile gets called many times.
+ return false;
+ }
+ return true;
+}
+
+bool ElfReader::IsNativeElfFile() const {
+ return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_);
+}
+
+bool ElfReader::IsElf32File() const {
+ return IsElfFile<Elf32>(fd_, path_);
+}
+
+bool ElfReader::IsElf64File() const {
+ return IsElfFile<Elf64>(fd_, path_);
+}
+
+/*
+void ElfReader::AddSymbols(SymbolMap* symbols,
+ uint64_t mem_offset, uint64_t file_offset,
+ uint64_t length) {
+ if (fd_ < 0)
+ return;
+ // TODO(chatham): Actually use the information about file offset and
+ // the length of the mapped section. On some machines the data
+ // section gets mapped as executable, and we'll end up reading the
+ // file twice and getting some of the offsets wrong.
+ if (IsElf32File()) {
+ GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB,
+ mem_offset, file_offset);
+ GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM,
+ mem_offset, file_offset);
+ } else if (IsElf64File()) {
+ GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB,
+ mem_offset, file_offset);
+ GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM,
+ mem_offset, file_offset);
+ }
+}
+*/
+
+void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink) {
+ VisitSymbols(sink, -1, -1);
+}
+
+void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink,
+ int symbol_binding,
+ int symbol_type) {
+ VisitSymbols(sink, symbol_binding, symbol_type, false);
+}
+
+void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink,
+ int symbol_binding,
+ int symbol_type,
+ bool get_raw_symbol_values) {
+ if (IsElf32File()) {
+ GetImpl32()->VisitRelocationEntries();
+ GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
+ get_raw_symbol_values);
+ GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
+ get_raw_symbol_values);
+ } else if (IsElf64File()) {
+ GetImpl64()->VisitRelocationEntries();
+ GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
+ get_raw_symbol_values);
+ GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
+ get_raw_symbol_values);
+ }
+}
+
+uint64_t ElfReader::VaddrOfFirstLoadSegment() {
+ if (IsElf32File()) {
+ return GetImpl32()->VaddrOfFirstLoadSegment();
+ } else if (IsElf64File()) {
+ return GetImpl64()->VaddrOfFirstLoadSegment();
+ } else {
+ return 0;
+ }
+}
+
+const char* ElfReader::GetSectionName(int shndx) {
+ if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return NULL;
+ if (IsElf32File()) {
+ return GetImpl32()->GetSectionNameByIndex(shndx);
+ } else if (IsElf64File()) {
+ return GetImpl64()->GetSectionNameByIndex(shndx);
+ } else {
+ return NULL;
+ }
+}
+
+uint64_t ElfReader::GetNumSections() {
+ if (IsElf32File()) {
+ return GetImpl32()->GetNumSections();
+ } else if (IsElf64File()) {
+ return GetImpl64()->GetNumSections();
+ } else {
+ return 0;
+ }
+}
+
+const char* ElfReader::GetSectionByIndex(int shndx, size_t* size) {
+ if (IsElf32File()) {
+ return GetImpl32()->GetSectionContentsByIndex(shndx, size);
+ } else if (IsElf64File()) {
+ return GetImpl64()->GetSectionContentsByIndex(shndx, size);
+ } else {
+ return NULL;
+ }
+}
+
+const char* ElfReader::GetSectionByName(const string& section_name,
+ size_t* size) {
+ if (IsElf32File()) {
+ return GetImpl32()->GetSectionContentsByName(section_name, size);
+ } else if (IsElf64File()) {
+ return GetImpl64()->GetSectionContentsByName(section_name, size);
+ } else {
+ return NULL;
+ }
+}
+
+const char* ElfReader::GetSectionInfoByName(const string& section_name,
+ SectionInfo* info) {
+ if (IsElf32File()) {
+ return GetImpl32()->GetSectionInfoByName(section_name, info);
+ } else if (IsElf64File()) {
+ return GetImpl64()->GetSectionInfoByName(section_name, info);
+ } else {
+ return NULL;
+ }
+}
+
+bool ElfReader::SectionNamesMatch(const string& name, const string& sh_name) {
+ if ((name.find(".debug_", 0) == 0) && (sh_name.find(".zdebug_", 0) == 0)) {
+ const string name_suffix(name, strlen(".debug_"));
+ const string sh_name_suffix(sh_name, strlen(".zdebug_"));
+ return name_suffix == sh_name_suffix;
+ }
+ return name == sh_name;
+}
+
+bool ElfReader::IsDynamicSharedObject() {
+ if (IsElf32File()) {
+ return GetImpl32()->IsDynamicSharedObject();
+ } else if (IsElf64File()) {
+ return GetImpl64()->IsDynamicSharedObject();
+ } else {
+ return false;
+ }
+}
+
+ElfReaderImpl<Elf32>* ElfReader::GetImpl32() {
+ if (impl32_ == NULL) {
+ impl32_ = new ElfReaderImpl<Elf32>(path_, fd_);
+ }
+ return impl32_;
+}
+
+ElfReaderImpl<Elf64>* ElfReader::GetImpl64() {
+ if (impl64_ == NULL) {
+ impl64_ = new ElfReaderImpl<Elf64>(path_, fd_);
+ }
+ return impl64_;
+}
+
+// Return true if file is an ELF binary of ElfArch, with unstripped
+// debug info (debug_only=true) or symbol table (debug_only=false).
+// Otherwise, return false.
+template <typename ElfArch>
+static bool IsNonStrippedELFBinaryImpl(const string& path, const int fd,
+ bool debug_only) {
+ if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false;
+ ElfReaderImpl<ElfArch> elf_reader(path, fd);
+ return debug_only ?
+ elf_reader.HasDebugSections()
+ : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL);
+}
+
+// Helper for the IsNon[Debug]StrippedELFBinary functions.
+static bool IsNonStrippedELFBinaryHelper(const string& path,
+ bool debug_only) {
+ const int fd = open(path.c_str(), O_RDONLY);
+ if (fd == -1) {
+ return false;
+ }
+
+ if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) ||
+ IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) {
+ close(fd);
+ return true;
+ }
+ close(fd);
+ return false;
+}
+
+bool ElfReader::IsNonStrippedELFBinary(const string& path) {
+ return IsNonStrippedELFBinaryHelper(path, false);
+}
+
+bool ElfReader::IsNonDebugStrippedELFBinary(const string& path) {
+ return IsNonStrippedELFBinaryHelper(path, true);
+}
+} // namespace google_breakpad
diff --git a/contrib/libs/breakpad/src/common/dwarf/elf_reader.h b/contrib/libs/breakpad/src/common/dwarf/elf_reader.h
new file mode 100644
index 0000000000..13ac39beb8
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/elf_reader.h
@@ -0,0 +1,167 @@
+// Copyright 2005 Google Inc. All Rights Reserved.
+// Author: chatham@google.com (Andrew Chatham)
+// Author: satorux@google.com (Satoru Takabayashi)
+//
+// ElfReader handles reading in ELF. It can extract symbols from the
+// current process, which may be used to symbolize stack traces
+// without having to make a potentially dangerous call to fork().
+//
+// ElfReader dynamically allocates memory, so it is not appropriate to
+// use once the address space might be corrupted, such as during
+// process death.
+//
+// ElfReader supports both 32-bit and 64-bit ELF binaries.
+
+#ifndef COMMON_DWARF_ELF_READER_H__
+#define COMMON_DWARF_ELF_READER_H__
+
+#include <string>
+#include <vector>
+
+#include "common/dwarf/types.h"
+#include "common/using_std_string.h"
+
+using std::vector;
+using std::pair;
+
+namespace google_breakpad {
+
+class SymbolMap;
+class Elf32;
+class Elf64;
+template<typename ElfArch>
+class ElfReaderImpl;
+
+class ElfReader {
+ public:
+ explicit ElfReader(const string& path);
+ ~ElfReader();
+
+ // Parse the ELF prologue of this file and return whether it was
+ // successfully parsed and matches the word size and byte order of
+ // the current process.
+ bool IsNativeElfFile() const;
+
+ // Similar to IsNativeElfFile but checks if it's a 32-bit ELF file.
+ bool IsElf32File() const;
+
+ // Similar to IsNativeElfFile but checks if it's a 64-bit ELF file.
+ bool IsElf64File() const;
+
+ // Checks if it's an ELF file of type ET_DYN (shared object file).
+ bool IsDynamicSharedObject();
+
+ // Add symbols in the given ELF file into the provided SymbolMap,
+ // assuming that the file has been loaded into the specified
+ // offset.
+ //
+ // The remaining arguments are typically taken from a
+ // ProcMapsIterator (base/sysinfo.h) and describe which portions of
+ // the ELF file are mapped into which parts of memory:
+ //
+ // mem_offset - position at which the segment is mapped into memory
+ // file_offset - offset in the file where the mapping begins
+ // length - length of the mapped segment
+ void AddSymbols(SymbolMap* symbols,
+ uint64_t mem_offset, uint64_t file_offset,
+ uint64_t length);
+
+ class SymbolSink {
+ public:
+ virtual ~SymbolSink() {}
+ virtual void AddSymbol(const char* name, uint64_t address,
+ uint64_t size) = 0;
+ };
+
+ // Like AddSymbols above, but with no address correction.
+ // Processes any SHT_SYMTAB section, followed by any SHT_DYNSYM section.
+ void VisitSymbols(SymbolSink* sink);
+
+ // Like VisitSymbols above, but for a specific symbol binding/type.
+ // A negative value for the binding and type parameters means any
+ // binding or type.
+ void VisitSymbols(SymbolSink* sink, int symbol_binding, int symbol_type);
+
+ // Like VisitSymbols above but can optionally export raw symbol values instead
+ // of adjusted ones.
+ void VisitSymbols(SymbolSink* sink, int symbol_binding, int symbol_type,
+ bool get_raw_symbol_values);
+
+ // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD
+ // segments are present. This is the address an ELF image was linked
+ // (by static linker) to be loaded at. Usually (but not always) 0 for
+ // shared libraries and position-independent executables.
+ uint64_t VaddrOfFirstLoadSegment();
+
+ // Return the name of section "shndx". Returns NULL if the section
+ // is not found.
+ const char* GetSectionName(int shndx);
+
+ // Return the number of sections in the given ELF file.
+ uint64_t GetNumSections();
+
+ // Get section "shndx" from the given ELF file. On success, return
+ // the pointer to the section and store the size in "size".
+ // On error, return NULL. The returned section data is only valid
+ // until the ElfReader gets destroyed.
+ const char* GetSectionByIndex(int shndx, size_t* size);
+
+ // Get section with "section_name" (ex. ".text", ".symtab") in the
+ // given ELF file. On success, return the pointer to the section
+ // and store the size in "size". On error, return NULL. The
+ // returned section data is only valid until the ElfReader gets
+ // destroyed.
+ const char* GetSectionByName(const string& section_name, size_t* size);
+
+ // This is like GetSectionByName() but it returns a lot of extra information
+ // about the section. The SectionInfo structure is almost identical to
+ // the typedef struct Elf64_Shdr defined in <elf.h>, but is redefined
+ // here so that the many short macro names in <elf.h> don't have to be
+ // added to our already cluttered namespace.
+ struct SectionInfo {
+ uint32_t type; // Section type (SHT_xxx constant from elf.h).
+ uint64_t flags; // Section flags (SHF_xxx constants from elf.h).
+ uint64_t addr; // Section virtual address at execution.
+ uint64_t offset; // Section file offset.
+ uint64_t size; // Section size in bytes.
+ uint32_t link; // Link to another section.
+ uint32_t info; // Additional section information.
+ uint64_t addralign; // Section alignment.
+ uint64_t entsize; // Entry size if section holds a table.
+ };
+ const char* GetSectionInfoByName(const string& section_name,
+ SectionInfo* info);
+
+ // Check if "path" is an ELF binary that has not been stripped of symbol
+ // tables. This function supports both 32-bit and 64-bit ELF binaries.
+ static bool IsNonStrippedELFBinary(const string& path);
+
+ // Check if "path" is an ELF binary that has not been stripped of debug
+ // info. Unlike IsNonStrippedELFBinary, this function will return
+ // false for binaries passed through "strip -S".
+ static bool IsNonDebugStrippedELFBinary(const string& path);
+
+ // Match a requested section name with the section name as it
+ // appears in the elf-file, adjusting for compressed debug section
+ // names. For example, returns true if name == ".debug_abbrev" and
+ // sh_name == ".zdebug_abbrev"
+ static bool SectionNamesMatch(const string& name, const string& sh_name);
+
+ private:
+ // Lazily initialize impl32_ and return it.
+ ElfReaderImpl<Elf32>* GetImpl32();
+ // Ditto for impl64_.
+ ElfReaderImpl<Elf64>* GetImpl64();
+
+ // Path of the file we're reading.
+ const string path_;
+ // Read-only file descriptor for the file. May be -1 if there was an
+ // error during open.
+ int fd_;
+ ElfReaderImpl<Elf32>* impl32_;
+ ElfReaderImpl<Elf64>* impl64_;
+};
+
+} // namespace google_breakpad
+
+#endif // COMMON_DWARF_ELF_READER_H__
diff --git a/contrib/libs/breakpad/src/common/dwarf/line_state_machine.h b/contrib/libs/breakpad/src/common/dwarf/line_state_machine.h
new file mode 100644
index 0000000000..b0f3f4907d
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/line_state_machine.h
@@ -0,0 +1,63 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#ifndef COMMON_DWARF_LINE_STATE_MACHINE_H__
+#define COMMON_DWARF_LINE_STATE_MACHINE_H__
+
+#include <stdint.h>
+
+namespace google_breakpad {
+
+// This is the format of a DWARF2/3 line state machine that we process
+// opcodes using. There is no need for anything outside the lineinfo
+// processor to know how this works.
+struct LineStateMachine {
+ void Reset(bool default_is_stmt) {
+ file_num = 1;
+ address = 0;
+ line_num = 1;
+ column_num = 0;
+ is_stmt = default_is_stmt;
+ basic_block = false;
+ end_sequence = false;
+ }
+
+ uint32_t file_num;
+ uint64_t address;
+ uint32_t line_num;
+ uint32_t column_num;
+ bool is_stmt; // stmt means statement.
+ bool basic_block;
+ bool end_sequence;
+};
+
+} // namespace google_breakpad
+
+
+#endif // COMMON_DWARF_LINE_STATE_MACHINE_H__
diff --git a/contrib/libs/breakpad/src/common/dwarf/types.h b/contrib/libs/breakpad/src/common/dwarf/types.h
new file mode 100644
index 0000000000..23412d0eaa
--- /dev/null
+++ b/contrib/libs/breakpad/src/common/dwarf/types.h
@@ -0,0 +1,41 @@
+// Copyright 2008 Google, Inc. All Rights reserved
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// This file contains some typedefs for basic types
+
+
+#ifndef _COMMON_DWARF_TYPES_H__
+#define _COMMON_DWARF_TYPES_H__
+
+#include <stdint.h>
+
+typedef intptr_t intptr;
+typedef uintptr_t uintptr;
+
+#endif // _COMMON_DWARF_TYPES_H__