diff options
author | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
---|---|---|
committer | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /contrib/libs/llvm12/lib/DebugInfo/GSYM | |
parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
download | ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz |
fix ya.make
Diffstat (limited to 'contrib/libs/llvm12/lib/DebugInfo/GSYM')
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 572 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/FileWriter.cpp | 78 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/FunctionInfo.cpp | 254 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymCreator.cpp | 320 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymReader.cpp | 406 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/Header.cpp | 109 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/InlineInfo.cpp | 265 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/LineTable.cpp | 293 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/LookupResult.cpp | 74 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp | 116 | ||||
-rw-r--r-- | contrib/libs/llvm12/lib/DebugInfo/GSYM/Range.cpp | 124 |
11 files changed, 2611 insertions, 0 deletions
diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/DwarfTransformer.cpp new file mode 100644 index 0000000000..1e527ab391 --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -0,0 +1,572 @@ +//===- DwarfTransformer.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <thread> +#include <unordered_set> + +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" + +using namespace llvm; +using namespace gsym; + +struct llvm::gsym::CUInfo { + const DWARFDebugLine::LineTable *LineTable; + const char *CompDir; + std::vector<uint32_t> FileCache; + uint64_t Language = 0; + uint8_t AddrSize = 0; + + CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { + LineTable = DICtx.getLineTableForUnit(CU); + CompDir = CU->getCompilationDir(); + FileCache.clear(); + if (LineTable) + FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); + DWARFDie Die = CU->getUnitDIE(); + Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); + AddrSize = CU->getAddressByteSize(); + } + + /// Return true if Addr is the highest address for a given compile unit. The + /// highest address is encoded as -1, of all ones in the address. These high + /// addresses are used by some linkers to indicate that a function has been + /// dead stripped or didn't end up in the linked executable. + bool isHighestAddress(uint64_t Addr) const { + if (AddrSize == 4) + return Addr == UINT32_MAX; + else if (AddrSize == 8) + return Addr == UINT64_MAX; + return false; + } + + /// Convert a DWARF compile unit file index into a GSYM global file index. + /// + /// Each compile unit in DWARF has its own file table in the line table + /// prologue. GSYM has a single large file table that applies to all files + /// from all of the info in a GSYM file. This function converts between the + /// two and caches and DWARF CU file index that has already been converted so + /// the first client that asks for a compile unit file index will end up + /// doing the conversion, and subsequent clients will get the cached GSYM + /// index. + uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) { + if (!LineTable) + return 0; + assert(DwarfFileIdx < FileCache.size()); + uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; + if (GsymFileIdx != UINT32_MAX) + return GsymFileIdx; + std::string File; + if (LineTable->getFileNameByIndex( + DwarfFileIdx, CompDir, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) + GsymFileIdx = Gsym.insertFile(File); + else + GsymFileIdx = 0; + return GsymFileIdx; + } +}; + + +static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { + if (DWARFDie SpecDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { + if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) + return SpecParent; + } + if (DWARFDie AbstDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { + if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) + return AbstParent; + } + + // We never want to follow parent for inlined subroutine - that would + // give us information about where the function is inlined, not what + // function is inlined + if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) + return DWARFDie(); + + DWARFDie ParentDie = Die.getParent(); + if (!ParentDie) + return DWARFDie(); + + switch (ParentDie.getTag()) { + case dwarf::DW_TAG_namespace: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_subprogram: + return ParentDie; // Found parent decl context DIE + case dwarf::DW_TAG_lexical_block: + return GetParentDeclContextDIE(ParentDie); + default: + break; + } + + return DWARFDie(); +} + +/// Get the GsymCreator string table offset for the qualified name for the +/// DIE passed in. This function will avoid making copies of any strings in +/// the GsymCreator when possible. We don't need to copy a string when the +/// string comes from our .debug_str section or is an inlined string in the +/// .debug_info. If we create a qualified name string in this function by +/// combining multiple strings in the DWARF string table or info, we will make +/// a copy of the string when we add it to the string table. +static Optional<uint32_t> getQualifiedNameIndex(DWARFDie &Die, + uint64_t Language, + GsymCreator &Gsym) { + // If the dwarf has mangled name, use mangled name + if (auto LinkageName = + dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr)) + return Gsym.insertString(LinkageName, /* Copy */ false); + + StringRef ShortName(Die.getName(DINameKind::ShortName)); + if (ShortName.empty()) + return llvm::None; + + // For C++ and ObjC, prepend names of all parent declaration contexts + if (!(Language == dwarf::DW_LANG_C_plus_plus || + Language == dwarf::DW_LANG_C_plus_plus_03 || + Language == dwarf::DW_LANG_C_plus_plus_11 || + Language == dwarf::DW_LANG_C_plus_plus_14 || + Language == dwarf::DW_LANG_ObjC_plus_plus || + // This should not be needed for C, but we see C++ code marked as C + // in some binaries. This should hurt, so let's do it for C as well + Language == dwarf::DW_LANG_C)) + return Gsym.insertString(ShortName, /* Copy */ false); + + // Some GCC optimizations create functions with names ending with .isra.<num> + // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name + // If it looks like it could be the case, don't add any prefix + if (ShortName.startswith("_Z") && + (ShortName.contains(".isra.") || ShortName.contains(".part."))) + return Gsym.insertString(ShortName, /* Copy */ false); + + DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); + if (ParentDeclCtxDie) { + std::string Name = ShortName.str(); + while (ParentDeclCtxDie) { + StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); + if (!ParentName.empty()) { + // "lambda" names are wrapped in < >. Replace with { } + // to be consistent with demangled names and not to confuse with + // templates + if (ParentName.front() == '<' && ParentName.back() == '>') + Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + + "::" + Name; + else + Name = ParentName.str() + "::" + Name; + } + ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); + } + // Copy the name since we created a new name in a std::string. + return Gsym.insertString(Name, /* Copy */ true); + } + // Don't copy the name since it exists in the DWARF object file. + return Gsym.insertString(ShortName, /* Copy */ false); +} + +static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { + bool CheckChildren = true; + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: + // Don't look into functions within functions. + CheckChildren = Depth == 0; + break; + case dwarf::DW_TAG_inlined_subroutine: + return true; + default: + break; + } + if (!CheckChildren) + return false; + for (DWARFDie ChildDie : Die.children()) { + if (hasInlineInfo(ChildDie, Depth + 1)) + return true; + } + return false; +} + +static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, + uint32_t Depth, FunctionInfo &FI, + InlineInfo &parent) { + if (!hasInlineInfo(Die, Depth)) + return; + + dwarf::Tag Tag = Die.getTag(); + if (Tag == dwarf::DW_TAG_inlined_subroutine) { + // create new InlineInfo and append to parent.children + InlineInfo II; + DWARFAddressRange FuncRange = + DWARFAddressRange(FI.startAddress(), FI.endAddress()); + Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); + if (RangesOrError) { + for (const DWARFAddressRange &Range : RangesOrError.get()) { + // Check that the inlined function is within the range of the function + // info, it might not be in case of split functions + if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC) + II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC)); + } + } + if (II.Ranges.empty()) + return; + + if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) + II.Name = *NameIndex; + II.CallFile = CUI.DWARFToGSYMFileIndex( + Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); + II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); + // parse all children and append to parent + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II); + parent.Children.emplace_back(std::move(II)); + return; + } + if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { + // skip this Die and just recurse down + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent); + } +} + +static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI, + DWARFDie Die, GsymCreator &Gsym, + FunctionInfo &FI) { + std::vector<uint32_t> RowVector; + const uint64_t StartAddress = FI.startAddress(); + const uint64_t EndAddress = FI.endAddress(); + const uint64_t RangeSize = EndAddress - StartAddress; + const object::SectionedAddress SecAddress{ + StartAddress, object::SectionedAddress::UndefSection}; + + + if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { + // If we have a DW_TAG_subprogram but no line entries, fall back to using + // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. + if (auto FileIdx = + dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) { + if (auto Line = + dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { + LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx), + *Line); + FI.OptLineTable = LineTable(); + FI.OptLineTable->push(LE); + // LE.Addr = EndAddress; + // FI.OptLineTable->push(LE); + } + } + return; + } + + FI.OptLineTable = LineTable(); + DWARFDebugLine::Row PrevRow; + for (uint32_t RowIndex : RowVector) { + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; + const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); + uint64_t RowAddress = Row.Address.Address; + // Watch out for a RowAddress that is in the middle of a line table entry + // in the DWARF. If we pass an address in between two line table entries + // we will get a RowIndex for the previous valid line table row which won't + // be contained in our function. This is usually a bug in the DWARF due to + // linker problems or LTO or other DWARF re-linking so it is worth emitting + // an error, but not worth stopping the creation of the GSYM. + if (!FI.Range.contains(RowAddress)) { + if (RowAddress < FI.Range.Start) { + Log << "error: DIE has a start address whose LowPC is between the " + "line table Row[" << RowIndex << "] with address " + << HEX64(RowAddress) << " and the next one.\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + RowAddress = FI.Range.Start; + } else { + continue; + } + } + + LineEntry LE(RowAddress, FileIdx, Row.Line); + if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { + // We have seen full duplicate line tables for functions in some + // DWARF files. Watch for those here by checking the the last + // row was the function's end address (HighPC) and that the + // current line table entry's address is the same as the first + // line entry we already have in our "function_info.Lines". If + // so break out after printing a warning. + auto FirstLE = FI.OptLineTable->first(); + if (FirstLE && *FirstLE == LE) { + Log << "warning: duplicate line table detected for DIE:\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } else { + // Print out (ignore if os == nulls as this is expensive) + Log << "error: line table has addresses that do not " + << "monotonically increase:\n"; + for (uint32_t RowIndex2 : RowVector) { + CUI.LineTable->Rows[RowIndex2].dump(Log); + } + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + + // Skip multiple line entries for the same file and line. + auto LastLE = FI.OptLineTable->last(); + if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) + continue; + // Only push a row if it isn't an end sequence. End sequence markers are + // included for the last address in a function or the last contiguous + // address in a sequence. + if (Row.EndSequence) { + // End sequence means that the next line entry could have a lower address + // that the previous entries. So we clear the previous row so we don't + // trigger the line table error about address that do not monotonically + // increase. + PrevRow = DWARFDebugLine::Row(); + } else { + FI.OptLineTable->push(LE); + PrevRow = Row; + } + } + // If not line table rows were added, clear the line table so we don't encode + // on in the GSYM file. + if (FI.OptLineTable->empty()) + FI.OptLineTable = llvm::None; +} + +void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: { + Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) { + consumeError(RangesOrError.takeError()); + break; + } + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + if (Ranges.empty()) + break; + auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); + if (!NameIndex) { + OS << "error: function at " << HEX64(Die.getOffset()) + << " has no name\n "; + Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); + break; + } + + // Create a function_info for each range + for (const DWARFAddressRange &Range : Ranges) { + // The low PC must be less than the high PC. Many linkers don't remove + // DWARF for functions that don't get linked into the final executable. + // If both the high and low pc have relocations, linkers will often set + // the address values for both to the same value to indicate the function + // has been remove. Other linkers have been known to set the one or both + // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 + // byte addresses to indicate the function isn't valid. The check below + // tries to watch for these cases and abort if it runs into them. + if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) + break; + + // Many linkers can't remove DWARF and might set the LowPC to zero. Since + // high PC can be an offset from the low PC in more recent DWARF versions + // we need to watch for a zero'ed low pc which we do using + // ValidTextRanges below. + if (!Gsym.IsValidTextAddress(Range.LowPC)) { + // We expect zero and -1 to be invalid addresses in DWARF depending + // on the linker of the DWARF. This indicates a function was stripped + // and the debug info wasn't able to be stripped from the DWARF. If + // the LowPC isn't zero or -1, then we should emit an error. + if (Range.LowPC != 0) { + // Unexpected invalid address, emit an error + Log << "warning: DIE has an address range whose start address is " + "not in any executable sections (" << + *Gsym.GetValidTextRanges() << ") and will not be processed:\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + + FunctionInfo FI; + FI.setStartAddress(Range.LowPC); + FI.setEndAddress(Range.HighPC); + FI.Name = *NameIndex; + if (CUI.LineTable) { + convertFunctionLineTable(OS, CUI, Die, Gsym, FI); + } + if (hasInlineInfo(Die, 0)) { + FI.Inline = InlineInfo(); + FI.Inline->Name = *NameIndex; + FI.Inline->Ranges.insert(FI.Range); + parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline); + } + Gsym.addFunctionInfo(std::move(FI)); + } + } break; + default: + break; + } + for (DWARFDie ChildDie : Die.children()) + handleDie(OS, CUI, ChildDie); +} + +Error DwarfTransformer::convert(uint32_t NumThreads) { + size_t NumBefore = Gsym.getNumFunctionInfos(); + if (NumThreads == 1) { + // Parse all DWARF data from this thread, use the same string/file table + // for everything + for (const auto &CU : DICtx.compile_units()) { + DWARFDie Die = CU->getUnitDIE(false); + CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); + handleDie(Log, CUI, Die); + } + } else { + // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up + // front before we start accessing any DIEs since there might be + // cross compile unit references in the DWARF. If we don't do this we can + // end up crashing. + + // We need to call getAbbreviations sequentially first so that getUnitDIE() + // only works with its local data. + for (const auto &CU : DICtx.compile_units()) + CU->getAbbreviations(); + + // Now parse all DIEs in case we have cross compile unit references in a + // thread pool. + ThreadPool pool(hardware_concurrency(NumThreads)); + for (const auto &CU : DICtx.compile_units()) + pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); + pool.wait(); + + // Now convert all DWARF to GSYM in a thread pool. + std::mutex LogMutex; + for (const auto &CU : DICtx.compile_units()) { + DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); + if (Die) { + CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); + pool.async([this, CUI, &LogMutex, Die]() mutable { + std::string ThreadLogStorage; + raw_string_ostream ThreadOS(ThreadLogStorage); + handleDie(ThreadOS, CUI, Die); + ThreadOS.flush(); + if (!ThreadLogStorage.empty()) { + // Print ThreadLogStorage lines into an actual stream under a lock + std::lock_guard<std::mutex> guard(LogMutex); + Log << ThreadLogStorage; + } + }); + } + } + pool.wait(); + } + size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; + return Error::success(); +} + +llvm::Error DwarfTransformer::verify(StringRef GsymPath) { + Log << "Verifying GSYM file \"" << GsymPath << "\":\n"; + + auto Gsym = GsymReader::openFile(GsymPath); + if (!Gsym) + return Gsym.takeError(); + + auto NumAddrs = Gsym->getNumAddresses(); + DILineInfoSpecifier DLIS( + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, + DILineInfoSpecifier::FunctionNameKind::LinkageName); + std::string gsymFilename; + for (uint32_t I = 0; I < NumAddrs; ++I) { + auto FuncAddr = Gsym->getAddress(I); + if (!FuncAddr) + return createStringError(std::errc::invalid_argument, + "failed to extract address[%i]", I); + + auto FI = Gsym->getFunctionInfo(*FuncAddr); + if (!FI) + return createStringError(std::errc::invalid_argument, + "failed to extract function info for address 0x%" + PRIu64, *FuncAddr); + + for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { + const object::SectionedAddress SectAddr{ + Addr, object::SectionedAddress::UndefSection}; + auto LR = Gsym->lookup(Addr); + if (!LR) + return LR.takeError(); + + auto DwarfInlineInfos = + DICtx.getInliningInfoForAddress(SectAddr, DLIS); + uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); + if (NumDwarfInlineInfos == 0) { + DwarfInlineInfos.addFrame( + DICtx.getLineInfoForAddress(SectAddr, DLIS)); + } + + // Check for 1 entry that has no file and line info + if (NumDwarfInlineInfos == 1 && + DwarfInlineInfos.getFrame(0).FileName == "<invalid>") { + DwarfInlineInfos = DIInliningInfo(); + NumDwarfInlineInfos = 0; + } + if (NumDwarfInlineInfos > 0 && + NumDwarfInlineInfos != LR->Locations.size()) { + Log << "error: address " << HEX64(Addr) << " has " + << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " + << LR->Locations.size() << "\n"; + Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; + for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { + const auto dii = DwarfInlineInfos.getFrame(Idx); + Log << " [" << Idx << "]: " << dii.FunctionName << " @ " + << dii.FileName << ':' << dii.Line << '\n'; + } + Log << " " << LR->Locations.size() << " GSYM frames:\n"; + for (size_t Idx = 0, count = LR->Locations.size(); + Idx < count; ++Idx) { + const auto &gii = LR->Locations[Idx]; + Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir + << '/' << gii.Base << ':' << gii.Line << '\n'; + } + DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); + Gsym->dump(Log, *FI); + continue; + } + + for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; + ++Idx) { + const auto &gii = LR->Locations[Idx]; + if (Idx < NumDwarfInlineInfos) { + const auto dii = DwarfInlineInfos.getFrame(Idx); + gsymFilename = LR->getSourceFile(Idx); + // Verify function name + if (dii.FunctionName.find(gii.Name.str()) != 0) + Log << "error: address " << HEX64(Addr) << " DWARF function \"" + << dii.FunctionName.c_str() + << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; + // Verify source file path + if (dii.FileName != gsymFilename) + Log << "error: address " << HEX64(Addr) << " DWARF path \"" + << dii.FileName.c_str() << "\" doesn't match GSYM path \"" + << gsymFilename.c_str() << "\"\n"; + // Verify source file line + if (dii.Line != gii.Line) + Log << "error: address " << HEX64(Addr) << " DWARF line " + << dii.Line << " != GSYM line " << gii.Line << "\n"; + } + } + } + } + return Error::success(); +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/FileWriter.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/FileWriter.cpp new file mode 100644 index 0000000000..4b30dcb60a --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/FileWriter.cpp @@ -0,0 +1,78 @@ +//===- FileWriter.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +using namespace llvm; +using namespace gsym; + +FileWriter::~FileWriter() { OS.flush(); } + +void FileWriter::writeSLEB(int64_t S) { + uint8_t Bytes[32]; + auto Length = encodeSLEB128(S, Bytes); + assert(Length < sizeof(Bytes)); + OS.write(reinterpret_cast<const char *>(Bytes), Length); +} + +void FileWriter::writeULEB(uint64_t U) { + uint8_t Bytes[32]; + auto Length = encodeULEB128(U, Bytes); + assert(Length < sizeof(Bytes)); + OS.write(reinterpret_cast<const char *>(Bytes), Length); +} + +void FileWriter::writeU8(uint8_t U) { + OS.write(reinterpret_cast<const char *>(&U), sizeof(U)); +} + +void FileWriter::writeU16(uint16_t U) { + const uint16_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped)); +} + +void FileWriter::writeU32(uint32_t U) { + const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped)); +} + +void FileWriter::writeU64(uint64_t U) { + const uint64_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped)); +} + +void FileWriter::fixup32(uint32_t U, uint64_t Offset) { + const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.pwrite(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped), + Offset); +} + +void FileWriter::writeData(llvm::ArrayRef<uint8_t> Data) { + OS.write(reinterpret_cast<const char *>(Data.data()), Data.size()); +} + +void FileWriter::writeNullTerminated(llvm::StringRef Str) { + OS << Str << '\0'; +} + +uint64_t FileWriter::tell() { + return OS.tell(); +} + +void FileWriter::alignTo(size_t Align) { + off_t Offset = OS.tell(); + off_t AlignedOffset = (Offset + Align - 1) / Align * Align; + if (AlignedOffset == Offset) + return; + off_t PadCount = AlignedOffset - Offset; + OS.write_zeros(PadCount); +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/FunctionInfo.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/FunctionInfo.cpp new file mode 100644 index 0000000000..cef1b9498c --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -0,0 +1,254 @@ +//===- FunctionInfo.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" + +using namespace llvm; +using namespace gsym; + +/// FunctionInfo information type that is used to encode the optional data +/// that is associated with a FunctionInfo object. +enum InfoType : uint32_t { + EndOfList = 0u, + LineTableInfo = 1u, + InlineInfo = 2u +}; + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { + OS << FI.Range << ": " << "Name=" << HEX32(FI.Name) << '\n'; + if (FI.OptLineTable) + OS << FI.OptLineTable << '\n'; + if (FI.Inline) + OS << FI.Inline << '\n'; + return OS; +} + +llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + FunctionInfo FI; + FI.Range.Start = BaseAddr; + uint64_t Offset = 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset); + FI.Range.End = FI.Range.Start + Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset); + FI.Name = Data.getU32(&Offset); + if (FI.Name == 0) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x", + Offset - 4, FI.Name); + bool Done = false; + while (!Done) { + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Offset); + const uint32_t IT = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Offset); + const uint32_t InfoLength = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, InfoLength)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u", + Offset, IT); + DataExtractor InfoData(Data.getData().substr(Offset, InfoLength), + Data.isLittleEndian(), + Data.getAddressSize()); + switch (IT) { + case InfoType::EndOfList: + Done = true; + break; + + case InfoType::LineTableInfo: + if (Expected<LineTable> LT = LineTable::decode(InfoData, BaseAddr)) + FI.OptLineTable = std::move(LT.get()); + else + return LT.takeError(); + break; + + case InfoType::InlineInfo: + if (Expected<InlineInfo> II = InlineInfo::decode(InfoData, BaseAddr)) + FI.Inline = std::move(II.get()); + else + return II.takeError(); + break; + + default: + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": unsupported InfoType %u", + Offset-8, IT); + } + Offset += InfoLength; + } + return std::move(FI); +} + +llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const { + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid FunctionInfo object"); + // Align FunctionInfo data to a 4 byte alignment. + O.alignTo(4); + const uint64_t FuncInfoOffset = O.tell(); + // Write the size in bytes of this function as a uint32_t. This can be zero + // if we just have a symbol from a symbol table and that symbol has no size. + O.writeU32(size()); + // Write the name of this function as a uint32_t string table offset. + O.writeU32(Name); + + if (OptLineTable.hasValue()) { + O.writeU32(InfoType::LineTableInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the LineTable out with the number of bytes that were written. + O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = OptLineTable->encode(O, Range.Start); + if (err) + return std::move(err); + const auto Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "LineTable length is greater than UINT32_MAX"); + // Fixup the size of the LineTable data with the correct size. + O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); + } + + // Write out the inline function info if we have any and if it is valid. + if (Inline.hasValue()) { + O.writeU32(InfoType::InlineInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the LineTable out with the number of bytes that were written. + O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = Inline->encode(O, Range.Start); + if (err) + return std::move(err); + const auto Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "InlineInfo length is greater than UINT32_MAX"); + // Fixup the size of the InlineInfo data with the correct size. + O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); + } + + // Terminate the data chunks with and end of list with zero size + O.writeU32(InfoType::EndOfList); + O.writeU32(0); + return FuncInfoOffset; +} + + +llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data, + const GsymReader &GR, + uint64_t FuncAddr, + uint64_t Addr) { + LookupResult LR; + LR.LookupAddr = Addr; + LR.FuncRange.Start = FuncAddr; + uint64_t Offset = 0; + LR.FuncRange.End = FuncAddr + Data.getU32(&Offset); + uint32_t NameOffset = Data.getU32(&Offset); + // The "lookup" functions doesn't report errors as accurately as the "decode" + // function as it is meant to be fast. For more accurage errors we could call + // "decode". + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "FunctionInfo data is truncated"); + // This function will be called with the result of a binary search of the + // address table, we must still make sure the address does not fall into a + // gap between functions are after the last function. + if (LR.FuncRange.size() > 0 && !LR.FuncRange.contains(Addr)) + return createStringError(std::errc::io_error, + "address 0x%" PRIx64 " is not in GSYM", Addr); + + if (NameOffset == 0) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000", + Offset - 4); + LR.FuncName = GR.getString(NameOffset); + bool Done = false; + Optional<LineEntry> LineEntry; + Optional<DataExtractor> InlineInfoData; + while (!Done) { + if (!Data.isValidOffsetForDataOfSize(Offset, 8)) + return createStringError(std::errc::io_error, + "FunctionInfo data is truncated"); + const uint32_t IT = Data.getU32(&Offset); + const uint32_t InfoLength = Data.getU32(&Offset); + const StringRef InfoBytes = Data.getData().substr(Offset, InfoLength); + if (InfoLength != InfoBytes.size()) + return createStringError(std::errc::io_error, + "FunctionInfo data is truncated"); + DataExtractor InfoData(InfoBytes, Data.isLittleEndian(), + Data.getAddressSize()); + switch (IT) { + case InfoType::EndOfList: + Done = true; + break; + + case InfoType::LineTableInfo: + if (auto ExpectedLE = LineTable::lookup(InfoData, FuncAddr, Addr)) + LineEntry = ExpectedLE.get(); + else + return ExpectedLE.takeError(); + break; + + case InfoType::InlineInfo: + // We will parse the inline info after our line table, but only if + // we have a line entry. + InlineInfoData = InfoData; + break; + + default: + break; + } + Offset += InfoLength; + } + + if (!LineEntry) { + // We don't have a valid line entry for our address, fill in our source + // location as best we can and return. + SourceLocation SrcLoc; + SrcLoc.Name = LR.FuncName; + SrcLoc.Offset = Addr - FuncAddr; + LR.Locations.push_back(SrcLoc); + return LR; + } + + Optional<FileEntry> LineEntryFile = GR.getFile(LineEntry->File); + if (!LineEntryFile) + return createStringError(std::errc::invalid_argument, + "failed to extract file[%" PRIu32 "]", + LineEntry->File); + + SourceLocation SrcLoc; + SrcLoc.Name = LR.FuncName; + SrcLoc.Offset = Addr - FuncAddr; + SrcLoc.Dir = GR.getString(LineEntryFile->Dir); + SrcLoc.Base = GR.getString(LineEntryFile->Base); + SrcLoc.Line = LineEntry->Line; + LR.Locations.push_back(SrcLoc); + // If we don't have inline information, we are done. + if (!InlineInfoData) + return LR; + // We have inline information. Try to augment the lookup result with this + // data. + llvm::Error Err = InlineInfo::lookup(GR, *InlineInfoData, FuncAddr, Addr, + LR.Locations); + if (Err) + return std::move(Err); + return LR; +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymCreator.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymCreator.cpp new file mode 100644 index 0000000000..2001478e80 --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,320 @@ +//===- GsymCreator.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include <algorithm> +#include <cassert> +#include <functional> +#include <vector> + +using namespace llvm; +using namespace gsym; + + +GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { + insertFile(StringRef()); +} + +uint32_t GsymCreator::insertFile(StringRef Path, + llvm::sys::path::Style Style) { + llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); + llvm::StringRef filename = llvm::sys::path::filename(Path, Style); + // We must insert the strings first, then call the FileEntry constructor. + // If we inline the insertString() function call into the constructor, the + // call order is undefined due to parameter lists not having any ordering + // requirements. + const uint32_t Dir = insertString(directory); + const uint32_t Base = insertString(filename); + FileEntry FE(Dir, Base); + + std::lock_guard<std::recursive_mutex> Guard(Mutex); + const auto NextIndex = Files.size(); + // Find FE in hash map and insert if not present. + auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); + if (R.second) + Files.emplace_back(FE); + return R.first->second; +} + +llvm::Error GsymCreator::save(StringRef Path, + llvm::support::endianness ByteOrder) const { + std::error_code EC; + raw_fd_ostream OutStrm(Path, EC); + if (EC) + return llvm::errorCodeToError(EC); + FileWriter O(OutStrm, ByteOrder); + return encode(O); +} + +llvm::Error GsymCreator::encode(FileWriter &O) const { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + if (Funcs.empty()) + return createStringError(std::errc::invalid_argument, + "no functions to encode"); + if (!Finalized) + return createStringError(std::errc::invalid_argument, + "GsymCreator wasn't finalized prior to encoding"); + + if (Funcs.size() > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many FunctionInfos"); + + const uint64_t MinAddr = BaseAddress ? *BaseAddress : Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + Header Hdr; + Hdr.Magic = GSYM_MAGIC; + Hdr.Version = GSYM_VERSION; + Hdr.AddrOffSize = 0; + Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); + Hdr.BaseAddress = MinAddr; + Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); + Hdr.StrtabOffset = 0; // We will fix this up later. + Hdr.StrtabSize = 0; // We will fix this up later. + memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); + if (UUID.size() > sizeof(Hdr.UUID)) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", (uint32_t)UUID.size()); + // Set the address offset size correctly in the GSYM header. + if (AddrDelta <= UINT8_MAX) + Hdr.AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + Hdr.AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + Hdr.AddrOffSize = 4; + else + Hdr.AddrOffSize = 8; + // Copy the UUID value if we have one. + if (UUID.size() > 0) + memcpy(Hdr.UUID, UUID.data(), UUID.size()); + // Write out the header. + llvm::Error Err = Hdr.encode(O); + if (Err) + return Err; + + // Write out the address offsets. + O.alignTo(Hdr.AddrOffSize); + for (const auto &FuncInfo : Funcs) { + uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + switch(Hdr.AddrOffSize) { + case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break; + case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break; + case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break; + case 8: O.writeU64(AddrOffset); break; + } + } + + // Write out all zeros for the AddrInfoOffsets. + O.alignTo(4); + const off_t AddrInfoOffsetsOffset = O.tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + O.writeU32(0); + + // Write out the file table + O.alignTo(4); + assert(!Files.empty()); + assert(Files[0].Dir == 0); + assert(Files[0].Base == 0); + size_t NumFiles = Files.size(); + if (NumFiles > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many files"); + O.writeU32(static_cast<uint32_t>(NumFiles)); + for (auto File: Files) { + O.writeU32(File.Dir); + O.writeU32(File.Base); + } + + // Write out the sting table. + const off_t StrtabOffset = O.tell(); + StrTab.write(O.get_stream()); + const off_t StrtabSize = O.tell() - StrtabOffset; + std::vector<uint32_t> AddrInfoOffsets; + + // Write out the address infos for each function info. + for (const auto &FuncInfo : Funcs) { + if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) + AddrInfoOffsets.push_back(OffsetOrErr.get()); + else + return OffsetOrErr.takeError(); + } + // Fixup the string table offset and size in the header + O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); + O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); + + // Fixup all address info offsets + uint64_t Offset = 0; + for (auto AddrInfoOffset: AddrInfoOffsets) { + O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); + Offset += 4; + } + return ErrorSuccess(); +} + +llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + if (Finalized) + return createStringError(std::errc::invalid_argument, + "already finalized"); + Finalized = true; + + // Sort function infos so we can emit sorted functions. + llvm::sort(Funcs); + + // Don't let the string table indexes change by finalizing in order. + StrTab.finalizeInOrder(); + + // Remove duplicates function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping function. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. + // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + if (Prev->Range.intersects(Curr->Range)) { + // Overlapping address ranges. + if (Prev->Range == Curr->Range) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info. + // Our sorting guarantees that entries with matching address ranges + // that have debug info are last in the sort. + if (*Prev == *Curr) { + // FunctionInfo entries match exactly (range, lines, inlines) + OS << "warning: duplicate function info entries for range: " + << Curr->Range << '\n'; + Curr = Funcs.erase(Prev); + } else { + if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { + // Same address range, one with no debug info (symbol) and the + // next with debug info. Keep the latter. + Curr = Funcs.erase(Prev); + } else { + OS << "warning: same address range contains different debug " + << "info. Removing:\n" + << *Prev << "\nIn favor of this one:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + } else { + // print warnings about overlaps + OS << "warning: function ranges overlap:\n" + << *Prev << "\n" + << *Curr << "\n"; + } + } else if (Prev->Range.size() == 0 && + Curr->Range.contains(Prev->Range.Start)) { + OS << "warning: removing symbol:\n" + << *Prev << "\nKeeping:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + if (Curr == Funcs.end()) + break; + Prev = Curr++; + } + + // If our last function info entry doesn't have a size and if we have valid + // text ranges, we should set the size of the last entry since any search for + // a high address might match our last entry. By fixing up this size, we can + // help ensure we don't cause lookups to always return the last symbol that + // has no size when doing lookups. + if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { + if (auto Range = ValidTextRanges->getRangeThatContains( + Funcs.back().Range.Start)) { + Funcs.back().Range.End = Range->End; + } + } + OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " + << Funcs.size() << " total\n"; + return Error::success(); +} + +uint32_t GsymCreator::insertString(StringRef S, bool Copy) { + if (S.empty()) + return 0; + std::lock_guard<std::recursive_mutex> Guard(Mutex); + if (Copy) { + // We need to provide backing storage for the string if requested + // since StringTableBuilder stores references to strings. Any string + // that comes from a section in an object file doesn't need to be + // copied, but any string created by code will need to be copied. + // This allows GsymCreator to be really fast when parsing DWARF and + // other object files as most strings don't need to be copied. + CachedHashStringRef CHStr(S); + if (!StrTab.contains(CHStr)) + S = StringStorage.insert(S).first->getKey(); + } + return StrTab.add(S); +} + +void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + Ranges.insert(FI.Range); + Funcs.emplace_back(FI); +} + +void GsymCreator::forEachFunctionInfo( + std::function<bool(FunctionInfo &)> const &Callback) { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + for (auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} + +void GsymCreator::forEachFunctionInfo( + std::function<bool(const FunctionInfo &)> const &Callback) const { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + for (const auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} + +size_t GsymCreator::getNumFunctionInfos() const{ + std::lock_guard<std::recursive_mutex> Guard(Mutex); + return Funcs.size(); +} + +bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { + if (ValidTextRanges) + return ValidTextRanges->contains(Addr); + return true; // No valid text ranges has been set, so accept all ranges. +} + +bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + return Ranges.contains(Addr); +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymReader.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymReader.cpp new file mode 100644 index 0000000000..2ad18bf63d --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,406 @@ +//===- GsymReader.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include <assert.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace gsym; + +GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) : + MemBuffer(std::move(Buffer)), + Endian(support::endian::system_endianness()) {} + + GsymReader::GsymReader(GsymReader &&RHS) = default; + +GsymReader::~GsymReader() = default; + +llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) { + // Open the input file and return an appropriate error if needed. + ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto Err = BuffOrErr.getError(); + if (Err) + return llvm::errorCodeToError(Err); + return create(BuffOrErr.get()); +} + +llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) { + auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); + return create(MemBuffer); +} + +llvm::Expected<llvm::gsym::GsymReader> +GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) { + if (!MemBuffer.get()) + return createStringError(std::errc::invalid_argument, + "invalid memory buffer"); + GsymReader GR(std::move(MemBuffer)); + llvm::Error Err = GR.parse(); + if (Err) + return std::move(Err); + return std::move(GR); +} + +llvm::Error +GsymReader::parse() { + BinaryStreamReader FileData(MemBuffer->getBuffer(), + support::endian::system_endianness()); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. This is done for performance + // and efficiency for symbolicating and parsing GSYM data. + if (FileData.readObject(Hdr)) + return createStringError(std::errc::invalid_argument, + "not enough data for a GSYM header"); + + const auto HostByteOrder = support::endian::system_endianness(); + switch (Hdr->Magic) { + case GSYM_MAGIC: + Endian = HostByteOrder; + break; + case GSYM_CIGAM: + // This is a GSYM file, but not native endianness. + Endian = sys::IsBigEndianHost ? support::little : support::big; + Swap.reset(new SwappedData); + break; + default: + return createStringError(std::errc::invalid_argument, + "not a GSYM file"); + } + + bool DataIsLittleEndian = HostByteOrder != support::little; + // Read a correctly byte swapped header if we need to. + if (Swap) { + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + if (auto ExpectedHdr = Header::decode(Data)) + Swap->Hdr = ExpectedHdr.get(); + else + return ExpectedHdr.takeError(); + Hdr = &Swap->Hdr; + } + + // Detect errors in the header and report any that are found. If we make it + // past this without errors, we know we have a good magic value, a supported + // version number, verified address offset size and a valid UUID size. + if (Error Err = Hdr->checkForError()) + return Err; + + if (!Swap) { + // This is the native endianness case that is most common and optimized for + // efficient lookups. Here we just grab pointers to the native data and + // use ArrayRef objects to allow efficient read only access. + + // Read the address offsets. + if (FileData.padToAlignment(Hdr->AddrOffSize) || + FileData.readArray(AddrOffsets, + Hdr->NumAddresses * Hdr->AddrOffSize)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + + // Read the address info offsets. + if (FileData.padToAlignment(4) || + FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address info offsets table"); + + // Read the file table. + uint32_t NumFiles = 0; + if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles)) + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + + // Get the string table. + FileData.setOffset(Hdr->StrtabOffset); + if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); +} else { + // This is the non native endianness case that is not common and not + // optimized for lookups. Here we decode the important tables into local + // storage and then set the ArrayRef objects to point to these swapped + // copies of the read only data so lookups can be as efficient as possible. + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + + // Read the address offsets. + uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize); + Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize); + switch (Hdr->AddrOffSize) { + case 1: + if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 2: + if (!Data.getU16(&Offset, + reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 4: + if (!Data.getU32(&Offset, + reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 8: + if (!Data.getU64(&Offset, + reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + } + AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets); + + // Read the address info offsets. + Offset = alignTo(Offset, 4); + Swap->AddrInfoOffsets.resize(Hdr->NumAddresses); + if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses)) + AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets); + else + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + // Read the file table. + const uint32_t NumFiles = Data.getU32(&Offset); + if (NumFiles > 0) { + Swap->Files.resize(NumFiles); + if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2)) + Files = ArrayRef<FileEntry>(Swap->Files); + else + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + } + // Get the string table. + StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset, + Hdr->StrtabSize); + if (StrTab.Data.empty()) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); + } + return Error::success(); + +} + +const Header &GsymReader::getHeader() const { + // The only way to get a GsymReader is from GsymReader::openFile(...) or + // GsymReader::copyBuffer() and the header must be valid and initialized to + // a valid pointer value, so the assert below should not trigger. + assert(Hdr); + return *Hdr; +} + +Optional<uint64_t> GsymReader::getAddress(size_t Index) const { + switch (Hdr->AddrOffSize) { + case 1: return addressForIndex<uint8_t>(Index); + case 2: return addressForIndex<uint16_t>(Index); + case 4: return addressForIndex<uint32_t>(Index); + case 8: return addressForIndex<uint64_t>(Index); + } + return llvm::None; +} + +Optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const { + const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); + if (Index < NumAddrInfoOffsets) + return AddrInfoOffsets[Index]; + return llvm::None; +} + +Expected<uint64_t> +GsymReader::getAddressIndex(const uint64_t Addr) const { + if (Addr >= Hdr->BaseAddress) { + const uint64_t AddrOffset = Addr - Hdr->BaseAddress; + Optional<uint64_t> AddrOffsetIndex; + switch (Hdr->AddrOffSize) { + case 1: + AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset); + break; + case 2: + AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset); + break; + case 4: + AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset); + break; + case 8: + AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset); + break; + default: + return createStringError(std::errc::invalid_argument, + "unsupported address offset size %u", + Hdr->AddrOffSize); + } + if (AddrOffsetIndex) + return *AddrOffsetIndex; + } + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " is not in GSYM", Addr); + +} + +llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const { + Expected<uint64_t> AddressIndex = getAddressIndex(Addr); + if (!AddressIndex) + return AddressIndex.takeError(); + // Address info offsets size should have been checked in parse(). + assert(*AddressIndex < AddrInfoOffsets.size()); + auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; + DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4); + if (Optional<uint64_t> OptAddr = getAddress(*AddressIndex)) { + auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr); + if (ExpectedFI) { + if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0) + return ExpectedFI; + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " is not in GSYM", Addr); + } + } + return createStringError(std::errc::invalid_argument, + "failed to extract address[%" PRIu64 "]", + *AddressIndex); +} + +llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const { + Expected<uint64_t> AddressIndex = getAddressIndex(Addr); + if (!AddressIndex) + return AddressIndex.takeError(); + // Address info offsets size should have been checked in parse(). + assert(*AddressIndex < AddrInfoOffsets.size()); + auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; + DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4); + if (Optional<uint64_t> OptAddr = getAddress(*AddressIndex)) + return FunctionInfo::lookup(Data, *this, *OptAddr, Addr); + return createStringError(std::errc::invalid_argument, + "failed to extract address[%" PRIu64 "]", + *AddressIndex); +} + +void GsymReader::dump(raw_ostream &OS) { + const auto &Header = getHeader(); + // Dump the GSYM header. + OS << Header << "\n"; + // Dump the address table. + OS << "Address Table:\n"; + OS << "INDEX OFFSET"; + + switch (Hdr->AddrOffSize) { + case 1: OS << "8 "; break; + case 2: OS << "16"; break; + case 4: OS << "32"; break; + case 8: OS << "64"; break; + default: OS << "??"; break; + } + OS << " (ADDRESS)\n"; + OS << "====== =============================== \n"; + for (uint32_t I = 0; I < Header.NumAddresses; ++I) { + OS << format("[%4u] ", I); + switch (Hdr->AddrOffSize) { + case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break; + case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break; + case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break; + case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break; + default: break; + } + OS << " (" << HEX64(*getAddress(I)) << ")\n"; + } + // Dump the address info offsets table. + OS << "\nAddress Info Offsets:\n"; + OS << "INDEX Offset\n"; + OS << "====== ==========\n"; + for (uint32_t I = 0; I < Header.NumAddresses; ++I) + OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n"; + // Dump the file table. + OS << "\nFiles:\n"; + OS << "INDEX DIRECTORY BASENAME PATH\n"; + OS << "====== ========== ========== ==============================\n"; + for (uint32_t I = 0; I < Files.size(); ++I) { + OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' ' + << HEX32(Files[I].Base) << ' '; + dump(OS, getFile(I)); + OS << "\n"; + } + OS << "\n" << StrTab << "\n"; + + for (uint32_t I = 0; I < Header.NumAddresses; ++I) { + OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": "; + if (auto FI = getFunctionInfo(*getAddress(I))) + dump(OS, *FI); + else + logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:"); + } +} + +void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) { + OS << FI.Range << " \"" << getString(FI.Name) << "\"\n"; + if (FI.OptLineTable) + dump(OS, *FI.OptLineTable); + if (FI.Inline) + dump(OS, *FI.Inline); +} + +void GsymReader::dump(raw_ostream &OS, const LineTable <) { + OS << "LineTable:\n"; + for (auto &LE: LT) { + OS << " " << HEX64(LE.Addr) << ' '; + if (LE.File) + dump(OS, getFile(LE.File)); + OS << ':' << LE.Line << '\n'; + } +} + +void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) { + if (Indent == 0) + OS << "InlineInfo:\n"; + else + OS.indent(Indent); + OS << II.Ranges << ' ' << getString(II.Name); + if (II.CallFile != 0) { + if (auto File = getFile(II.CallFile)) { + OS << " called from "; + dump(OS, File); + OS << ':' << II.CallLine; + } + } + OS << '\n'; + for (const auto &ChildII: II.Children) + dump(OS, ChildII, Indent + 2); +} + +void GsymReader::dump(raw_ostream &OS, Optional<FileEntry> FE) { + if (FE) { + // IF we have the file from index 0, then don't print anything + if (FE->Dir == 0 && FE->Base == 0) + return; + StringRef Dir = getString(FE->Dir); + StringRef Base = getString(FE->Base); + if (!Dir.empty()) { + OS << Dir; + if (Dir.contains('\\') && !Dir.contains('/')) + OS << '\\'; + else + OS << '/'; + } + if (!Base.empty()) { + OS << Base; + } + if (!Dir.empty() || !Base.empty()) + return; + } + OS << "<invalid-file>"; +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/Header.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/Header.cpp new file mode 100644 index 0000000000..0b3fb9c498 --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/Header.cpp @@ -0,0 +1,109 @@ +//===- Header.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +#define HEX8(v) llvm::format_hex(v, 4) +#define HEX16(v) llvm::format_hex(v, 6) +#define HEX32(v) llvm::format_hex(v, 10) +#define HEX64(v) llvm::format_hex(v, 18) + +using namespace llvm; +using namespace gsym; + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const Header &H) { + OS << "Header:\n"; + OS << " Magic = " << HEX32(H.Magic) << "\n"; + OS << " Version = " << HEX16(H.Version) << '\n'; + OS << " AddrOffSize = " << HEX8(H.AddrOffSize) << '\n'; + OS << " UUIDSize = " << HEX8(H.UUIDSize) << '\n'; + OS << " BaseAddress = " << HEX64(H.BaseAddress) << '\n'; + OS << " NumAddresses = " << HEX32(H.NumAddresses) << '\n'; + OS << " StrtabOffset = " << HEX32(H.StrtabOffset) << '\n'; + OS << " StrtabSize = " << HEX32(H.StrtabSize) << '\n'; + OS << " UUID = "; + for (uint8_t I = 0; I < H.UUIDSize; ++I) + OS << format_hex_no_prefix(H.UUID[I], 2); + OS << '\n'; + return OS; +} + +/// Check the header and detect any errors. +llvm::Error Header::checkForError() const { + if (Magic != GSYM_MAGIC) + return createStringError(std::errc::invalid_argument, + "invalid GSYM magic 0x%8.8x", Magic); + if (Version != GSYM_VERSION) + return createStringError(std::errc::invalid_argument, + "unsupported GSYM version %u", Version); + switch (AddrOffSize) { + case 1: break; + case 2: break; + case 4: break; + case 8: break; + default: + return createStringError(std::errc::invalid_argument, + "invalid address offset size %u", + AddrOffSize); + } + if (UUIDSize > GSYM_MAX_UUID_SIZE) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", UUIDSize); + return Error::success(); +} + +llvm::Expected<Header> Header::decode(DataExtractor &Data) { + uint64_t Offset = 0; + // The header is stored as a single blob of data that has a fixed byte size. + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Header))) + return createStringError(std::errc::invalid_argument, + "not enough data for a gsym::Header"); + Header H; + H.Magic = Data.getU32(&Offset); + H.Version = Data.getU16(&Offset); + H.AddrOffSize = Data.getU8(&Offset); + H.UUIDSize = Data.getU8(&Offset); + H.BaseAddress = Data.getU64(&Offset); + H.NumAddresses = Data.getU32(&Offset); + H.StrtabOffset = Data.getU32(&Offset); + H.StrtabSize = Data.getU32(&Offset); + Data.getU8(&Offset, H.UUID, GSYM_MAX_UUID_SIZE); + if (llvm::Error Err = H.checkForError()) + return std::move(Err); + return H; +} + +llvm::Error Header::encode(FileWriter &O) const { + // Users must verify the Header is valid prior to calling this funtion. + if (llvm::Error Err = checkForError()) + return Err; + O.writeU32(Magic); + O.writeU16(Version); + O.writeU8(AddrOffSize); + O.writeU8(UUIDSize); + O.writeU64(BaseAddress); + O.writeU32(NumAddresses); + O.writeU32(StrtabOffset); + O.writeU32(StrtabSize); + O.writeData(llvm::ArrayRef<uint8_t>(UUID)); + return Error::success(); +} + +bool llvm::gsym::operator==(const Header &LHS, const Header &RHS) { + return LHS.Magic == RHS.Magic && LHS.Version == RHS.Version && + LHS.AddrOffSize == RHS.AddrOffSize && LHS.UUIDSize == RHS.UUIDSize && + LHS.BaseAddress == RHS.BaseAddress && + LHS.NumAddresses == RHS.NumAddresses && + LHS.StrtabOffset == RHS.StrtabOffset && + LHS.StrtabSize == RHS.StrtabSize && + memcmp(LHS.UUID, RHS.UUID, LHS.UUIDSize) == 0; +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/InlineInfo.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/InlineInfo.cpp new file mode 100644 index 0000000000..21679b1b78 --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -0,0 +1,265 @@ +//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" +#include <algorithm> +#include <inttypes.h> + +using namespace llvm; +using namespace gsym; + + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const InlineInfo &II) { + if (!II.isValid()) + return OS; + bool First = true; + for (auto Range : II.Ranges) { + if (First) + First = false; + else + OS << ' '; + OS << Range; + } + OS << " Name = " << HEX32(II.Name) << ", CallFile = " << II.CallFile + << ", CallLine = " << II.CallFile << '\n'; + for (const auto &Child : II.Children) + OS << Child; + return OS; +} + +static bool getInlineStackHelper(const InlineInfo &II, uint64_t Addr, + std::vector<const InlineInfo *> &InlineStack) { + if (II.Ranges.contains(Addr)) { + // If this is the top level that represents the concrete function, + // there will be no name and we shoud clear the inline stack. Otherwise + // we have found an inline call stack that we need to insert. + if (II.Name != 0) + InlineStack.insert(InlineStack.begin(), &II); + for (const auto &Child : II.Children) { + if (::getInlineStackHelper(Child, Addr, InlineStack)) + break; + } + return !InlineStack.empty(); + } + return false; +} + +llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr) const { + InlineArray Result; + if (getInlineStackHelper(*this, Addr, Result)) + return Result; + return llvm::None; +} + +/// Skip an InlineInfo object in the specified data at the specified offset. +/// +/// Used during the InlineInfo::lookup() call to quickly skip child InlineInfo +/// objects where the addres ranges isn't contained in the InlineInfo object +/// or its children. This avoids allocations by not appending child InlineInfo +/// objects to the InlineInfo::Children array. +/// +/// \param Data The binary stream to read the data from. +/// +/// \param Offset The byte offset within \a Data. +/// +/// \param SkippedRanges If true, address ranges have already been skipped. + +static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) { + if (!SkippedRanges) { + if (AddressRanges::skip(Data, Offset) == 0) + return false; + } + bool HasChildren = Data.getU8(&Offset) != 0; + Data.getU32(&Offset); // Skip Inline.Name. + Data.getULEB128(&Offset); // Skip Inline.CallFile. + Data.getULEB128(&Offset); // Skip Inline.CallLine. + if (HasChildren) { + while (skip(Data, Offset, false /* SkippedRanges */)) + /* Do nothing */; + } + // We skipped a valid InlineInfo. + return true; +} + +/// A Lookup helper functions. +/// +/// Used during the InlineInfo::lookup() call to quickly only parse an +/// InlineInfo object if the address falls within this object. This avoids +/// allocations by not appending child InlineInfo objects to the +/// InlineInfo::Children array and also skips any InlineInfo objects that do +/// not contain the address we are looking up. +/// +/// \param Data The binary stream to read the data from. +/// +/// \param Offset The byte offset within \a Data. +/// +/// \param BaseAddr The address that the relative address range offsets are +/// relative to. + +static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, + uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, + llvm::Error &Err) { + InlineInfo Inline; + Inline.Ranges.decode(Data, BaseAddr, Offset); + if (Inline.Ranges.empty()) + return true; + // Check if the address is contained within the inline information, and if + // not, quickly skip this InlineInfo object and all its children. + if (!Inline.Ranges.contains(Addr)) { + skip(Data, Offset, true /* SkippedRanges */); + return false; + } + + // The address range is contained within this InlineInfo, add the source + // location for this InlineInfo and any children that contain the address. + bool HasChildren = Data.getU8(&Offset) != 0; + Inline.Name = Data.getU32(&Offset); + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + // Child address ranges are encoded relative to the first address in the + // parent InlineInfo object. + const auto ChildBaseAddr = Inline.Ranges[0].Start; + bool Done = false; + while (!Done) + Done = lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err); + } + + Optional<FileEntry> CallFile = GR.getFile(Inline.CallFile); + if (!CallFile) { + Err = createStringError(std::errc::invalid_argument, + "failed to extract file[%" PRIu32 "]", + Inline.CallFile); + return false; + } + + if (CallFile->Dir || CallFile->Base) { + SourceLocation SrcLoc; + SrcLoc.Name = SrcLocs.back().Name; + SrcLoc.Offset = SrcLocs.back().Offset; + SrcLoc.Dir = GR.getString(CallFile->Dir); + SrcLoc.Base = GR.getString(CallFile->Base); + SrcLoc.Line = Inline.CallLine; + SrcLocs.back().Name = GR.getString(Inline.Name); + SrcLocs.back().Offset = Addr - Inline.Ranges[0].Start; + SrcLocs.push_back(SrcLoc); + } + return true; +} + +llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data, + uint64_t BaseAddr, uint64_t Addr, + SourceLocations &SrcLocs) { + // Call our recursive helper function starting at offset zero. + uint64_t Offset = 0; + llvm::Error Err = Error::success(); + ::lookup(GR, Data, Offset, BaseAddr, Addr, SrcLocs, Err); + return Err; +} + +/// Decode an InlineInfo in Data at the specified offset. +/// +/// A local helper function to decode InlineInfo objects. This function is +/// called recursively when parsing child InlineInfo objects. +/// +/// \param Data The data extractor to decode from. +/// \param Offset The offset within \a Data to decode from. +/// \param BaseAddr The base address to use when decoding address ranges. +/// \returns An InlineInfo or an error describing the issue that was +/// encountered during decoding. +static llvm::Expected<InlineInfo> decode(DataExtractor &Data, uint64_t &Offset, + uint64_t BaseAddr) { + InlineInfo Inline; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset); + Inline.Ranges.decode(Data, BaseAddr, Offset); + if (Inline.Ranges.empty()) + return Inline; + if (!Data.isValidOffsetForDataOfSize(Offset, 1)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint8_t indicating children", + Offset); + bool HasChildren = Data.getU8(&Offset) != 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint32_t for name", Offset); + Inline.Name = Data.getU32(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call file", Offset); + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call line", Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + // Child address ranges are encoded relative to the first address in the + // parent InlineInfo object. + const auto ChildBaseAddr = Inline.Ranges[0].Start; + while (true) { + llvm::Expected<InlineInfo> Child = decode(Data, Offset, ChildBaseAddr); + if (!Child) + return Child.takeError(); + // InlineInfo with empty Ranges termintes a child sibling chain. + if (Child.get().Ranges.empty()) + break; + Inline.Children.emplace_back(std::move(*Child)); + } + } + return Inline; +} + +llvm::Expected<InlineInfo> InlineInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + uint64_t Offset = 0; + return ::decode(Data, Offset, BaseAddr); +} + +llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const { + // Users must verify the InlineInfo is valid prior to calling this funtion. + // We don't want to emit any InlineInfo objects if they are not valid since + // it will waste space in the GSYM file. + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid InlineInfo object"); + Ranges.encode(O, BaseAddr); + bool HasChildren = !Children.empty(); + O.writeU8(HasChildren); + O.writeU32(Name); + O.writeULEB(CallFile); + O.writeULEB(CallLine); + if (HasChildren) { + // Child address ranges are encoded as relative to the first + // address in the Ranges for this object. This keeps the offsets + // small and allows for efficient encoding using ULEB offsets. + const uint64_t ChildBaseAddr = Ranges[0].Start; + for (const auto &Child : Children) { + // Make sure all child address ranges are contained in the parent address + // ranges. + for (const auto &ChildRange: Child.Ranges) { + if (!Ranges.contains(ChildRange)) + return createStringError(std::errc::invalid_argument, + "child range not contained in parent"); + } + llvm::Error Err = Child.encode(O, ChildBaseAddr); + if (Err) + return Err; + } + + // Terminate child sibling chain by emitting a zero. This zero will cause + // the decodeAll() function above to return false and stop the decoding + // of child InlineInfo objects that are siblings. + O.writeULEB(0); + } + return Error::success(); +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/LineTable.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/LineTable.cpp new file mode 100644 index 0000000000..a49a3ba9bf --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/LineTable.cpp @@ -0,0 +1,293 @@ +//===- LineTable.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" + +using namespace llvm; +using namespace gsym; + +enum LineTableOpCode { + EndSequence = 0x00, ///< End of the line table. + SetFile = 0x01, ///< Set LineTableRow.file_idx, don't push a row. + AdvancePC = 0x02, ///< Increment LineTableRow.address, and push a row. + AdvanceLine = 0x03, ///< Set LineTableRow.file_line, don't push a row. + FirstSpecial = 0x04, ///< All special opcodes push a row. +}; + +struct DeltaInfo { + int64_t Delta; + uint32_t Count; + DeltaInfo(int64_t D, uint32_t C) : Delta(D), Count(C) {} +}; + +inline bool operator<(const DeltaInfo &LHS, int64_t Delta) { + return LHS.Delta < Delta; +} + +static bool encodeSpecial(int64_t MinLineDelta, int64_t MaxLineDelta, + int64_t LineDelta, uint64_t AddrDelta, + uint8_t &SpecialOp) { + if (LineDelta < MinLineDelta) + return false; + if (LineDelta > MaxLineDelta) + return false; + int64_t LineRange = MaxLineDelta - MinLineDelta + 1; + int64_t AdjustedOp = ((LineDelta - MinLineDelta) + AddrDelta * LineRange); + int64_t Op = AdjustedOp + FirstSpecial; + if (Op < 0) + return false; + if (Op > 255) + return false; + SpecialOp = (uint8_t)Op; + return true; +} + +typedef std::function<bool(const LineEntry &Row)> LineEntryCallback; + +static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr, + LineEntryCallback const &Callback) { + uint64_t Offset = 0; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable MinDelta", Offset); + int64_t MinDelta = Data.getSLEB128(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable MaxDelta", Offset); + int64_t MaxDelta = Data.getSLEB128(&Offset); + int64_t LineRange = MaxDelta - MinDelta + 1; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable FirstLine", Offset); + const uint32_t FirstLine = (uint32_t)Data.getULEB128(&Offset); + LineEntry Row(BaseAddr, 1, FirstLine); + bool Done = false; + while (!Done) { + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before EndSequence", Offset); + uint8_t Op = Data.getU8(&Offset); + switch (Op) { + case EndSequence: + Done = true; + break; + case SetFile: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before SetFile value", + Offset); + Row.File = (uint32_t)Data.getULEB128(&Offset); + break; + case AdvancePC: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before AdvancePC value", + Offset); + Row.Addr += Data.getULEB128(&Offset); + // If the function callback returns false, we stop parsing. + if (Callback(Row) == false) + return Error::success(); + break; + case AdvanceLine: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before AdvanceLine value", + Offset); + Row.Line += Data.getSLEB128(&Offset); + break; + default: { + // A byte that contains both address and line increment. + uint8_t AdjustedOp = Op - FirstSpecial; + int64_t LineDelta = MinDelta + (AdjustedOp % LineRange); + uint64_t AddrDelta = (AdjustedOp / LineRange); + Row.Line += LineDelta; + Row.Addr += AddrDelta; + // If the function callback returns false, we stop parsing. + if (Callback(Row) == false) + return Error::success(); + break; + } + } + } + return Error::success(); +} + +llvm::Error LineTable::encode(FileWriter &Out, uint64_t BaseAddr) const { + // Users must verify the LineTable is valid prior to calling this funtion. + // We don't want to emit any LineTable objects if they are not valid since + // it will waste space in the GSYM file. + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid LineTable object"); + + int64_t MinLineDelta = INT64_MAX; + int64_t MaxLineDelta = INT64_MIN; + std::vector<DeltaInfo> DeltaInfos; + if (Lines.size() == 1) { + MinLineDelta = 0; + MaxLineDelta = 0; + } else { + int64_t PrevLine = 1; + bool First = true; + for (const auto &line_entry : Lines) { + if (First) + First = false; + else { + int64_t LineDelta = (int64_t)line_entry.Line - PrevLine; + auto End = DeltaInfos.end(); + auto Pos = std::lower_bound(DeltaInfos.begin(), End, LineDelta); + if (Pos != End && Pos->Delta == LineDelta) + ++Pos->Count; + else + DeltaInfos.insert(Pos, DeltaInfo(LineDelta, 1)); + if (LineDelta < MinLineDelta) + MinLineDelta = LineDelta; + if (LineDelta > MaxLineDelta) + MaxLineDelta = LineDelta; + } + PrevLine = (int64_t)line_entry.Line; + } + assert(MinLineDelta <= MaxLineDelta); + } + // Set the min and max line delta intelligently based on the counts of + // the line deltas. if our range is too large. + const int64_t MaxLineRange = 14; + if (MaxLineDelta - MinLineDelta > MaxLineRange) { + uint32_t BestIndex = 0; + uint32_t BestEndIndex = 0; + uint32_t BestCount = 0; + const size_t NumDeltaInfos = DeltaInfos.size(); + for (uint32_t I = 0; I < NumDeltaInfos; ++I) { + const int64_t FirstDelta = DeltaInfos[I].Delta; + uint32_t CurrCount = 0; + uint32_t J; + for (J = I; J < NumDeltaInfos; ++J) { + auto LineRange = DeltaInfos[J].Delta - FirstDelta; + if (LineRange > MaxLineRange) + break; + CurrCount += DeltaInfos[J].Count; + } + if (CurrCount > BestCount) { + BestIndex = I; + BestEndIndex = J - 1; + BestCount = CurrCount; + } + } + MinLineDelta = DeltaInfos[BestIndex].Delta; + MaxLineDelta = DeltaInfos[BestEndIndex].Delta; + } + if (MinLineDelta == MaxLineDelta && MinLineDelta > 0 && + MinLineDelta < MaxLineRange) + MinLineDelta = 0; + assert(MinLineDelta <= MaxLineDelta); + + // Initialize the line entry state as a starting point. All line entries + // will be deltas from this. + LineEntry Prev(BaseAddr, 1, Lines.front().Line); + + // Write out the min and max line delta as signed LEB128. + Out.writeSLEB(MinLineDelta); + Out.writeSLEB(MaxLineDelta); + // Write out the starting line number as a unsigned LEB128. + Out.writeULEB(Prev.Line); + + for (const auto &Curr : Lines) { + if (Curr.Addr < BaseAddr) + return createStringError(std::errc::invalid_argument, + "LineEntry has address 0x%" PRIx64 " which is " + "less than the function start address 0x%" + PRIx64, Curr.Addr, BaseAddr); + if (Curr.Addr < Prev.Addr) + return createStringError(std::errc::invalid_argument, + "LineEntry in LineTable not in ascending order"); + const uint64_t AddrDelta = Curr.Addr - Prev.Addr; + int64_t LineDelta = 0; + if (Curr.Line > Prev.Line) + LineDelta = Curr.Line - Prev.Line; + else if (Prev.Line > Curr.Line) + LineDelta = -((int32_t)(Prev.Line - Curr.Line)); + + // Set the file if it doesn't match the current one. + if (Curr.File != Prev.File) { + Out.writeU8(SetFile); + Out.writeULEB(Curr.File); + } + + uint8_t SpecialOp; + if (encodeSpecial(MinLineDelta, MaxLineDelta, LineDelta, AddrDelta, + SpecialOp)) { + // Advance the PC and line and push a row. + Out.writeU8(SpecialOp); + } else { + // We can't encode the address delta and line delta into + // a single special opcode, we must do them separately. + + // Advance the line. + if (LineDelta != 0) { + Out.writeU8(AdvanceLine); + Out.writeSLEB(LineDelta); + } + + // Advance the PC and push a row. + Out.writeU8(AdvancePC); + Out.writeULEB(AddrDelta); + } + Prev = Curr; + } + Out.writeU8(EndSequence); + return Error::success(); +} + +// Parse all line table entries into the "LineTable" vector. We can +// cache the results of this if needed, or we can call LineTable::lookup() +// below. +llvm::Expected<LineTable> LineTable::decode(DataExtractor &Data, + uint64_t BaseAddr) { + LineTable LT; + llvm::Error Err = parse(Data, BaseAddr, [&](const LineEntry &Row) -> bool { + LT.Lines.push_back(Row); + return true; // Keep parsing by returning true. + }); + if (Err) + return std::move(Err); + return LT; +} +// Parse the line table on the fly and find the row we are looking for. +// We will need to determine if we need to cache the line table by calling +// LineTable::parseAllEntries(...) or just call this function each time. +// There is a CPU vs memory tradeoff we will need to determined. +Expected<LineEntry> LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) { + LineEntry Result; + llvm::Error Err = parse(Data, BaseAddr, + [Addr, &Result](const LineEntry &Row) -> bool { + if (Addr < Row.Addr) + return false; // Stop parsing, result contains the line table row! + Result = Row; + if (Addr == Row.Addr) { + // Stop parsing, this is the row we are looking for since the address + // matches. + return false; + } + return true; // Keep parsing till we find the right row. + }); + if (Err) + return std::move(Err); + if (Result.isValid()) + return Result; + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " is not in the line table", + Addr); +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable <) { + for (const auto &LineEntry : LT) + OS << LineEntry << '\n'; + return OS; +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/LookupResult.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/LookupResult.cpp new file mode 100644 index 0000000000..8a624226b1 --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/LookupResult.cpp @@ -0,0 +1,74 @@ +//===- LookupResult.cpp -------------------------------------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LookupResult.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <ciso646> + +using namespace llvm; +using namespace gsym; + +std::string LookupResult::getSourceFile(uint32_t Index) const { + std::string Fullpath; + if (Index < Locations.size()) { + if (!Locations[Index].Dir.empty()) { + if (Locations[Index].Base.empty()) { + Fullpath = std::string(Locations[Index].Dir); + } else { + llvm::SmallString<64> Storage; + llvm::sys::path::append(Storage, Locations[Index].Dir, + Locations[Index].Base); + Fullpath.assign(Storage.begin(), Storage.end()); + } + } else if (!Locations[Index].Base.empty()) + Fullpath = std::string(Locations[Index].Base); + } + return Fullpath; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const SourceLocation &SL) { + OS << SL.Name; + if (SL.Offset > 0) + OS << " + " << SL.Offset; + if (SL.Dir.size() || SL.Base.size()) { + OS << " @ "; + if (!SL.Dir.empty()) { + OS << SL.Dir; + if (SL.Dir.contains('\\') and not SL.Dir.contains('/')) + OS << '\\'; + else + OS << '/'; + } + if (SL.Base.empty()) + OS << "<invalid-file>"; + else + OS << SL.Base; + OS << ':' << SL.Line; + } + return OS; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) { + OS << HEX64(LR.LookupAddr) << ": "; + auto NumLocations = LR.Locations.size(); + for (size_t I = 0; I < NumLocations; ++I) { + if (I > 0) { + OS << '\n'; + OS.indent(20); + } + const bool IsInlined = I + 1 != NumLocations; + OS << LR.Locations[I]; + if (IsInlined) + OS << " [inlined]"; + } + OS << '\n'; + return OS; +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp new file mode 100644 index 0000000000..ad35aefe77 --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp @@ -0,0 +1,116 @@ +//===- ObjectFileTransformer.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <unordered_set> + +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +using namespace llvm; +using namespace gsym; + +constexpr uint32_t NT_GNU_BUILD_ID_TAG = 0x03; + +static std::vector<uint8_t> getUUID(const object::ObjectFile &Obj) { + // Extract the UUID from the object file + std::vector<uint8_t> UUID; + if (auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj)) { + const ArrayRef<uint8_t> MachUUID = MachO->getUuid(); + if (!MachUUID.empty()) + UUID.assign(MachUUID.data(), MachUUID.data() + MachUUID.size()); + } else if (isa<object::ELFObjectFileBase>(&Obj)) { + const StringRef GNUBuildID(".note.gnu.build-id"); + for (const object::SectionRef &Sect : Obj.sections()) { + Expected<StringRef> SectNameOrErr = Sect.getName(); + if (!SectNameOrErr) { + consumeError(SectNameOrErr.takeError()); + continue; + } + StringRef SectName(*SectNameOrErr); + if (SectName != GNUBuildID) + continue; + StringRef BuildIDData; + Expected<StringRef> E = Sect.getContents(); + if (E) + BuildIDData = *E; + else { + consumeError(E.takeError()); + continue; + } + DataExtractor Decoder(BuildIDData, Obj.makeTriple().isLittleEndian(), 8); + uint64_t Offset = 0; + const uint32_t NameSize = Decoder.getU32(&Offset); + const uint32_t PayloadSize = Decoder.getU32(&Offset); + const uint32_t PayloadType = Decoder.getU32(&Offset); + StringRef Name(Decoder.getFixedLengthString(&Offset, NameSize)); + if (Name == "GNU" && PayloadType == NT_GNU_BUILD_ID_TAG) { + Offset = alignTo(Offset, 4); + StringRef UUIDBytes(Decoder.getBytes(&Offset, PayloadSize)); + if (!UUIDBytes.empty()) { + auto Ptr = reinterpret_cast<const uint8_t *>(UUIDBytes.data()); + UUID.assign(Ptr, Ptr + UUIDBytes.size()); + } + } + } + } + return UUID; +} + +llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj, + raw_ostream &Log, + GsymCreator &Gsym) { + using namespace llvm::object; + + const bool IsMachO = isa<MachOObjectFile>(&Obj); + const bool IsELF = isa<ELFObjectFileBase>(&Obj); + + // Read build ID. + Gsym.setUUID(getUUID(Obj)); + + // Parse the symbol table. + size_t NumBefore = Gsym.getNumFunctionInfos(); + for (const object::SymbolRef &Sym : Obj.symbols()) { + Expected<SymbolRef::Type> SymType = Sym.getType(); + if (!SymType) { + consumeError(SymType.takeError()); + continue; + } + Expected<uint64_t> AddrOrErr = Sym.getValue(); + if (!AddrOrErr) + // TODO: Test this error. + return AddrOrErr.takeError(); + + if (SymType.get() != SymbolRef::Type::ST_Function || + !Gsym.IsValidTextAddress(*AddrOrErr) || + Gsym.hasFunctionInfoForAddress(*AddrOrErr)) + continue; + // Function size for MachO files will be 0 + constexpr bool NoCopy = false; + const uint64_t size = IsELF ? ELFSymbolRef(Sym).getSize() : 0; + Expected<StringRef> Name = Sym.getName(); + if (!Name) { + logAllUnhandledErrors(Name.takeError(), Log, "ObjectFileTransformer: "); + continue; + } + // Remove the leading '_' character in any symbol names if there is one + // for mach-o files. + if (IsMachO) + Name->consume_front("_"); + Gsym.addFunctionInfo( + FunctionInfo(*AddrOrErr, size, Gsym.insertString(*Name, NoCopy))); + } + size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from symbol table.\n"; + return Error::success(); +} diff --git a/contrib/libs/llvm12/lib/DebugInfo/GSYM/Range.cpp b/contrib/libs/llvm12/lib/DebugInfo/GSYM/Range.cpp new file mode 100644 index 0000000000..044ddb8ba1 --- /dev/null +++ b/contrib/libs/llvm12/lib/DebugInfo/GSYM/Range.cpp @@ -0,0 +1,124 @@ +//===- Range.cpp ------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" +#include <algorithm> +#include <inttypes.h> + +using namespace llvm; +using namespace gsym; + + +void AddressRanges::insert(AddressRange Range) { + if (Range.size() == 0) + return; + + auto It = llvm::upper_bound(Ranges, Range); + auto It2 = It; + while (It2 != Ranges.end() && It2->Start < Range.End) + ++It2; + if (It != It2) { + Range.End = std::max(Range.End, It2[-1].End); + It = Ranges.erase(It, It2); + } + if (It != Ranges.begin() && Range.Start < It[-1].End) + It[-1].End = std::max(It[-1].End, Range.End); + else + Ranges.insert(It, Range); +} + +bool AddressRanges::contains(uint64_t Addr) const { + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.Start <= Addr; }); + return It != Ranges.begin() && Addr < It[-1].End; +} + +bool AddressRanges::contains(AddressRange Range) const { + if (Range.size() == 0) + return false; + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.Start <= Range.Start; }); + if (It == Ranges.begin()) + return false; + return Range.End <= It[-1].End; +} + +Optional<AddressRange> +AddressRanges::getRangeThatContains(uint64_t Addr) const { + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.Start <= Addr; }); + if (It != Ranges.begin() && Addr < It[-1].End) + return It[-1]; + return llvm::None; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) { + return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")"; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) { + size_t Size = AR.size(); + for (size_t I = 0; I < Size; ++I) { + if (I) + OS << ' '; + OS << AR[I]; + } + return OS; +} + +void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const { + assert(Start >= BaseAddr); + O.writeULEB(Start - BaseAddr); + O.writeULEB(size()); +} + +void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset) { + const uint64_t AddrOffset = Data.getULEB128(&Offset); + const uint64_t Size = Data.getULEB128(&Offset); + const uint64_t StartAddr = BaseAddr + AddrOffset; + Start = StartAddr; + End = StartAddr + Size; +} + +void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const { + O.writeULEB(Ranges.size()); + if (Ranges.empty()) + return; + for (auto Range : Ranges) + Range.encode(O, BaseAddr); +} + +void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset) { + clear(); + uint64_t NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return; + Ranges.resize(NumRanges); + for (auto &Range : Ranges) + Range.decode(Data, BaseAddr, Offset); +} + +void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) { + Data.getULEB128(&Offset); + Data.getULEB128(&Offset); +} + +uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) { + uint64_t NumRanges = Data.getULEB128(&Offset); + for (uint64_t I=0; I<NumRanges; ++I) + AddressRange::skip(Data, Offset); + return NumRanges; +} |