diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/llvm12/include/llvm/MC/MCDisassembler | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/llvm12/include/llvm/MC/MCDisassembler')
4 files changed, 424 insertions, 0 deletions
diff --git a/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCDisassembler.h b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCDisassembler.h new file mode 100644 index 0000000000..cd43c93081 --- /dev/null +++ b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -0,0 +1,208 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H +#define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/XCOFF.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include <cstdint> +#include <memory> +#include <vector> + +namespace llvm { + +struct XCOFFSymbolInfo { + Optional<XCOFF::StorageMappingClass> StorageMappingClass; + Optional<uint32_t> Index; + bool IsLabel; + XCOFFSymbolInfo(Optional<XCOFF::StorageMappingClass> Smc, + Optional<uint32_t> Idx, bool Label) + : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {} + + bool operator<(const XCOFFSymbolInfo &SymInfo) const; +}; + +struct SymbolInfoTy { + uint64_t Addr; + StringRef Name; + union { // Active member is selected by IsXCOFF. + uint8_t Type; + XCOFFSymbolInfo XCOFFSymInfo; + }; + +private: + bool IsXCOFF; // True when XCOFFSymInfo is the active union member. + +public: + SymbolInfoTy(uint64_t Addr, StringRef Name, + Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx, + bool Label) + : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {} + SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type) + : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {} + bool isXCOFF() const 
{ return IsXCOFF; } + +private: + friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { + assert(P1.IsXCOFF == P2.IsXCOFF && + "P1.IsXCOFF should be equal to P2.IsXCOFF."); + if (P1.IsXCOFF) + return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < + std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); + + return std::tie(P1.Addr, P1.Name, P1.Type) < + std::tie(P2.Addr, P2.Name, P2.Type); + } +}; + +using SectionSymbolsTy = std::vector<SymbolInfoTy>; + +template <typename T> class ArrayRef; +class MCContext; +class MCInst; +class MCSubtargetInfo; +class raw_ostream; + +/// Superclass for all disassemblers. Consumes a memory region and provides an +/// array of assembly instructions. +class MCDisassembler { +public: + /// Ternary decode status. Most backends will just use Fail and + /// Success; however, some have a concept of an instruction with + /// understandable semantics but which is architecturally + /// incorrect. An example of this is ARM UNPREDICTABLE instructions, + /// which are disassemblable but cause undefined behaviour. + /// + /// Because it makes sense to disassemble these instructions, there + /// is a "soft fail" failure mode that indicates the MCInst& is + /// valid but architecturally incorrect. + /// + /// The enum numbers are deliberately chosen such that reduction + /// from Success -> SoftFail -> Fail can be done with a simple + /// bitwise-AND (Unpredictable below is SoftFail): + /// + /// LEFT & TOP = | Success Unpredictable Fail + /// --------------+----------------------------------- + /// Success | Success Unpredictable Fail + /// Unpredictable | Unpredictable Unpredictable Fail + /// Fail | Fail Fail Fail + /// + /// An easy way of encoding this is as 0b11, 0b01, 0b00 for + /// Success, SoftFail, Fail respectively. + enum DecodeStatus { + Fail = 0, // 0b00 + SoftFail = 1, // 0b01 + Success = 3 // 0b11 + }; + + MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : Ctx(Ctx), STI(STI) {} + + virtual ~MCDisassembler(); + + /// Returns the disassembly of a single instruction. 
+ /// + /// \param Instr - An MCInst to populate with the contents of the + /// instruction. + /// \param Size - A value to populate with the size of the instruction, or + /// the number of bytes consumed while attempting to decode + /// an invalid instruction. + /// \param Bytes - A reference to the actual bytes of the instruction. + /// \param Address - The address, in the memory space of region, of the first + /// byte of the instruction. + /// \param CStream - The stream to print comments and annotations on. + /// \return - MCDisassembler::Success if the instruction is valid, + /// MCDisassembler::SoftFail if the instruction was + /// disassemblable but invalid, + /// MCDisassembler::Fail if the instruction was invalid. + virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &CStream) const = 0; + + /// Used to perform separate target-specific disassembly for a particular + /// symbol. May parse any prelude that precedes instructions after the + /// start of a symbol, or the entire symbol. + /// This is used for example by WebAssembly to decode preludes. + /// + /// Base implementation returns None, so by default targets do not treat + /// symbols separately. + /// + /// \param Symbol - The symbol. + /// \param Size - The number of bytes consumed. + /// \param Bytes - A reference to the actual bytes at the symbol location. + /// \param Address - The address, in the memory space of region, of the first + /// byte of the symbol. + /// \param CStream - The stream to print comments and annotations on. + /// \return - MCDisassembler::Success if bytes are decoded + /// successfully. Size must hold the number of bytes that + /// were decoded. + /// - MCDisassembler::Fail if the bytes are invalid. Size + /// must hold the number of bytes that were decoded before + /// failing. The target must print nothing. This can be + /// done by buffering the output if needed. 
+ /// - None if the target doesn't want to handle the symbol + /// separately. The value of Size is ignored in this case. + virtual Optional<DecodeStatus> + onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &CStream) const; + // TODO: + // Implement similar hooks that can be used at other points during + // disassembly. Something along the following lines: + // - onBeforeInstructionDecode() + // - onAfterInstructionDecode() + // - onSymbolEnd() + // It should help move much of the target-specific code from llvm-objdump to + // the respective target disassemblers. + +private: + MCContext &Ctx; // Bound at construction; exposed through getContext(). + +protected: + // Subtarget information, for instruction decoding predicates if required. + const MCSubtargetInfo &STI; + std::unique_ptr<MCSymbolizer> Symbolizer; // Owned; see setSymbolizer(). + +public: + // Helpers around MCSymbolizer. + bool tryAddingSymbolicOperand(MCInst &Inst, + int64_t Value, + uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize) const; + + void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; + + /// Set \p Symzer as the current symbolizer. + /// This takes ownership of \p Symzer, and deletes the previously set one. + void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); + + MCContext& getContext() const { return Ctx; } + + const MCSubtargetInfo& getSubtargetInfo() const { return STI; } + + // Marked mutable because we cache it inside the disassembler, rather than + // having to pass it around as an argument through all the autogenerated code. 
+ mutable raw_ostream *CommentStream = nullptr; +}; + +} // end namespace llvm + +#endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h new file mode 100644 index 0000000000..9e4284f86c --- /dev/null +++ b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h @@ -0,0 +1,68 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===-- llvm/MC/MCExternalSymbolizer.h - ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCExternalSymbolizer class, which +// enables library users to provide callbacks (through the C API) to do the +// symbolization externally. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H +#define LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H + +#include "llvm-c/Disassembler.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include <memory> + +namespace llvm { + +/// Symbolize using user-provided C API callbacks. +/// +/// See llvm-c/Disassembler.h. +class MCExternalSymbolizer : public MCSymbolizer { +protected: + /// \name Hooks for symbolic disassembly via the public 'C' interface. + /// @{ + /// The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + /// The function to look up a symbol name. 
+ LLVMSymbolLookupCallback SymbolLookUp; + /// The pointer to the block of symbolic information for the above callback. + void *DisInfo; + /// @} + +public: + MCExternalSymbolizer(MCContext &Ctx, + std::unique_ptr<MCRelocationInfo> RelInfo, + LLVMOpInfoCallback getOpInfo, + LLVMSymbolLookupCallback symbolLookUp, void *disInfo) + : MCSymbolizer(Ctx, std::move(RelInfo)), GetOpInfo(getOpInfo), + SymbolLookUp(symbolLookUp), DisInfo(disInfo) {} + + bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize) override; + void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream, + int64_t Value, + uint64_t Address) override; +}; + +} // end namespace llvm + +#endif // LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCRelocationInfo.h b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCRelocationInfo.h new file mode 100644 index 0000000000..8e006d7eac --- /dev/null +++ b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCRelocationInfo.h @@ -0,0 +1,55 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- llvm/MC/MCRelocationInfo.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the MCRelocationInfo class, which provides methods to +// create MCExprs from relocations, either found in an object::ObjectFile +// (object::RelocationRef), or provided through the C API. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDISASSEMBLER_MCRELOCATIONINFO_H +#define LLVM_MC_MCDISASSEMBLER_MCRELOCATIONINFO_H + +namespace llvm { + +class MCContext; +class MCExpr; + +/// Create MCExprs from relocations found in an object file. +class MCRelocationInfo { +protected: + MCContext &Ctx; + +public: + MCRelocationInfo(MCContext &Ctx); + MCRelocationInfo(const MCRelocationInfo &) = delete; // Not copy-constructible. + MCRelocationInfo &operator=(const MCRelocationInfo &) = delete; // Not copy-assignable. + virtual ~MCRelocationInfo(); + + /// Create an MCExpr for the target-specific \p VariantKind. + /// The VariantKinds are defined in llvm-c/Disassembler.h. + /// Used by MCExternalSymbolizer. + /// \returns If possible, an MCExpr corresponding to VariantKind, else a null pointer. + virtual const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr, + unsigned VariantKind); +}; + +} // end namespace llvm + +#endif // LLVM_MC_MCDISASSEMBLER_MCRELOCATIONINFO_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCSymbolizer.h new file mode 100644 index 0000000000..e8f9f9a158 --- /dev/null +++ b/contrib/libs/llvm12/include/llvm/MC/MCDisassembler/MCSymbolizer.h @@ -0,0 +1,93 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- llvm/MC/MCSymbolizer.h - MCSymbolizer class --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCSymbolizer class, which is used +// to symbolize instructions decoded from an object, that is, transform their +// immediate operands to MCExprs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H +#define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H + +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" +#include <algorithm> +#include <cstdint> +#include <memory> + +namespace llvm { + +class MCContext; +class MCInst; +class raw_ostream; + +/// Symbolize and annotate disassembled instructions. +/// +/// For now this mimics the old symbolization logic (from both ARM and x86), which +/// relied on user-provided (C API) callbacks to do the actual symbol lookup in +/// the object file. This was moved to MCExternalSymbolizer. +/// A better API would not rely on actually calling the two methods here from +/// inside each disassembler, but would use the instr info to determine what +/// operands are actually symbolizable, and in what way. I don't think this +/// information exists right now. +class MCSymbolizer { +protected: + MCContext &Ctx; + std::unique_ptr<MCRelocationInfo> RelInfo; + +public: + /// Construct an MCSymbolizer, taking ownership of \p RelInfo. + MCSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo) + : Ctx(Ctx), RelInfo(std::move(RelInfo)) { + } + + MCSymbolizer(const MCSymbolizer &) = delete; // Not copy-constructible. + MCSymbolizer &operator=(const MCSymbolizer &) = delete; // Not copy-assignable. + virtual ~MCSymbolizer(); + + /// Try to add a symbolic operand instead of \p Value to the MCInst. + /// + /// Instead of having a difficult-to-read immediate, a symbolic operand would + /// represent this immediate in a more understandable way, for instance as a + /// symbol or an offset from a symbol. 
Relocations can also be used to enrich + /// the symbolic expression. + /// \param Inst - The MCInst into which to insert the symbolic operand. + /// \param cStream - Stream to print comments and annotations on. + /// \param Value - Operand value, pc-adjusted by the caller if necessary. + /// \param Address - Load address of the instruction. + /// \param IsBranch - Is the instruction a branch? + /// \param Offset - Byte offset of the operand inside the instruction. + /// \param InstSize - Size of the instruction in bytes. + /// \return Whether a symbolic operand was added. + virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, + int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, + uint64_t InstSize) = 0; + + /// Try to add a comment on the PC-relative load. + /// For instance, in Mach-O, this is used to add annotations to instructions + /// that use C string literals, as found in __cstring. + virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream, + int64_t Value, + uint64_t Address) = 0; +}; + +} // end namespace llvm + +#endif // LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif |