diff options
author | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
commit | 6ffe9e53658409f212834330e13564e4952558f6 (patch) | |
tree | 85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm14/tools/llvm-cfi-verify | |
parent | 726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff) | |
download | ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz |
YQ Connector: support managed ClickHouse
Со стороны dqrun можно обратиться к инстансу коннектора, который работает на streaming стенде, и извлечь данные из облачного CH.
Diffstat (limited to 'contrib/libs/llvm14/tools/llvm-cfi-verify')
7 files changed, 1703 insertions, 0 deletions
diff --git a/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/FileAnalysis.cpp b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/FileAnalysis.cpp new file mode 100644 index 0000000000..dac2bdab04 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/FileAnalysis.cpp @@ -0,0 +1,599 @@ +//===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FileAnalysis.h" +#include "GraphBuilder.h" + +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" + +using Instr = llvm::cfi_verify::FileAnalysis::Instr; +using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; + +namespace llvm { +namespace cfi_verify { + +bool IgnoreDWARFFlag; + +static cl::opt<bool, true> IgnoreDWARFArg( + "ignore-dwarf", + cl::desc( + "Ignore all DWARF data. 
This relaxes the requirements for all " + "statically linked libraries to have been compiled with '-g', but " + "will result in false positives for 'CFI unprotected' instructions."), + cl::location(IgnoreDWARFFlag), cl::init(false)); + +StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { + switch (Status) { + case CFIProtectionStatus::PROTECTED: + return "PROTECTED"; + case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: + return "FAIL_NOT_INDIRECT_CF"; + case CFIProtectionStatus::FAIL_ORPHANS: + return "FAIL_ORPHANS"; + case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: + return "FAIL_BAD_CONDITIONAL_BRANCH"; + case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: + return "FAIL_REGISTER_CLOBBERED"; + case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: + return "FAIL_INVALID_INSTRUCTION"; + } + llvm_unreachable("Attempted to stringify an unknown enum value."); +} + +Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { + // Open the filename provided. + Expected<object::OwningBinary<object::Binary>> BinaryOrErr = + object::createBinary(Filename); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + + // Construct the object and allow it to take ownership of the binary. + object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); + FileAnalysis Analysis(std::move(Binary)); + + Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); + if (!Analysis.Object) + return make_error<UnsupportedDisassembly>("Failed to cast object"); + + switch (Analysis.Object->getArch()) { + case Triple::x86: + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: + break; + default: + return make_error<UnsupportedDisassembly>("Unsupported architecture."); + } + + Analysis.ObjectTriple = Analysis.Object->makeTriple(); + Analysis.Features = Analysis.Object->getFeatures(); + + // Init the rest of the object. 
+ if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) + return std::move(InitResponse); + + if (auto SectionParseResponse = Analysis.parseCodeSections()) + return std::move(SectionParseResponse); + + if (auto SymbolTableParseResponse = Analysis.parseSymbolTable()) + return std::move(SymbolTableParseResponse); + + return std::move(Analysis); +} + +FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) + : Binary(std::move(Binary)) {} + +FileAnalysis::FileAnalysis(const Triple &ObjectTriple, + const SubtargetFeatures &Features) + : ObjectTriple(ObjectTriple), Features(Features) {} + +const Instr * +FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { + std::map<uint64_t, Instr>::const_iterator KV = + Instructions.find(InstrMeta.VMAddress); + if (KV == Instructions.end() || KV == Instructions.begin()) + return nullptr; + + if (!(--KV)->second.Valid) + return nullptr; + + return &KV->second; +} + +const Instr * +FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { + std::map<uint64_t, Instr>::const_iterator KV = + Instructions.find(InstrMeta.VMAddress); + if (KV == Instructions.end() || ++KV == Instructions.end()) + return nullptr; + + if (!KV->second.Valid) + return nullptr; + + return &KV->second; +} + +bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { + for (const auto &Operand : InstrMeta.Instruction) { + if (Operand.isReg()) + return true; + } + return false; +} + +const Instr *FileAnalysis::getInstruction(uint64_t Address) const { + const auto &InstrKV = Instructions.find(Address); + if (InstrKV == Instructions.end()) + return nullptr; + + return &InstrKV->second; +} + +const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { + const auto &InstrKV = Instructions.find(Address); + assert(InstrKV != Instructions.end() && "Address doesn't exist."); + return InstrKV->second; +} + +bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { + const auto 
&InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); + return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta); +} + +bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { + const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); + if (!InstrDesc.isCall()) + return false; + uint64_t Target; + if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, + InstrMeta.InstructionSize, Target)) + return false; + return TrapOnFailFunctionAddresses.contains(Target); +} + +bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { + if (!InstrMeta.Valid) + return false; + + if (isCFITrap(InstrMeta)) + return false; + + const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); + if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) + return InstrDesc.isConditionalBranch(); + + return true; +} + +const Instr * +FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { + if (!InstrMeta.Valid) + return nullptr; + + if (isCFITrap(InstrMeta)) + return nullptr; + + const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); + const Instr *NextMetaPtr; + if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { + if (InstrDesc.isConditionalBranch()) + return nullptr; + + uint64_t Target; + if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, + InstrMeta.InstructionSize, Target)) + return nullptr; + + NextMetaPtr = getInstruction(Target); + } else { + NextMetaPtr = + getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); + } + + if (!NextMetaPtr || !NextMetaPtr->Valid) + return nullptr; + + return NextMetaPtr; +} + +std::set<const Instr *> +FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { + std::set<const Instr *> CFCrossReferences; + const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); + + if (PrevInstruction && canFallThrough(*PrevInstruction)) + 
CFCrossReferences.insert(PrevInstruction); + + const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); + if (TargetRefsKV == StaticBranchTargetings.end()) + return CFCrossReferences; + + for (uint64_t SourceInstrAddress : TargetRefsKV->second) { + const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); + if (SourceInstrKV == Instructions.end()) { + errs() << "Failed to find source instruction at address " + << format_hex(SourceInstrAddress, 2) + << " for the cross-reference to instruction at address " + << format_hex(InstrMeta.VMAddress, 2) << ".\n"; + continue; + } + + CFCrossReferences.insert(&SourceInstrKV->second); + } + + return CFCrossReferences; +} + +const std::set<object::SectionedAddress> & +FileAnalysis::getIndirectInstructions() const { + return IndirectInstructions; +} + +const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { + return RegisterInfo.get(); +} + +const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } + +const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { + return MIA.get(); +} + +Expected<DIInliningInfo> +FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) { + assert(Symbolizer != nullptr && "Symbolizer is invalid."); + + return Symbolizer->symbolizeInlinedCode(std::string(Object->getFileName()), + Address); +} + +CFIProtectionStatus +FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { + const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); + if (!InstrMetaPtr) + return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; + + const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); + if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) + return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; + + if (!usesRegisterOperand(*InstrMetaPtr)) + return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; + + if (!Graph.OrphanedNodes.empty()) + return CFIProtectionStatus::FAIL_ORPHANS; + + for (const auto 
&BranchNode : Graph.ConditionalBranchNodes) { + if (!BranchNode.CFIProtection) + return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; + } + + if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) + return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; + + return CFIProtectionStatus::PROTECTED; +} + +uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { + assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); + + // Get the set of registers we must check to ensure they're not clobbered. + const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); + DenseSet<unsigned> RegisterNumbers; + for (const auto &Operand : IndirectCF.Instruction) { + if (Operand.isReg()) + RegisterNumbers.insert(Operand.getReg()); + } + assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); + + // Now check all branches to indirect CFs and ensure no clobbering happens. + for (const auto &Branch : Graph.ConditionalBranchNodes) { + uint64_t Node; + if (Branch.IndirectCFIsOnTargetPath) + Node = Branch.Target; + else + Node = Branch.Fallthrough; + + // Some architectures (e.g., AArch64) cannot load in an indirect branch, so + // we allow them one load. + bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); + + // We walk backwards from the indirect CF. It is the last node returned by + // Graph.flattenAddress, so we skip it since we already handled it. 
+ DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; + std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); + for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { + Node = *I; + const Instr &NodeInstr = getInstructionOrDie(Node); + const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); + + for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); + RI != RE; ++RI) { + unsigned RegNum = *RI; + if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, + *RegisterInfo)) { + if (!canLoad || !InstrDesc.mayLoad()) + return Node; + canLoad = false; + CurRegisterNumbers.erase(RI); + // Add the registers this load reads to those we check for clobbers. + for (unsigned i = InstrDesc.getNumDefs(), + e = InstrDesc.getNumOperands(); i != e; i++) { + const auto &Operand = NodeInstr.Instruction.getOperand(i); + if (Operand.isReg()) + CurRegisterNumbers.insert(Operand.getReg()); + } + break; + } + } + } + } + + return Graph.BaseAddress; +} + +void FileAnalysis::printInstruction(const Instr &InstrMeta, + raw_ostream &OS) const { + Printer->printInst(&InstrMeta.Instruction, 0, "", *SubtargetInfo.get(), OS); +} + +Error FileAnalysis::initialiseDisassemblyMembers() { + std::string TripleName = ObjectTriple.getTriple(); + ArchName = ""; + MCPU = ""; + std::string ErrorString; + + LLVMSymbolizer::Options Opt; + Opt.UseSymbolTable = false; + Symbolizer.reset(new LLVMSymbolizer(Opt)); + + ObjectTarget = + TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); + if (!ObjectTarget) + return make_error<UnsupportedDisassembly>( + (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + + "\", failed with error: " + ErrorString) + .str()); + + RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); + if (!RegisterInfo) + return make_error<UnsupportedDisassembly>( + "Failed to initialise RegisterInfo."); + + MCTargetOptions MCOptions; + AsmInfo.reset( + ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName, 
MCOptions)); + if (!AsmInfo) + return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); + + SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( + TripleName, MCPU, Features.getString())); + if (!SubtargetInfo) + return make_error<UnsupportedDisassembly>( + "Failed to initialise SubtargetInfo."); + + MII.reset(ObjectTarget->createMCInstrInfo()); + if (!MII) + return make_error<UnsupportedDisassembly>("Failed to initialise MII."); + + Context.reset(new MCContext(Triple(TripleName), AsmInfo.get(), + RegisterInfo.get(), SubtargetInfo.get())); + + Disassembler.reset( + ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); + + if (!Disassembler) + return make_error<UnsupportedDisassembly>( + "No disassembler available for target"); + + MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); + + Printer.reset(ObjectTarget->createMCInstPrinter( + ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, + *RegisterInfo)); + + return Error::success(); +} + +Error FileAnalysis::parseCodeSections() { + if (!IgnoreDWARFFlag) { + std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); + if (!DWARF) + return make_error<StringError>("Could not create DWARF information.", + inconvertibleErrorCode()); + + bool LineInfoValid = false; + + for (auto &Unit : DWARF->compile_units()) { + const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); + if (LineTable && !LineTable->Rows.empty()) { + LineInfoValid = true; + break; + } + } + + if (!LineInfoValid) + return make_error<StringError>( + "DWARF line information missing. Did you compile with '-g'?", + inconvertibleErrorCode()); + } + + for (const object::SectionRef &Section : Object->sections()) { + // Ensure only executable sections get analysed. + if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) + continue; + + // Avoid checking the PLT since it produces spurious failures on AArch64 + // when ignoring DWARF data. 
+ Expected<StringRef> NameOrErr = Section.getName(); + if (NameOrErr && *NameOrErr == ".plt") + continue; + consumeError(NameOrErr.takeError()); + + Expected<StringRef> Contents = Section.getContents(); + if (!Contents) + return Contents.takeError(); + ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents); + + parseSectionContents(SectionBytes, + {Section.getAddress(), Section.getIndex()}); + } + return Error::success(); +} + +void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, + object::SectionedAddress Address) { + assert(Symbolizer && "Symbolizer is uninitialised."); + MCInst Instruction; + Instr InstrMeta; + uint64_t InstructionSize; + + for (uint64_t Byte = 0; Byte < SectionBytes.size();) { + bool ValidInstruction = + Disassembler->getInstruction(Instruction, InstructionSize, + SectionBytes.drop_front(Byte), 0, + outs()) == MCDisassembler::Success; + + Byte += InstructionSize; + + uint64_t VMAddress = Address.Address + Byte - InstructionSize; + InstrMeta.Instruction = Instruction; + InstrMeta.VMAddress = VMAddress; + InstrMeta.InstructionSize = InstructionSize; + InstrMeta.Valid = ValidInstruction; + + addInstruction(InstrMeta); + + if (!ValidInstruction) + continue; + + // Skip additional parsing for instructions that do not affect the control + // flow. + const auto &InstrDesc = MII->get(Instruction.getOpcode()); + if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) + continue; + + uint64_t Target; + if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { + // If the target can be evaluated, it's not indirect. + StaticBranchTargetings[Target].push_back(VMAddress); + continue; + } + + if (!usesRegisterOperand(InstrMeta)) + continue; + + if (InstrDesc.isReturn()) + continue; + + // Check if this instruction exists in the range of the DWARF metadata. 
+ if (!IgnoreDWARFFlag) { + auto LineInfo = + Symbolizer->symbolizeCode(std::string(Object->getFileName()), + {VMAddress, Address.SectionIndex}); + if (!LineInfo) { + handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { + errs() << "Symbolizer failed to get line: " << E.message() << "\n"; + }); + continue; + } + + if (LineInfo->FileName == DILineInfo::BadString) + continue; + } + + IndirectInstructions.insert({VMAddress, Address.SectionIndex}); + } +} + +void FileAnalysis::addInstruction(const Instr &Instruction) { + const auto &KV = + Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); + if (!KV.second) { + errs() << "Failed to add instruction at address " + << format_hex(Instruction.VMAddress, 2) + << ": Instruction at this address already exists.\n"; + exit(EXIT_FAILURE); + } +} + +Error FileAnalysis::parseSymbolTable() { + // Functions that will trap on CFI violations. + SmallSet<StringRef, 4> TrapOnFailFunctions; + TrapOnFailFunctions.insert("__cfi_slowpath"); + TrapOnFailFunctions.insert("__cfi_slowpath_diag"); + TrapOnFailFunctions.insert("abort"); + + // Look through the list of symbols for functions that will trap on CFI + // violations. 
+ for (auto &Sym : Object->symbols()) { + auto SymNameOrErr = Sym.getName(); + if (!SymNameOrErr) + consumeError(SymNameOrErr.takeError()); + else if (TrapOnFailFunctions.contains(*SymNameOrErr)) { + auto AddrOrErr = Sym.getAddress(); + if (!AddrOrErr) + consumeError(AddrOrErr.takeError()); + else + TrapOnFailFunctionAddresses.insert(*AddrOrErr); + } + } + if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) { + for (const auto &Addr : ElfObject->getPltAddresses()) { + if (!Addr.first) + continue; + object::SymbolRef Sym(*Addr.first, Object); + auto SymNameOrErr = Sym.getName(); + if (!SymNameOrErr) + consumeError(SymNameOrErr.takeError()); + else if (TrapOnFailFunctions.contains(*SymNameOrErr)) + TrapOnFailFunctionAddresses.insert(Addr.second); + } + } + return Error::success(); +} + +UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) + : Text(std::string(Text)) {} + +char UnsupportedDisassembly::ID; +void UnsupportedDisassembly::log(raw_ostream &OS) const { + OS << "Could not initialise disassembler: " << Text; +} + +std::error_code UnsupportedDisassembly::convertToErrorCode() const { + return std::error_code(); +} + +} // namespace cfi_verify +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/FileAnalysis.h b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/FileAnalysis.h new file mode 100644 index 0000000000..8fd687d1b1 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/FileAnalysis.h @@ -0,0 +1,248 @@ +//===- FileAnalysis.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H +#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" + +#include <functional> +#include <set> +#include <string> +#include <unordered_map> + +namespace llvm { +namespace cfi_verify { + +struct GraphResult; + +extern bool IgnoreDWARFFlag; + +enum class CFIProtectionStatus { + // This instruction is protected by CFI. + PROTECTED, + // The instruction is not an indirect control flow instruction, and thus + // shouldn't be protected. + FAIL_NOT_INDIRECT_CF, + // There is a path to the instruction that was unexpected. + FAIL_ORPHANS, + // There is a path to the instruction from a conditional branch that does not + // properly check the destination for this vcall/icall. + FAIL_BAD_CONDITIONAL_BRANCH, + // One of the operands of the indirect CF instruction is modified between the + // CFI-check and execution. 
+ FAIL_REGISTER_CLOBBERED, + // The instruction referenced does not exist. This normally indicates an + // error in the program, where you try to validate a graph that was created + // in a different FileAnalysis object. + FAIL_INVALID_INSTRUCTION, +}; + +StringRef stringCFIProtectionStatus(CFIProtectionStatus Status); + +// Disassembler and analysis tool for machine code files. Keeps track of non- +// sequential control flows, including indirect control flow instructions. +class FileAnalysis { +public: + // A metadata struct for an instruction. + struct Instr { + uint64_t VMAddress; // Virtual memory address of this instruction. + MCInst Instruction; // Instruction. + uint64_t InstructionSize; // Size of this instruction. + bool Valid; // Is this a valid instruction? If false, Instr::Instruction is + // undefined. + }; + + // Construct a FileAnalysis from a file path. + static Expected<FileAnalysis> Create(StringRef Filename); + + // Construct and take ownership of the supplied object. Do not use this + // constructor, prefer to use FileAnalysis::Create instead. + FileAnalysis(object::OwningBinary<object::Binary> Binary); + FileAnalysis() = delete; + FileAnalysis(const FileAnalysis &) = delete; + FileAnalysis(FileAnalysis &&Other) = default; + + // Returns the instruction at the provided address. Returns nullptr if there + // is no instruction at the provided address. + const Instr *getInstruction(uint64_t Address) const; + + // Returns the instruction at the provided address, dying if the instruction is + // not found. + const Instr &getInstructionOrDie(uint64_t Address) const; + + // Returns a pointer to the previous/next instruction in sequence, + // respectively. Returns nullptr if the next/prev instruction doesn't exist, + // or if the provided instruction doesn't exist. 
+ const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const; + const Instr *getNextInstructionSequential(const Instr &InstrMeta) const; + + // Returns whether this instruction is used by CFI to trap the program. + bool isCFITrap(const Instr &InstrMeta) const; + + // Returns whether this instruction is a call to a function that will trap on + // CFI violations (i.e., it serves as a trap in this instance). + bool willTrapOnCFIViolation(const Instr &InstrMeta) const; + + // Returns whether this instruction can fall through to the next instruction. + // Undefined (and bad) instructions cannot fall through, and instructions that + // modify the control flow can only fall through if they are conditional + // branches or calls. + bool canFallThrough(const Instr &InstrMeta) const; + + // Returns the definitive next instruction. This is different from the next + // instruction sequentially as it will follow unconditional branches (assuming + // they can be resolved at compile time, i.e. not indirect). This method + // returns nullptr if the provided instruction does not transfer control flow + // to exactly one instruction that is known deterministically at compile time. + // Also returns nullptr if the deterministic target does not exist in this + // file. + const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const; + + // Get a list of deterministic control flows that lead to the provided + // instruction. This list includes all static control flow cross-references as + // well as the previous instruction if it can fall through. + std::set<const Instr *> + getDirectControlFlowXRefs(const Instr &InstrMeta) const; + + // Returns whether this instruction uses a register operand. + bool usesRegisterOperand(const Instr &InstrMeta) const; + + // Returns the list of indirect instructions. 
+ const std::set<object::SectionedAddress> &getIndirectInstructions() const; + + const MCRegisterInfo *getRegisterInfo() const; + const MCInstrInfo *getMCInstrInfo() const; + const MCInstrAnalysis *getMCInstrAnalysis() const; + + // Returns the inlining information for the provided address. + Expected<DIInliningInfo> + symbolizeInlinedCode(object::SectionedAddress Address); + + // Returns whether the provided Graph represents a protected indirect control + // flow instruction in this file. + CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const; + + // Returns the first place the operand register is clobbered between the CFI- + // check and the indirect CF instruction execution. We do this by walking + // backwards from the indirect CF and ensuring there is at most one load + // involving the operand register (which is the indirect CF itself on x86). + // If the register is not modified, returns the address of the indirect CF + // instruction. The result is undefined if the provided graph does not fall + // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see + // CFIProtectionStatus). + uint64_t indirectCFOperandClobber(const GraphResult& Graph) const; + + // Prints an instruction to the provided stream using this object's pretty- + // printers. + void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const; + +protected: + // Construct a blank object with the provided triple and features. Used in + // testing, where a sub class will dependency inject protected methods to + // allow analysis of raw binary, without requiring a fully valid ELF file. + FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features); + + // Add an instruction to this object. + void addInstruction(const Instr &Instruction); + + // Disassemble and parse the provided bytes into this object. Instruction + // address calculation is done relative to the provided SectionAddress. 
+ void parseSectionContents(ArrayRef<uint8_t> SectionBytes, + object::SectionedAddress Address); + + // Constructs and initialises members required for disassembly. + Error initialiseDisassemblyMembers(); + + // Parses code sections from the internal object file. Saves them into the + // internal members. Should only be called once by Create(). + Error parseCodeSections(); + + // Parses the symbol table to look for the addresses of functions that will + // trap on CFI violations. + Error parseSymbolTable(); + +private: + // Members that describe the input file. + object::OwningBinary<object::Binary> Binary; + const object::ObjectFile *Object = nullptr; + Triple ObjectTriple; + std::string ArchName; + std::string MCPU; + const Target *ObjectTarget = nullptr; + SubtargetFeatures Features; + + // Members required for disassembly. + std::unique_ptr<const MCRegisterInfo> RegisterInfo; + std::unique_ptr<const MCAsmInfo> AsmInfo; + std::unique_ptr<MCSubtargetInfo> SubtargetInfo; + std::unique_ptr<const MCInstrInfo> MII; + std::unique_ptr<MCContext> Context; + std::unique_ptr<const MCDisassembler> Disassembler; + std::unique_ptr<const MCInstrAnalysis> MIA; + std::unique_ptr<MCInstPrinter> Printer; + + // Symbolizer used for debug information parsing. + std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer; + + // A mapping between the virtual memory address to the instruction metadata + // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per- + // insertion allocation. + std::map<uint64_t, Instr> Instructions; + + // Contains a mapping between a specific address, and a list of instructions + // that use this address as a branch target (including call instructions). + DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings; + + // A list of addresses of indirect control flow instructions. + std::set<object::SectionedAddress> IndirectInstructions; + + // The addresses of functions that will trap on CFI violations. 
+ SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses; +}; + +class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> { +public: + static char ID; + std::string Text; + + UnsupportedDisassembly(StringRef Text); + + void log(raw_ostream &OS) const override; + std::error_code convertToErrorCode() const override; +}; + +} // namespace cfi_verify +} // namespace llvm + +#endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H diff --git a/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/GraphBuilder.cpp b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/GraphBuilder.cpp new file mode 100644 index 0000000000..88fbbdf6b2 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/GraphBuilder.cpp @@ -0,0 +1,339 @@ +//===- GraphBuilder.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "GraphBuilder.h" + +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" + +using Instr = 
llvm::cfi_verify::FileAnalysis::Instr; + +namespace llvm { +namespace cfi_verify { + +uint64_t SearchLengthForUndef; +uint64_t SearchLengthForConditionalBranch; + +static cl::opt<uint64_t, true> SearchLengthForUndefArg( + "search-length-undef", + cl::desc("Specify the maximum amount of instructions " + "to inspect when searching for an undefined " + "instruction from a conditional branch."), + cl::location(SearchLengthForUndef), cl::init(2)); + +static cl::opt<uint64_t, true> SearchLengthForConditionalBranchArg( + "search-length-cb", + cl::desc("Specify the maximum amount of instructions " + "to inspect when searching for a conditional " + "branch from an indirect control flow."), + cl::location(SearchLengthForConditionalBranch), cl::init(20)); + +std::vector<uint64_t> GraphResult::flattenAddress(uint64_t Address) const { + std::vector<uint64_t> Addresses; + + auto It = IntermediateNodes.find(Address); + Addresses.push_back(Address); + + while (It != IntermediateNodes.end()) { + Addresses.push_back(It->second); + It = IntermediateNodes.find(It->second); + } + return Addresses; +} + +void printPairToDOT(const FileAnalysis &Analysis, raw_ostream &OS, + uint64_t From, uint64_t To) { + OS << " \"" << format_hex(From, 2) << ": "; + Analysis.printInstruction(Analysis.getInstructionOrDie(From), OS); + OS << "\" -> \"" << format_hex(To, 2) << ": "; + Analysis.printInstruction(Analysis.getInstructionOrDie(To), OS); + OS << "\"\n"; +} + +void GraphResult::printToDOT(const FileAnalysis &Analysis, + raw_ostream &OS) const { + std::map<uint64_t, uint64_t> SortedIntermediateNodes( + IntermediateNodes.begin(), IntermediateNodes.end()); + OS << "digraph graph_" << format_hex(BaseAddress, 2) << " {\n"; + for (const auto &KV : SortedIntermediateNodes) + printPairToDOT(Analysis, OS, KV.first, KV.second); + + for (auto &BranchNode : ConditionalBranchNodes) { + for (auto &V : {BranchNode.Target, BranchNode.Fallthrough}) + printPairToDOT(Analysis, OS, BranchNode.Address, V); + } + OS << 
"}\n"; +} + +GraphResult GraphBuilder::buildFlowGraph(const FileAnalysis &Analysis, + object::SectionedAddress Address) { + GraphResult Result; + Result.BaseAddress = Address.Address; + DenseSet<uint64_t> OpenedNodes; + + const auto &IndirectInstructions = Analysis.getIndirectInstructions(); + + // check that IndirectInstructions contains specified Address + if (IndirectInstructions.find(Address) == IndirectInstructions.end()) { + return Result; + } + + buildFlowGraphImpl(Analysis, OpenedNodes, Result, Address.Address, 0); + return Result; +} + +void GraphBuilder::buildFlowsToUndefined(const FileAnalysis &Analysis, + GraphResult &Result, + ConditionalBranchNode &BranchNode, + const Instr &BranchInstrMeta) { + assert(SearchLengthForUndef > 0 && + "Search length for undefined flow must be greater than zero."); + + // Start setting up the next node in the block. + uint64_t NextAddress = 0; + const Instr *NextMetaPtr; + + // Find out the next instruction in the block and add it to the new + // node. + if (BranchNode.Target && !BranchNode.Fallthrough) { + // We know the target of the branch, find the fallthrough. + NextMetaPtr = Analysis.getNextInstructionSequential(BranchInstrMeta); + if (!NextMetaPtr) { + errs() << "Failed to get next instruction from " + << format_hex(BranchNode.Address, 2) << ".\n"; + return; + } + + NextAddress = NextMetaPtr->VMAddress; + BranchNode.Fallthrough = + NextMetaPtr->VMAddress; // Add the new node to the branch head. + } else if (BranchNode.Fallthrough && !BranchNode.Target) { + // We already know the fallthrough, evaluate the target. + uint64_t Target; + if (!Analysis.getMCInstrAnalysis()->evaluateBranch( + BranchInstrMeta.Instruction, BranchInstrMeta.VMAddress, + BranchInstrMeta.InstructionSize, Target)) { + errs() << "Failed to get branch target for conditional branch at address " + << format_hex(BranchInstrMeta.VMAddress, 2) << ".\n"; + return; + } + + // Resolve the meta pointer for the target of this branch. 
+ NextMetaPtr = Analysis.getInstruction(Target); + if (!NextMetaPtr) { + errs() << "Failed to find instruction at address " + << format_hex(Target, 2) << ".\n"; + return; + } + + NextAddress = Target; + BranchNode.Target = + NextMetaPtr->VMAddress; // Add the new node to the branch head. + } else { + errs() << "ControlBranchNode supplied to buildFlowsToUndefined should " + "provide Target xor Fallthrough.\n"; + return; + } + + uint64_t CurrentAddress = NextAddress; + const Instr *CurrentMetaPtr = NextMetaPtr; + + // Now the branch head has been set properly, complete the rest of the block. + for (uint64_t i = 1; i < SearchLengthForUndef; ++i) { + // Check to see whether the block should die. + if (Analysis.isCFITrap(*CurrentMetaPtr)) { + BranchNode.CFIProtection = true; + return; + } + + // Find the metadata of the next instruction. + NextMetaPtr = Analysis.getDefiniteNextInstruction(*CurrentMetaPtr); + if (!NextMetaPtr) + return; + + // Setup the next node. + NextAddress = NextMetaPtr->VMAddress; + + // Add this as an intermediate. + Result.IntermediateNodes[CurrentAddress] = NextAddress; + + // Move the 'current' pointers to the new tail of the block. + CurrentMetaPtr = NextMetaPtr; + CurrentAddress = NextAddress; + } + + // Final check of the last thing we added to the block. + if (Analysis.isCFITrap(*CurrentMetaPtr)) + BranchNode.CFIProtection = true; +} + +void GraphBuilder::buildFlowGraphImpl(const FileAnalysis &Analysis, + DenseSet<uint64_t> &OpenedNodes, + GraphResult &Result, uint64_t Address, + uint64_t Depth) { + // If we've exceeded the flow length, terminate. + if (Depth >= SearchLengthForConditionalBranch) { + Result.OrphanedNodes.push_back(Address); + return; + } + + // Ensure this flow is acyclic. + if (OpenedNodes.count(Address)) + Result.OrphanedNodes.push_back(Address); + + // If this flow is already explored, stop here. + if (Result.IntermediateNodes.count(Address)) + return; + + // Get the metadata for the node instruction. 
+ const auto &InstrMetaPtr = Analysis.getInstruction(Address); + if (!InstrMetaPtr) { + errs() << "Failed to build flow graph for instruction at address " + << format_hex(Address, 2) << ".\n"; + Result.OrphanedNodes.push_back(Address); + return; + } + const auto &ChildMeta = *InstrMetaPtr; + + OpenedNodes.insert(Address); + std::set<const Instr *> CFCrossRefs = + Analysis.getDirectControlFlowXRefs(ChildMeta); + + bool HasValidCrossRef = false; + + for (const auto *ParentMetaPtr : CFCrossRefs) { + assert(ParentMetaPtr && "CFCrossRefs returned nullptr."); + const auto &ParentMeta = *ParentMetaPtr; + const auto &ParentDesc = + Analysis.getMCInstrInfo()->get(ParentMeta.Instruction.getOpcode()); + + if (!ParentDesc.mayAffectControlFlow(ParentMeta.Instruction, + *Analysis.getRegisterInfo())) { + // If this cross reference doesn't affect CF, continue the graph. + buildFlowGraphImpl(Analysis, OpenedNodes, Result, ParentMeta.VMAddress, + Depth + 1); + Result.IntermediateNodes[ParentMeta.VMAddress] = Address; + HasValidCrossRef = true; + continue; + } + + // Call instructions are not valid in the upwards traversal. + if (ParentDesc.isCall()) { + Result.IntermediateNodes[ParentMeta.VMAddress] = Address; + Result.OrphanedNodes.push_back(ParentMeta.VMAddress); + continue; + } + + // Evaluate the branch target to ascertain whether this XRef is the result + // of a fallthrough or the target of a branch. + uint64_t BranchTarget; + if (!Analysis.getMCInstrAnalysis()->evaluateBranch( + ParentMeta.Instruction, ParentMeta.VMAddress, + ParentMeta.InstructionSize, BranchTarget)) { + errs() << "Failed to evaluate branch target for instruction at address " + << format_hex(ParentMeta.VMAddress, 2) << ".\n"; + Result.IntermediateNodes[ParentMeta.VMAddress] = Address; + Result.OrphanedNodes.push_back(ParentMeta.VMAddress); + continue; + } + + // Allow unconditional branches to be part of the upwards traversal. 
+ if (ParentDesc.isUnconditionalBranch()) { + // Ensures that the unconditional branch is actually an XRef to the child. + if (BranchTarget != Address) { + errs() << "Control flow to " << format_hex(Address, 2) + << ", but target resolution of " + << format_hex(ParentMeta.VMAddress, 2) + << " is not this address?\n"; + Result.IntermediateNodes[ParentMeta.VMAddress] = Address; + Result.OrphanedNodes.push_back(ParentMeta.VMAddress); + continue; + } + + buildFlowGraphImpl(Analysis, OpenedNodes, Result, ParentMeta.VMAddress, + Depth + 1); + Result.IntermediateNodes[ParentMeta.VMAddress] = Address; + HasValidCrossRef = true; + continue; + } + + // Ensure that any unknown CFs are caught. + if (!ParentDesc.isConditionalBranch()) { + errs() << "Unknown control flow encountered when building graph at " + << format_hex(Address, 2) << "\n."; + Result.IntermediateNodes[ParentMeta.VMAddress] = Address; + Result.OrphanedNodes.push_back(ParentMeta.VMAddress); + continue; + } + + // Only direct conditional branches should be present at this point. Setup + // a conditional branch node and build flows to the ud2. + ConditionalBranchNode BranchNode; + BranchNode.Address = ParentMeta.VMAddress; + BranchNode.Target = 0; + BranchNode.Fallthrough = 0; + BranchNode.CFIProtection = false; + BranchNode.IndirectCFIsOnTargetPath = (BranchTarget == Address); + + if (BranchTarget == Address) + BranchNode.Target = Address; + else + BranchNode.Fallthrough = Address; + + HasValidCrossRef = true; + buildFlowsToUndefined(Analysis, Result, BranchNode, ParentMeta); + Result.ConditionalBranchNodes.push_back(BranchNode); + } + + // When using cross-DSO, some indirect calls are not guarded by a branch to a + // trap but instead follow a call to __cfi_slowpath. 
For example: + // if (!InlinedFastCheck(f)) + // call *f + // else { + // __cfi_slowpath(CallSiteTypeId, f); + // call *f + // } + // To mark the second call as protected, we recognize indirect calls that + // directly follow calls to functions that will trap on CFI violations. + if (CFCrossRefs.empty()) { + const Instr *PrevInstr = Analysis.getPrevInstructionSequential(ChildMeta); + if (PrevInstr && Analysis.willTrapOnCFIViolation(*PrevInstr)) { + Result.IntermediateNodes[PrevInstr->VMAddress] = Address; + HasValidCrossRef = true; + } + } + + if (!HasValidCrossRef) + Result.OrphanedNodes.push_back(Address); + + OpenedNodes.erase(Address); +} + +} // namespace cfi_verify +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/GraphBuilder.h b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/GraphBuilder.h new file mode 100644 index 0000000000..89724c04f7 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/GraphBuilder.h @@ -0,0 +1,136 @@ +//===- GraphBuilder.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CFI_VERIFY_GRAPH_BUILDER_H +#define LLVM_CFI_VERIFY_GRAPH_BUILDER_H + +#include "FileAnalysis.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" + +#include <functional> +#include <set> +#include <string> +#include <unordered_map> + +using Instr = llvm::cfi_verify::FileAnalysis::Instr; + +namespace llvm { +namespace cfi_verify { + +extern uint64_t SearchLengthForUndef; +extern uint64_t SearchLengthForConditionalBranch; + +struct ConditionalBranchNode { + uint64_t Address; + uint64_t Target; + uint64_t Fallthrough; + // Does this conditional branch look like it's used for CFI protection? i.e. + // - The exit point of a basic block whose entry point is {target|fallthrough} + // is a CFI trap, and... + // - The exit point of the other basic block is an indirect CF instruction. + bool CFIProtection; + bool IndirectCFIsOnTargetPath; +}; + +// The canonical graph result structure returned by GraphBuilder. 
The members +// in this structure encapsulate all possible code paths to the instruction +// located at `BaseAddress`. +struct GraphResult { + uint64_t BaseAddress; + + // Map between an instruction address, and the address of the next instruction + // that will be executed. This map will contain all keys in the range: + // - [orphaned node, base address) + // - [conditional branch node {target|fallthrough}, base address) + DenseMap<uint64_t, uint64_t> IntermediateNodes; + + // A list of orphaned nodes. A node is an 'orphan' if it meets any of the + // following criteria: + // - The length of the path from the base to this node has exceeded + // `SearchLengthForConditionalBranch`. + // - The node has no cross references to it. + // - The path from the base to this node is cyclic. + std::vector<uint64_t> OrphanedNodes; + + // A list of top-level conditional branches that exist at the top of any + // non-orphan paths from the base. + std::vector<ConditionalBranchNode> ConditionalBranchNodes; + + // Returns an in-order list of the path between the address provided and the + // base. The provided address must be part of this graph, and must not be a + // conditional branch. + std::vector<uint64_t> flattenAddress(uint64_t Address) const; + + // Print the DOT representation of this result. + void printToDOT(const FileAnalysis &Analysis, raw_ostream &OS) const; +}; + +class GraphBuilder { +public: + // Build the control flow graph for a provided control flow node. This method + // will enumerate all branch nodes that can lead to this node, and place them + // into GraphResult::ConditionalBranchNodes. It will also provide any orphaned + // (i.e. the upwards traversal did not make it to a branch node) flows to the + // provided node in GraphResult::OrphanedNodes. + static GraphResult buildFlowGraph(const FileAnalysis &Analysis, + object::SectionedAddress Address); + +private: + // Implementation function that actually builds the flow graph. 
Retrieves a + // list of cross references to instruction referenced in `Address`. If any of + // these XRefs are conditional branches, it will build the other potential + // path (fallthrough or target) using `buildFlowsToUndefined`. Otherwise, this + // function will recursively call itself where `Address` in the recursive call + // is now the XRef. If any XRef is an orphan, it is added to + // `Result.OrphanedNodes`. `OpenedNodes` keeps track of the list of nodes + // in the current path and is used for cycle-checking. If the path is found + // to be cyclic, it will be added to `Result.OrphanedNodes`. + static void buildFlowGraphImpl(const FileAnalysis &Analysis, + DenseSet<uint64_t> &OpenedNodes, + GraphResult &Result, uint64_t Address, + uint64_t Depth); + + // Utilised by buildFlowGraphImpl to build the tree out from the provided + // conditional branch node to an undefined instruction. The provided + // conditional branch node must have exactly one of its subtrees set, and will + // update the node's CFIProtection field if a deterministic flow can be found + // to an undefined instruction. + static void buildFlowsToUndefined(const FileAnalysis &Analysis, + GraphResult &Result, + ConditionalBranchNode &BranchNode, + const Instr &BranchInstrMeta); +}; + +} // end namespace cfi_verify +} // end namespace llvm + +#endif // LLVM_CFI_VERIFY_GRAPH_BUILDER_H diff --git a/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/ya.make b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/ya.make new file mode 100644 index 0000000000..3b9ac103f7 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-cfi-verify/lib/ya.make @@ -0,0 +1,32 @@ +# Generated by devtools/yamaker. 
+ +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm14 + contrib/libs/llvm14/lib/DebugInfo/DWARF + contrib/libs/llvm14/lib/DebugInfo/Symbolize + contrib/libs/llvm14/lib/MC + contrib/libs/llvm14/lib/MC/MCParser + contrib/libs/llvm14/lib/Object + contrib/libs/llvm14/lib/Support +) + +ADDINCL( + contrib/libs/llvm14/tools/llvm-cfi-verify/lib +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + FileAnalysis.cpp + GraphBuilder.cpp +) + +END() diff --git a/contrib/libs/llvm14/tools/llvm-cfi-verify/llvm-cfi-verify.cpp b/contrib/libs/llvm14/tools/llvm-cfi-verify/llvm-cfi-verify.cpp new file mode 100644 index 0000000000..8c43ea8390 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-cfi-verify/llvm-cfi-verify.cpp @@ -0,0 +1,282 @@ +//===-- llvm-cfi-verify.cpp - CFI Verification tool for LLVM --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This tool verifies Control Flow Integrity (CFI) instrumentation by static +// binary analysis. See the design document in /docs/CFIVerify.rst for more +// information. +// +// This tool is currently incomplete. It currently only does disassembly for +// object files, and searches through the code for indirect control flow +// instructions, printing them once found. 
+// +//===----------------------------------------------------------------------===// + +#include "lib/FileAnalysis.h" +#include "lib/GraphBuilder.h" + +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/SpecialCaseList.h" +#include "llvm/Support/VirtualFileSystem.h" + +#include <cstdlib> + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::cfi_verify; + +static cl::OptionCategory CFIVerifyCategory("CFI Verify Options"); + +cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), + cl::Required, cl::cat(CFIVerifyCategory)); +cl::opt<std::string> IgnorelistFilename(cl::Positional, + cl::desc("[ignorelist file]"), + cl::init("-"), + cl::cat(CFIVerifyCategory)); +cl::opt<bool> PrintGraphs( + "print-graphs", + cl::desc("Print graphs around indirect CF instructions in DOT format."), + cl::init(false), cl::cat(CFIVerifyCategory)); +cl::opt<unsigned> PrintBlameContext( + "blame-context", + cl::desc("Print the blame context (if possible) for BAD instructions. This " + "specifies the number of lines of context to include, where zero " + "disables this feature."), + cl::init(0), cl::cat(CFIVerifyCategory)); +cl::opt<unsigned> PrintBlameContextAll( + "blame-context-all", + cl::desc("Prints the blame context (if possible) for ALL instructions. " + "This specifies the number of lines of context for non-BAD " + "instructions (see --blame-context). 
If --blame-context is " + "unspecified, it prints this number of contextual lines for BAD " + "instructions as well."), + cl::init(0), cl::cat(CFIVerifyCategory)); +cl::opt<bool> Summarize("summarize", cl::desc("Print the summary only."), + cl::init(false), cl::cat(CFIVerifyCategory)); + +ExitOnError ExitOnErr; + +static void printBlameContext(const DILineInfo &LineInfo, unsigned Context) { + auto FileOrErr = MemoryBuffer::getFile(LineInfo.FileName); + if (!FileOrErr) { + errs() << "Could not open file: " << LineInfo.FileName << "\n"; + return; + } + + std::unique_ptr<MemoryBuffer> File = std::move(FileOrErr.get()); + SmallVector<StringRef, 100> Lines; + File->getBuffer().split(Lines, '\n'); + + for (unsigned i = std::max<size_t>(1, LineInfo.Line - Context); + i < + std::min<size_t>(Lines.size() + 1, LineInfo.Line + Context + 1); + ++i) { + if (i == LineInfo.Line) + outs() << ">"; + else + outs() << " "; + + outs() << i << ": " << Lines[i - 1] << "\n"; + } +} + +static void printInstructionInformation(const FileAnalysis &Analysis, + const Instr &InstrMeta, + const GraphResult &Graph, + CFIProtectionStatus ProtectionStatus) { + outs() << "Instruction: " << format_hex(InstrMeta.VMAddress, 2) << " (" + << stringCFIProtectionStatus(ProtectionStatus) << "): "; + Analysis.printInstruction(InstrMeta, outs()); + outs() << " \n"; + + if (PrintGraphs) + Graph.printToDOT(Analysis, outs()); +} + +static void printInstructionStatus(unsigned BlameLine, bool CFIProtected, + const DILineInfo &LineInfo) { + if (BlameLine) { + outs() << "Ignorelist Match: " << IgnorelistFilename << ":" << BlameLine + << "\n"; + if (CFIProtected) + outs() << "====> Unexpected Protected\n"; + else + outs() << "====> Expected Unprotected\n"; + + if (PrintBlameContextAll) + printBlameContext(LineInfo, PrintBlameContextAll); + } else { + if (CFIProtected) { + outs() << "====> Expected Protected\n"; + if (PrintBlameContextAll) + printBlameContext(LineInfo, PrintBlameContextAll); + } else { + outs() << 
"====> Unexpected Unprotected (BAD)\n"; + if (PrintBlameContext) + printBlameContext(LineInfo, PrintBlameContext); + } + } +} + +static void +printIndirectCFInstructions(FileAnalysis &Analysis, + const SpecialCaseList *SpecialCaseList) { + uint64_t ExpectedProtected = 0; + uint64_t UnexpectedProtected = 0; + uint64_t ExpectedUnprotected = 0; + uint64_t UnexpectedUnprotected = 0; + + std::map<unsigned, uint64_t> BlameCounter; + + for (object::SectionedAddress Address : Analysis.getIndirectInstructions()) { + const auto &InstrMeta = Analysis.getInstructionOrDie(Address.Address); + GraphResult Graph = GraphBuilder::buildFlowGraph(Analysis, Address); + + CFIProtectionStatus ProtectionStatus = + Analysis.validateCFIProtection(Graph); + bool CFIProtected = (ProtectionStatus == CFIProtectionStatus::PROTECTED); + + if (!Summarize) { + outs() << "-----------------------------------------------------\n"; + printInstructionInformation(Analysis, InstrMeta, Graph, ProtectionStatus); + } + + if (IgnoreDWARFFlag) { + if (CFIProtected) + ExpectedProtected++; + else + UnexpectedUnprotected++; + continue; + } + + auto InliningInfo = Analysis.symbolizeInlinedCode(Address); + if (!InliningInfo || InliningInfo->getNumberOfFrames() == 0) { + errs() << "Failed to symbolise " << format_hex(Address.Address, 2) + << " with line tables from " << InputFilename << "\n"; + exit(EXIT_FAILURE); + } + + const auto &LineInfo = InliningInfo->getFrame(0); + + // Print the inlining symbolisation of this instruction. 
+ if (!Summarize) { + for (uint32_t i = 0; i < InliningInfo->getNumberOfFrames(); ++i) { + const auto &Line = InliningInfo->getFrame(i); + outs() << " " << format_hex(Address.Address, 2) << " = " + << Line.FileName << ":" << Line.Line << ":" << Line.Column + << " (" << Line.FunctionName << ")\n"; + } + } + + if (!SpecialCaseList) { + if (CFIProtected) { + if (PrintBlameContextAll && !Summarize) + printBlameContext(LineInfo, PrintBlameContextAll); + ExpectedProtected++; + } else { + if (PrintBlameContext && !Summarize) + printBlameContext(LineInfo, PrintBlameContext); + UnexpectedUnprotected++; + } + continue; + } + + unsigned BlameLine = 0; + for (auto &K : {"cfi-icall", "cfi-vcall"}) { + if (!BlameLine) + BlameLine = + SpecialCaseList->inSectionBlame(K, "src", LineInfo.FileName); + if (!BlameLine) + BlameLine = + SpecialCaseList->inSectionBlame(K, "fun", LineInfo.FunctionName); + } + + if (BlameLine) { + BlameCounter[BlameLine]++; + if (CFIProtected) + UnexpectedProtected++; + else + ExpectedUnprotected++; + } else { + if (CFIProtected) + ExpectedProtected++; + else + UnexpectedUnprotected++; + } + + if (!Summarize) + printInstructionStatus(BlameLine, CFIProtected, LineInfo); + } + + uint64_t IndirectCFInstructions = ExpectedProtected + UnexpectedProtected + + ExpectedUnprotected + UnexpectedUnprotected; + + if (IndirectCFInstructions == 0) { + outs() << "No indirect CF instructions found.\n"; + return; + } + + outs() << formatv("\nTotal Indirect CF Instructions: {0}\n" + "Expected Protected: {1} ({2:P})\n" + "Unexpected Protected: {3} ({4:P})\n" + "Expected Unprotected: {5} ({6:P})\n" + "Unexpected Unprotected (BAD): {7} ({8:P})\n", + IndirectCFInstructions, ExpectedProtected, + ((double)ExpectedProtected) / IndirectCFInstructions, + UnexpectedProtected, + ((double)UnexpectedProtected) / IndirectCFInstructions, + ExpectedUnprotected, + ((double)ExpectedUnprotected) / IndirectCFInstructions, + UnexpectedUnprotected, + ((double)UnexpectedUnprotected) / 
IndirectCFInstructions); + + if (!SpecialCaseList) + return; + + outs() << "\nIgnorelist Results:\n"; + for (const auto &KV : BlameCounter) { + outs() << " " << IgnorelistFilename << ":" << KV.first << " affects " + << KV.second << " indirect CF instructions.\n"; + } +} + +int main(int argc, char **argv) { + cl::HideUnrelatedOptions({&CFIVerifyCategory, &getColorCategory()}); + cl::ParseCommandLineOptions( + argc, argv, + "Identifies whether Control Flow Integrity protects all indirect control " + "flow instructions in the provided object file, DSO or binary.\nNote: " + "Anything statically linked into the provided file *must* be compiled " + "with '-g'. This can be relaxed through the '--ignore-dwarf' flag."); + + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllDisassemblers(); + + if (PrintBlameContextAll && !PrintBlameContext) + PrintBlameContext.setValue(PrintBlameContextAll); + + std::unique_ptr<SpecialCaseList> SpecialCaseList; + if (IgnorelistFilename != "-") { + std::string Error; + SpecialCaseList = SpecialCaseList::create({IgnorelistFilename}, + *vfs::getRealFileSystem(), Error); + if (!SpecialCaseList) { + errs() << "Failed to get ignorelist: " << Error << "\n"; + exit(EXIT_FAILURE); + } + } + + FileAnalysis Analysis = ExitOnErr(FileAnalysis::Create(InputFilename)); + printIndirectCFInstructions(Analysis, SpecialCaseList.get()); + + return EXIT_SUCCESS; +} diff --git a/contrib/libs/llvm14/tools/llvm-cfi-verify/ya.make b/contrib/libs/llvm14/tools/llvm-cfi-verify/ya.make new file mode 100644 index 0000000000..b56a323e2a --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-cfi-verify/ya.make @@ -0,0 +1,67 @@ +# Generated by devtools/yamaker. 
+ +PROGRAM() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm14 + contrib/libs/llvm14/lib/BinaryFormat + contrib/libs/llvm14/lib/Bitcode/Reader + contrib/libs/llvm14/lib/Bitstream/Reader + contrib/libs/llvm14/lib/DebugInfo/CodeView + contrib/libs/llvm14/lib/DebugInfo/DWARF + contrib/libs/llvm14/lib/DebugInfo/MSF + contrib/libs/llvm14/lib/DebugInfo/PDB + contrib/libs/llvm14/lib/DebugInfo/Symbolize + contrib/libs/llvm14/lib/Demangle + contrib/libs/llvm14/lib/IR + contrib/libs/llvm14/lib/MC + contrib/libs/llvm14/lib/MC/MCDisassembler + contrib/libs/llvm14/lib/MC/MCParser + contrib/libs/llvm14/lib/Object + contrib/libs/llvm14/lib/Remarks + contrib/libs/llvm14/lib/Support + contrib/libs/llvm14/lib/Target/AArch64/AsmParser + contrib/libs/llvm14/lib/Target/AArch64/Disassembler + contrib/libs/llvm14/lib/Target/AArch64/MCTargetDesc + contrib/libs/llvm14/lib/Target/AArch64/TargetInfo + contrib/libs/llvm14/lib/Target/AArch64/Utils + contrib/libs/llvm14/lib/Target/ARM/AsmParser + contrib/libs/llvm14/lib/Target/ARM/Disassembler + contrib/libs/llvm14/lib/Target/ARM/MCTargetDesc + contrib/libs/llvm14/lib/Target/ARM/TargetInfo + contrib/libs/llvm14/lib/Target/ARM/Utils + contrib/libs/llvm14/lib/Target/BPF/AsmParser + contrib/libs/llvm14/lib/Target/BPF/Disassembler + contrib/libs/llvm14/lib/Target/BPF/MCTargetDesc + contrib/libs/llvm14/lib/Target/BPF/TargetInfo + contrib/libs/llvm14/lib/Target/NVPTX/MCTargetDesc + contrib/libs/llvm14/lib/Target/NVPTX/TargetInfo + contrib/libs/llvm14/lib/Target/PowerPC/AsmParser + contrib/libs/llvm14/lib/Target/PowerPC/Disassembler + contrib/libs/llvm14/lib/Target/PowerPC/MCTargetDesc + contrib/libs/llvm14/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm14/lib/Target/X86/AsmParser + contrib/libs/llvm14/lib/Target/X86/Disassembler + contrib/libs/llvm14/lib/Target/X86/MCTargetDesc + contrib/libs/llvm14/lib/Target/X86/TargetInfo + contrib/libs/llvm14/lib/TextAPI + 
contrib/libs/llvm14/tools/llvm-cfi-verify/lib +) + +ADDINCL( + contrib/libs/llvm14/tools/llvm-cfi-verify +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + llvm-cfi-verify.cpp +) + +END() |