diff options
author | vvvv <vvvv@ydb.tech> | 2024-02-06 20:01:22 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2024-02-06 20:22:16 +0300 |
commit | 0203b7a9a40828bb2bd4c32029b79ff0ea3d1f8f (patch) | |
tree | e630d0d5bd0bd29fc8c2d2842ed2cfde781b993a /contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp | |
parent | ba27db76d99d12a4f1c06960b5449423218614c4 (diff) | |
download | ydb-0203b7a9a40828bb2bd4c32029b79ff0ea3d1f8f.tar.gz |
llvm16 targets
Diffstat (limited to 'contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp')
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp | 1038 |
1 files changed, 1038 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp b/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp new file mode 100644 index 0000000000..6573bf010c --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp @@ -0,0 +1,1038 @@ +//===-- DifferenceEngine.cpp - Structural function/module comparison ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the implementation of the LLVM difference +// engine, which structurally compares global values within a module. +// +//===----------------------------------------------------------------------===// + +#include "DifferenceEngine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/type_traits.h" +#include <utility> + +using namespace llvm; + +namespace { + +/// A priority queue, implemented as a heap. +template <class T, class Sorter, unsigned InlineCapacity> +class PriorityQueue { + Sorter Precedes; + llvm::SmallVector<T, InlineCapacity> Storage; + +public: + PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {} + + /// Checks whether the heap is empty. + bool empty() const { return Storage.empty(); } + + /// Insert a new value on the heap. + void insert(const T &V) { + unsigned Index = Storage.size(); + Storage.push_back(V); + if (Index == 0) return; + + T *data = Storage.data(); + while (true) { + unsigned Target = (Index + 1) / 2 - 1; + if (!Precedes(data[Index], data[Target])) return; + std::swap(data[Index], data[Target]); + if (Target == 0) return; + Index = Target; + } + } + + /// Remove the minimum value in the heap. Only valid on a non-empty heap. + T remove_min() { + assert(!empty()); + T tmp = Storage[0]; + + unsigned NewSize = Storage.size() - 1; + if (NewSize) { + // Move the slot at the end to the beginning. + if (std::is_trivially_copyable<T>::value) + Storage[0] = Storage[NewSize]; + else + std::swap(Storage[0], Storage[NewSize]); + + // Bubble the root up as necessary. + unsigned Index = 0; + while (true) { + // With a 1-based index, the children would be Index*2 and Index*2+1. + unsigned R = (Index + 1) * 2; + unsigned L = R - 1; + + // If R is out of bounds, we're done after this in any case. + if (R >= NewSize) { + // If L is also out of bounds, we're done immediately. + if (L >= NewSize) break; + + // Otherwise, test whether we should swap L and Index. + if (Precedes(Storage[L], Storage[Index])) + std::swap(Storage[L], Storage[Index]); + break; + } + + // Otherwise, we need to compare with the smaller of L and R. + // Prefer R because it's closer to the end of the array. + unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R); + + // If Index is >= the min of L and R, then heap ordering is restored. + if (!Precedes(Storage[IndexToTest], Storage[Index])) + break; + + // Otherwise, keep bubbling up. + std::swap(Storage[IndexToTest], Storage[Index]); + Index = IndexToTest; + } + } + Storage.pop_back(); + + return tmp; + } +}; + +/// A function-scope difference engine. +class FunctionDifferenceEngine { + DifferenceEngine &Engine; + + // Some initializers may reference the variable we're currently checking. This + // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent + // recursing. + const Value *SavedLHS; + const Value *SavedRHS; + + // The current mapping from old local values to new local values. + DenseMap<const Value *, const Value *> Values; + + // The current mapping from old blocks to new blocks. + DenseMap<const BasicBlock *, const BasicBlock *> Blocks; + + // The tentative mapping from old local values while comparing a pair of + // basic blocks. Once the pair has been processed, the tentative mapping is + // committed to the Values map. + DenseSet<std::pair<const Value *, const Value *>> TentativeValues; + + // Equivalence Assumptions + // + // For basic blocks in loops, some values in phi nodes may depend on + // values from not yet processed basic blocks in the loop. When encountering + // such values, we optimistically asssume their equivalence and store this + // assumption in a BlockDiffCandidate for the pair of compared BBs. + // + // Once we have diffed all BBs, for every BlockDiffCandidate, we check all + // stored assumptions using the Values map that stores proven equivalences + // between the old and new values, and report a diff if an assumption cannot + // be proven to be true. + // + // Note that after having made an assumption, all further determined + // equivalences implicitly depend on that assumption. These will not be + // reverted or reported if the assumption proves to be false, because these + // are considered indirect diffs caused by earlier direct diffs. + // + // We aim to avoid false negatives in llvm-diff, that is, ensure that + // whenever no diff is reported, the functions are indeed equal. If + // assumptions were made, this is not entirely clear, because in principle we + // could end up with a circular proof where the proof of equivalence of two + // nodes is depending on the assumption of their equivalence. + // + // To see that assumptions do not add false negatives, note that if we do not + // report a diff, this means that there is an equivalence mapping between old + // and new values that is consistent with all assumptions made. The circular + // dependency that exists on an IR value level does not exist at run time, + // because the values selected by the phi nodes must always already have been + // computed. Hence, we can prove equivalence of the old and new functions by + // considering step-wise parallel execution, and incrementally proving + // equivalence of every new computed value. Another way to think about it is + // to imagine cloning the loop BBs for every iteration, turning the loops + // into (possibly infinite) DAGs, and proving equivalence by induction on the + // iteration, using the computed value mapping. + + // The class BlockDiffCandidate stores pairs which either have already been + // proven to differ, or pairs whose equivalence depends on assumptions to be + // verified later. + struct BlockDiffCandidate { + const BasicBlock *LBB; + const BasicBlock *RBB; + // Maps old values to assumed-to-be-equivalent new values + SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions; + // If set, we already know the blocks differ. + bool KnownToDiffer; + }; + + // List of block diff candidates in the order found by processing. + // We generate reports in this order. + // For every LBB, there may only be one corresponding RBB. + SmallVector<BlockDiffCandidate> BlockDiffCandidates; + // Maps LBB to the index of its BlockDiffCandidate, if existing. + DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices; + + // Note: Every LBB must always be queried together with the same RBB. + // The returned reference is not permanently valid and should not be stored. + BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB, + const BasicBlock *RBB) { + auto It = BlockDiffCandidateIndices.find(LBB); + // Check if LBB already has a diff candidate + if (It == BlockDiffCandidateIndices.end()) { + // Add new one + BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size(); + BlockDiffCandidates.push_back( + {LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false}); + return BlockDiffCandidates.back(); + } + // Use existing one + BlockDiffCandidate &Result = BlockDiffCandidates[It->second]; + assert(Result.RBB == RBB && "Inconsistent basic block pairing!"); + return Result; + } + + // Optionally passed to equivalence checker functions, so these can add + // assumptions in BlockDiffCandidates. Its presence controls whether + // assumptions are generated. + struct AssumptionContext { + // The two basic blocks that need the two compared values to be equivalent. + const BasicBlock *LBB; + const BasicBlock *RBB; + }; + + unsigned getUnprocPredCount(const BasicBlock *Block) const { + unsigned Count = 0; + for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E; + ++I) + if (!Blocks.count(*I)) Count++; + return Count; + } + + typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair; + + /// A type which sorts a priority queue by the number of unprocessed + /// predecessor blocks it has remaining. + /// + /// This is actually really expensive to calculate. + struct QueueSorter { + const FunctionDifferenceEngine &fde; + explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {} + + bool operator()(BlockPair &Old, BlockPair &New) { + return fde.getUnprocPredCount(Old.first) + < fde.getUnprocPredCount(New.first); + } + }; + + /// A queue of unified blocks to process. + PriorityQueue<BlockPair, QueueSorter, 20> Queue; + + /// Try to unify the given two blocks. Enqueues them for processing + /// if they haven't already been processed. + /// + /// Returns true if there was a problem unifying them. + bool tryUnify(const BasicBlock *L, const BasicBlock *R) { + const BasicBlock *&Ref = Blocks[L]; + + if (Ref) { + if (Ref == R) return false; + + Engine.logf("successor %l cannot be equivalent to %r; " + "it's already equivalent to %r") + << L << R << Ref; + return true; + } + + Ref = R; + Queue.insert(BlockPair(L, R)); + return false; + } + + /// Unifies two instructions, given that they're known not to have + /// structural differences. + void unify(const Instruction *L, const Instruction *R) { + DifferenceEngine::Context C(Engine, L, R); + + bool Result = diff(L, R, true, true, true); + assert(!Result && "structural differences second time around?"); + (void) Result; + if (!L->use_empty()) + Values[L] = R; + } + + void processQueue() { + while (!Queue.empty()) { + BlockPair Pair = Queue.remove_min(); + diff(Pair.first, Pair.second); + } + } + + void checkAndReportDiffCandidates() { + for (BlockDiffCandidate &BDC : BlockDiffCandidates) { + + // Check assumptions + for (const auto &[L, R] : BDC.EquivalenceAssumptions) { + auto It = Values.find(L); + if (It == Values.end() || It->second != R) { + BDC.KnownToDiffer = true; + break; + } + } + + // Run block diff if the BBs differ + if (BDC.KnownToDiffer) { + DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB); + runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin()); + } + } + } + + void diff(const BasicBlock *L, const BasicBlock *R) { + DifferenceEngine::Context C(Engine, L, R); + + BasicBlock::const_iterator LI = L->begin(), LE = L->end(); + BasicBlock::const_iterator RI = R->begin(); + + do { + assert(LI != LE && RI != R->end()); + const Instruction *LeftI = &*LI, *RightI = &*RI; + + // If the instructions differ, start the more sophisticated diff + // algorithm at the start of the block. + if (diff(LeftI, RightI, false, false, true)) { + TentativeValues.clear(); + // Register (L, R) as diffing pair. Note that we could directly emit a + // block diff here, but this way we ensure all diffs are emitted in one + // consistent order, independent of whether the diffs were detected + // immediately or via invalid assumptions. + getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true; + return; + } + + // Otherwise, tentatively unify them. + if (!LeftI->use_empty()) + TentativeValues.insert(std::make_pair(LeftI, RightI)); + + ++LI; + ++RI; + } while (LI != LE); // This is sufficient: we can't get equality of + // terminators if there are residual instructions. + + // Unify everything in the block, non-tentatively this time. + TentativeValues.clear(); + for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI) + unify(&*LI, &*RI); + } + + bool matchForBlockDiff(const Instruction *L, const Instruction *R); + void runBlockDiff(BasicBlock::const_iterator LI, + BasicBlock::const_iterator RI); + + bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) { + // FIXME: call attributes + AssumptionContext AC = {L.getParent(), R.getParent()}; + if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(), + &AC)) { + if (Complain) Engine.log("called functions differ"); + return true; + } + if (L.arg_size() != R.arg_size()) { + if (Complain) Engine.log("argument counts differ"); + return true; + } + for (unsigned I = 0, E = L.arg_size(); I != E; ++I) + if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) { + if (Complain) + Engine.logf("arguments %l and %r differ") + << L.getArgOperand(I) << R.getArgOperand(I); + return true; + } + return false; + } + + // If AllowAssumptions is enabled, whenever we encounter a pair of values + // that we cannot prove to be equivalent, we assume equivalence and store that + // assumption to be checked later in BlockDiffCandidates. + bool diff(const Instruction *L, const Instruction *R, bool Complain, + bool TryUnify, bool AllowAssumptions) { + // FIXME: metadata (if Complain is set) + AssumptionContext ACValue = {L->getParent(), R->getParent()}; + // nullptr AssumptionContext disables assumption generation. + const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr; + + // Different opcodes always imply different operations. + if (L->getOpcode() != R->getOpcode()) { + if (Complain) Engine.log("different instruction types"); + return true; + } + + if (isa<CmpInst>(L)) { + if (cast<CmpInst>(L)->getPredicate() + != cast<CmpInst>(R)->getPredicate()) { + if (Complain) Engine.log("different predicates"); + return true; + } + } else if (isa<CallInst>(L)) { + return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain); + } else if (isa<PHINode>(L)) { + const PHINode &LI = cast<PHINode>(*L); + const PHINode &RI = cast<PHINode>(*R); + + // This is really weird; type uniquing is broken? + if (LI.getType() != RI.getType()) { + if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) { + if (Complain) Engine.log("different phi types"); + return true; + } + } + + if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) { + if (Complain) + Engine.log("PHI node # of incoming values differ"); + return true; + } + + for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) { + if (TryUnify) + tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I)); + + if (!equivalentAsOperands(LI.getIncomingValue(I), + RI.getIncomingValue(I), AC)) { + if (Complain) + Engine.log("PHI node incoming values differ"); + return true; + } + } + + return false; + + // Terminators. + } else if (isa<InvokeInst>(L)) { + const InvokeInst &LI = cast<InvokeInst>(*L); + const InvokeInst &RI = cast<InvokeInst>(*R); + if (diffCallSites(LI, RI, Complain)) + return true; + + if (TryUnify) { + tryUnify(LI.getNormalDest(), RI.getNormalDest()); + tryUnify(LI.getUnwindDest(), RI.getUnwindDest()); + } + return false; + + } else if (isa<CallBrInst>(L)) { + const CallBrInst &LI = cast<CallBrInst>(*L); + const CallBrInst &RI = cast<CallBrInst>(*R); + if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) { + if (Complain) + Engine.log("callbr # of indirect destinations differ"); + return true; + } + + // Perform the "try unify" step so that we can equate the indirect + // destinations before checking the call site. + for (unsigned I = 0; I < LI.getNumIndirectDests(); I++) + tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I)); + + if (diffCallSites(LI, RI, Complain)) + return true; + + if (TryUnify) + tryUnify(LI.getDefaultDest(), RI.getDefaultDest()); + return false; + + } else if (isa<BranchInst>(L)) { + const BranchInst *LI = cast<BranchInst>(L); + const BranchInst *RI = cast<BranchInst>(R); + if (LI->isConditional() != RI->isConditional()) { + if (Complain) Engine.log("branch conditionality differs"); + return true; + } + + if (LI->isConditional()) { + if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { + if (Complain) Engine.log("branch conditions differ"); + return true; + } + if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1)); + } + if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0)); + return false; + + } else if (isa<IndirectBrInst>(L)) { + const IndirectBrInst *LI = cast<IndirectBrInst>(L); + const IndirectBrInst *RI = cast<IndirectBrInst>(R); + if (LI->getNumDestinations() != RI->getNumDestinations()) { + if (Complain) Engine.log("indirectbr # of destinations differ"); + return true; + } + + if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) { + if (Complain) Engine.log("indirectbr addresses differ"); + return true; + } + + if (TryUnify) { + for (unsigned i = 0; i < LI->getNumDestinations(); i++) { + tryUnify(LI->getDestination(i), RI->getDestination(i)); + } + } + return false; + + } else if (isa<SwitchInst>(L)) { + const SwitchInst *LI = cast<SwitchInst>(L); + const SwitchInst *RI = cast<SwitchInst>(R); + if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { + if (Complain) Engine.log("switch conditions differ"); + return true; + } + if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest()); + + bool Difference = false; + + DenseMap<const ConstantInt *, const BasicBlock *> LCases; + for (auto Case : LI->cases()) + LCases[Case.getCaseValue()] = Case.getCaseSuccessor(); + + for (auto Case : RI->cases()) { + const ConstantInt *CaseValue = Case.getCaseValue(); + const BasicBlock *LCase = LCases[CaseValue]; + if (LCase) { + if (TryUnify) + tryUnify(LCase, Case.getCaseSuccessor()); + LCases.erase(CaseValue); + } else if (Complain || !Difference) { + if (Complain) + Engine.logf("right switch has extra case %r") << CaseValue; + Difference = true; + } + } + if (!Difference) + for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator + I = LCases.begin(), + E = LCases.end(); + I != E; ++I) { + if (Complain) + Engine.logf("left switch has extra case %l") << I->first; + Difference = true; + } + return Difference; + } else if (isa<UnreachableInst>(L)) { + return false; + } + + if (L->getNumOperands() != R->getNumOperands()) { + if (Complain) Engine.log("instructions have different operand counts"); + return true; + } + + for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { + Value *LO = L->getOperand(I), *RO = R->getOperand(I); + if (!equivalentAsOperands(LO, RO, AC)) { + if (Complain) Engine.logf("operands %l and %r differ") << LO << RO; + return true; + } + } + + return false; + } + +public: + bool equivalentAsOperands(const Constant *L, const Constant *R, + const AssumptionContext *AC) { + // Use equality as a preliminary filter. + if (L == R) + return true; + + if (L->getValueID() != R->getValueID()) + return false; + + // Ask the engine about global values. + if (isa<GlobalValue>(L)) + return Engine.equivalentAsOperands(cast<GlobalValue>(L), + cast<GlobalValue>(R)); + + // Compare constant expressions structurally. + if (isa<ConstantExpr>(L)) + return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R), + AC); + + // Constants of the "same type" don't always actually have the same + // type; I don't know why. Just white-list them. + if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L)) + return true; + + // Block addresses only match if we've already encountered the + // block. FIXME: tentative matches? + if (isa<BlockAddress>(L)) + return Blocks[cast<BlockAddress>(L)->getBasicBlock()] + == cast<BlockAddress>(R)->getBasicBlock(); + + // If L and R are ConstantVectors, compare each element + if (isa<ConstantVector>(L)) { + const ConstantVector *CVL = cast<ConstantVector>(L); + const ConstantVector *CVR = cast<ConstantVector>(R); + if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements()) + return false; + for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) { + if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC)) + return false; + } + return true; + } + + // If L and R are ConstantArrays, compare the element count and types. + if (isa<ConstantArray>(L)) { + const ConstantArray *CAL = cast<ConstantArray>(L); + const ConstantArray *CAR = cast<ConstantArray>(R); + // Sometimes a type may be equivalent, but not uniquified---e.g. it may + // contain a GEP instruction. Do a deeper comparison of the types. + if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements()) + return false; + + for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) { + if (!equivalentAsOperands(CAL->getAggregateElement(I), + CAR->getAggregateElement(I), AC)) + return false; + } + + return true; + } + + // If L and R are ConstantStructs, compare each field and type. + if (isa<ConstantStruct>(L)) { + const ConstantStruct *CSL = cast<ConstantStruct>(L); + const ConstantStruct *CSR = cast<ConstantStruct>(R); + + const StructType *LTy = cast<StructType>(CSL->getType()); + const StructType *RTy = cast<StructType>(CSR->getType()); + + // The StructTypes should have the same attributes. Don't use + // isLayoutIdentical(), because that just checks the element pointers, + // which may not work here. + if (LTy->getNumElements() != RTy->getNumElements() || + LTy->isPacked() != RTy->isPacked()) + return false; + + for (unsigned I = 0; I < LTy->getNumElements(); I++) { + const Value *LAgg = CSL->getAggregateElement(I); + const Value *RAgg = CSR->getAggregateElement(I); + + if (LAgg == SavedLHS || RAgg == SavedRHS) { + if (LAgg != SavedLHS || RAgg != SavedRHS) + // If the left and right operands aren't both re-analyzing the + // variable, then the initialiers don't match, so report "false". + // Otherwise, we skip these operands.. + return false; + + continue; + } + + if (!equivalentAsOperands(LAgg, RAgg, AC)) { + return false; + } + } + + return true; + } + + return false; + } + + bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R, + const AssumptionContext *AC) { + if (L == R) + return true; + + if (L->getOpcode() != R->getOpcode()) + return false; + + switch (L->getOpcode()) { + case Instruction::ICmp: + case Instruction::FCmp: + if (L->getPredicate() != R->getPredicate()) + return false; + break; + + case Instruction::GetElementPtr: + // FIXME: inbounds? + break; + + default: + break; + } + + if (L->getNumOperands() != R->getNumOperands()) + return false; + + for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { + const auto *LOp = L->getOperand(I); + const auto *ROp = R->getOperand(I); + + if (LOp == SavedLHS || ROp == SavedRHS) { + if (LOp != SavedLHS || ROp != SavedRHS) + // If the left and right operands aren't both re-analyzing the + // variable, then the initialiers don't match, so report "false". + // Otherwise, we skip these operands.. + return false; + + continue; + } + + if (!equivalentAsOperands(LOp, ROp, AC)) + return false; + } + + return true; + } + + // There are cases where we cannot determine whether two values are + // equivalent, because it depends on not yet processed basic blocks -- see the + // documentation on assumptions. + // + // AC is the context in which we are currently performing a diff. + // When we encounter a pair of values for which we can neither prove + // equivalence nor the opposite, we do the following: + // * If AC is nullptr, we treat the pair as non-equivalent. + // * If AC is set, we add an assumption for the basic blocks given by AC, + // and treat the pair as equivalent. The assumption is checked later. + bool equivalentAsOperands(const Value *L, const Value *R, + const AssumptionContext *AC) { + // Fall out if the values have different kind. + // This possibly shouldn't take priority over oracles. + if (L->getValueID() != R->getValueID()) + return false; + + // Value subtypes: Argument, Constant, Instruction, BasicBlock, + // InlineAsm, MDNode, MDString, PseudoSourceValue + + if (isa<Constant>(L)) + return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC); + + if (isa<Instruction>(L)) { + auto It = Values.find(L); + if (It != Values.end()) + return It->second == R; + + if (TentativeValues.count(std::make_pair(L, R))) + return true; + + // L and R might be equivalent, this could depend on not yet processed + // basic blocks, so we cannot decide here. + if (AC) { + // Add an assumption, unless there is a conflict with an existing one + BlockDiffCandidate &BDC = + getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB); + auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R}); + if (!InsertionResult.second && InsertionResult.first->second != R) { + // We already have a conflicting equivalence assumption for L, so at + // least one must be wrong, and we know that there is a diff. + BDC.KnownToDiffer = true; + BDC.EquivalenceAssumptions.clear(); + return false; + } + // Optimistically assume equivalence, and check later once all BBs + // have been processed. + return true; + } + + // Assumptions disabled, so pessimistically assume non-equivalence. + return false; + } + + if (isa<Argument>(L)) + return Values[L] == R; + + if (isa<BasicBlock>(L)) + return Blocks[cast<BasicBlock>(L)] != R; + + // Pretend everything else is identical. + return true; + } + + // Avoid a gcc warning about accessing 'this' in an initializer. + FunctionDifferenceEngine *this_() { return this; } + +public: + FunctionDifferenceEngine(DifferenceEngine &Engine, + const Value *SavedLHS = nullptr, + const Value *SavedRHS = nullptr) + : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS), + Queue(QueueSorter(*this_())) {} + + void diff(const Function *L, const Function *R) { + assert(Values.empty() && "Multiple diffs per engine are not supported!"); + + if (L->arg_size() != R->arg_size()) + Engine.log("different argument counts"); + + // Map the arguments. + for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(), + RI = R->arg_begin(), RE = R->arg_end(); + LI != LE && RI != RE; ++LI, ++RI) + Values[&*LI] = &*RI; + + tryUnify(&*L->begin(), &*R->begin()); + processQueue(); + checkAndReportDiffCandidates(); + } +}; + +struct DiffEntry { + DiffEntry() : Cost(0) {} + + unsigned Cost; + llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange +}; + +bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L, + const Instruction *R) { + return !diff(L, R, false, false, false); +} + +void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart, + BasicBlock::const_iterator RStart) { + BasicBlock::const_iterator LE = LStart->getParent()->end(); + BasicBlock::const_iterator RE = RStart->getParent()->end(); + + unsigned NL = std::distance(LStart, LE); + + SmallVector<DiffEntry, 20> Paths1(NL+1); + SmallVector<DiffEntry, 20> Paths2(NL+1); + + DiffEntry *Cur = Paths1.data(); + DiffEntry *Next = Paths2.data(); + + const unsigned LeftCost = 2; + const unsigned RightCost = 2; + const unsigned MatchCost = 0; + + assert(TentativeValues.empty()); + + // Initialize the first column. + for (unsigned I = 0; I != NL+1; ++I) { + Cur[I].Cost = I * LeftCost; + for (unsigned J = 0; J != I; ++J) + Cur[I].Path.push_back(DC_left); + } + + for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) { + // Initialize the first row. + Next[0] = Cur[0]; + Next[0].Cost += RightCost; + Next[0].Path.push_back(DC_right); + + unsigned Index = 1; + for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) { + if (matchForBlockDiff(&*LI, &*RI)) { + Next[Index] = Cur[Index-1]; + Next[Index].Cost += MatchCost; + Next[Index].Path.push_back(DC_match); + TentativeValues.insert(std::make_pair(&*LI, &*RI)); + } else if (Next[Index-1].Cost <= Cur[Index].Cost) { + Next[Index] = Next[Index-1]; + Next[Index].Cost += LeftCost; + Next[Index].Path.push_back(DC_left); + } else { + Next[Index] = Cur[Index]; + Next[Index].Cost += RightCost; + Next[Index].Path.push_back(DC_right); + } + } + + std::swap(Cur, Next); + } + + // We don't need the tentative values anymore; everything from here + // on out should be non-tentative. + TentativeValues.clear(); + + SmallVectorImpl<char> &Path = Cur[NL].Path; + BasicBlock::const_iterator LI = LStart, RI = RStart; + + DiffLogBuilder Diff(Engine.getConsumer()); + + // Drop trailing matches. + while (Path.size() && Path.back() == DC_match) + Path.pop_back(); + + // Skip leading matches. + SmallVectorImpl<char>::iterator + PI = Path.begin(), PE = Path.end(); + while (PI != PE && *PI == DC_match) { + unify(&*LI, &*RI); + ++PI; + ++LI; + ++RI; + } + + for (; PI != PE; ++PI) { + switch (static_cast<DiffChange>(*PI)) { + case DC_match: + assert(LI != LE && RI != RE); + { + const Instruction *L = &*LI, *R = &*RI; + unify(L, R); + Diff.addMatch(L, R); + } + ++LI; ++RI; + break; + + case DC_left: + assert(LI != LE); + Diff.addLeft(&*LI); + ++LI; + break; + + case DC_right: + assert(RI != RE); + Diff.addRight(&*RI); + ++RI; + break; + } + } + + // Finishing unifying and complaining about the tails of the block, + // which should be matches all the way through. + while (LI != LE) { + assert(RI != RE); + unify(&*LI, &*RI); + ++LI; + ++RI; + } + + // If the terminators have different kinds, but one is an invoke and the + // other is an unconditional branch immediately following a call, unify + // the results and the destinations. + const Instruction *LTerm = LStart->getParent()->getTerminator(); + const Instruction *RTerm = RStart->getParent()->getTerminator(); + if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) { + if (cast<BranchInst>(LTerm)->isConditional()) return; + BasicBlock::const_iterator I = LTerm->getIterator(); + if (I == LStart->getParent()->begin()) return; + --I; + if (!isa<CallInst>(*I)) return; + const CallInst *LCall = cast<CallInst>(&*I); + const InvokeInst *RInvoke = cast<InvokeInst>(RTerm); + if (!equivalentAsOperands(LCall->getCalledOperand(), + RInvoke->getCalledOperand(), nullptr)) + return; + if (!LCall->use_empty()) + Values[LCall] = RInvoke; + tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest()); + } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) { + if (cast<BranchInst>(RTerm)->isConditional()) return; + BasicBlock::const_iterator I = RTerm->getIterator(); + if (I == RStart->getParent()->begin()) return; + --I; + if (!isa<CallInst>(*I)) return; + const CallInst *RCall = cast<CallInst>(I); + const InvokeInst *LInvoke = cast<InvokeInst>(LTerm); + if (!equivalentAsOperands(LInvoke->getCalledOperand(), + RCall->getCalledOperand(), nullptr)) + return; + if (!LInvoke->use_empty()) + Values[LInvoke] = RCall; + tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0)); + } +} +} + +void DifferenceEngine::Oracle::anchor() { } + +void DifferenceEngine::diff(const Function *L, const Function *R) { + Context C(*this, L, R); + + // FIXME: types + // FIXME: attributes and CC + // FIXME: parameter attributes + + // If both are declarations, we're done. + if (L->empty() && R->empty()) + return; + else if (L->empty()) + log("left function is declaration, right function is definition"); + else if (R->empty()) + log("right function is declaration, left function is definition"); + else + FunctionDifferenceEngine(*this).diff(L, R); +} + +void DifferenceEngine::diff(const Module *L, const Module *R) { + StringSet<> LNames; + SmallVector<std::pair<const Function *, const Function *>, 20> Queue; + + unsigned LeftAnonCount = 0; + unsigned RightAnonCount = 0; + + for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) { + const Function *LFn = &*I; + StringRef Name = LFn->getName(); + if (Name.empty()) { + ++LeftAnonCount; + continue; + } + + LNames.insert(Name); + + if (Function *RFn = R->getFunction(LFn->getName())) + Queue.push_back(std::make_pair(LFn, RFn)); + else + logf("function %l exists only in left module") << LFn; + } + + for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) { + const Function *RFn = &*I; + StringRef Name = RFn->getName(); + if (Name.empty()) { + ++RightAnonCount; + continue; + } + + if (!LNames.count(Name)) + logf("function %r exists only in right module") << RFn; + } + + if (LeftAnonCount != 0 || RightAnonCount != 0) { + SmallString<32> Tmp; + logf(("not comparing " + Twine(LeftAnonCount) + + " anonymous functions in the left module and " + + Twine(RightAnonCount) + " in the right module") + .toStringRef(Tmp)); + } + + for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator + I = Queue.begin(), + E = Queue.end(); + I != E; ++I) + diff(I->first, I->second); +} + +bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L, + const GlobalValue *R) { + if (globalValueOracle) return (*globalValueOracle)(L, R); + + if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) { + const GlobalVariable *GVL = cast<GlobalVariable>(L); + const GlobalVariable *GVR = cast<GlobalVariable>(R); + if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() && + GVR->hasLocalLinkage() && GVR->hasUniqueInitializer()) + return FunctionDifferenceEngine(*this, GVL, GVR) + .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(), + nullptr); + } + + return L->getName() == R->getName(); +} |