diff options
author | vvvv <vvvv@ydb.tech> | 2024-02-06 20:01:22 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2024-02-06 20:22:16 +0300 |
commit | 0203b7a9a40828bb2bd4c32029b79ff0ea3d1f8f (patch) | |
tree | e630d0d5bd0bd29fc8c2d2842ed2cfde781b993a /contrib/libs/llvm16/tools/llvm-diff | |
parent | ba27db76d99d12a4f1c06960b5449423218614c4 (diff) | |
download | ydb-0203b7a9a40828bb2bd4c32029b79ff0ea3d1f8f.tar.gz |
llvm16 targets
Diffstat (limited to 'contrib/libs/llvm16/tools/llvm-diff')
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.cpp | 218 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.h | 91 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.cpp | 54 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.h | 83 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp | 1038 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.h | 90 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/lib/ya.make | 30 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/llvm-diff.cpp | 96 | ||||
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-diff/ya.make | 37 |
9 files changed, 1737 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.cpp b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.cpp new file mode 100644 index 0000000000..b6eb71916a --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.cpp @@ -0,0 +1,218 @@ +//===-- DiffConsumer.cpp - Difference Consumer ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files implements the LLVM difference Consumer +// +//===----------------------------------------------------------------------===// + +#include "DiffConsumer.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +static void ComputeNumbering(const Function *F, + DenseMap<const Value *, unsigned> &Numbering) { + unsigned IN = 0; + + // Arguments get the first numbers. + for (const auto &Arg : F->args()) + if (!Arg.hasName()) + Numbering[&Arg] = IN++; + + // Walk the basic blocks in order. + for (const auto &Func : *F) { + if (!Func.hasName()) + Numbering[&Func] = IN++; + + // Walk the instructions in order. + for (const auto &BB : Func) + // void instructions don't get numbers. + if (!BB.hasName() && !BB.getType()->isVoidTy()) + Numbering[&BB] = IN++; + } + + assert(!Numbering.empty() && "asked for numbering but numbering was no-op"); +} + +void Consumer::anchor() { } + +void DiffConsumer::printValue(const Value *V, bool isL) { + if (V->hasName()) { + out << (isa<GlobalValue>(V) ? '@' : '%') << V->getName(); + return; + } + if (V->getType()->isVoidTy()) { + if (auto *SI = dyn_cast<StoreInst>(V)) { + out << "store to "; + printValue(SI->getPointerOperand(), isL); + } else if (auto *CI = dyn_cast<CallInst>(V)) { + out << "call to "; + printValue(CI->getCalledOperand(), isL); + } else if (auto *II = dyn_cast<InvokeInst>(V)) { + out << "invoke to "; + printValue(II->getCalledOperand(), isL); + } else { + out << *V; + } + return; + } + if (isa<Constant>(V)) { + out << *V; + return; + } + + unsigned N = contexts.size(); + while (N > 0) { + --N; + DiffContext &ctxt = contexts[N]; + if (!ctxt.IsFunction) continue; + if (isL) { + if (ctxt.LNumbering.empty()) + ComputeNumbering(cast<Function>(ctxt.L), ctxt.LNumbering); + out << '%' << ctxt.LNumbering[V]; + return; + } else { + if (ctxt.RNumbering.empty()) + ComputeNumbering(cast<Function>(ctxt.R), ctxt.RNumbering); + out << '%' << ctxt.RNumbering[V]; + return; + } + } + + out << "<anonymous>"; +} + +void DiffConsumer::header() { + if (contexts.empty()) return; + for (SmallVectorImpl<DiffContext>::iterator + I = contexts.begin(), E = contexts.end(); I != E; ++I) { + if (I->Differences) continue; + if (isa<Function>(I->L)) { + // Extra newline between functions. + if (Differences) out << "\n"; + + const Function *L = cast<Function>(I->L); + const Function *R = cast<Function>(I->R); + if (L->getName() != R->getName()) + out << "in function " << L->getName() + << " / " << R->getName() << ":\n"; + else + out << "in function " << L->getName() << ":\n"; + } else if (isa<BasicBlock>(I->L)) { + const BasicBlock *L = cast<BasicBlock>(I->L); + const BasicBlock *R = cast<BasicBlock>(I->R); + if (L->hasName() && R->hasName() && L->getName() == R->getName()) + out << " in block %" << L->getName() << ":\n"; + else { + out << " in block "; + printValue(L, true); + out << " / "; + printValue(R, false); + out << ":\n"; + } + } else if (isa<Instruction>(I->L)) { + out << " in instruction "; + printValue(I->L, true); + out << " / "; + printValue(I->R, false); + out << ":\n"; + } + + I->Differences = true; + } +} + +void DiffConsumer::indent() { + unsigned N = Indent; + while (N--) out << ' '; +} + +void DiffConsumer::reset() { + contexts.clear(); + Differences = false; + Indent = 0; +} + +bool DiffConsumer::hadDifferences() const { + return Differences; +} + +void DiffConsumer::enterContext(const Value *L, const Value *R) { + contexts.push_back(DiffContext(L, R)); + Indent += 2; +} + +void DiffConsumer::exitContext() { + Differences |= contexts.back().Differences; + contexts.pop_back(); + Indent -= 2; +} + +void DiffConsumer::log(StringRef text) { + header(); + indent(); + out << text << '\n'; +} + +void DiffConsumer::logf(const LogBuilder &Log) { + header(); + indent(); + + unsigned arg = 0; + + StringRef format = Log.getFormat(); + while (true) { + size_t percent = format.find('%'); + if (percent == StringRef::npos) { + out << format; + break; + } + assert(format[percent] == '%'); + + if (percent > 0) out << format.substr(0, percent); + + switch (format[percent+1]) { + case '%': out << '%'; break; + case 'l': printValue(Log.getArgument(arg++), true); break; + case 'r': printValue(Log.getArgument(arg++), false); break; + default: llvm_unreachable("unknown format character"); + } + + format = format.substr(percent+2); + } + + out << '\n'; +} + +void DiffConsumer::logd(const DiffLogBuilder &Log) { + header(); + + for (unsigned I = 0, E = Log.getNumLines(); I != E; ++I) { + indent(); + switch (Log.getLineKind(I)) { + case DC_match: + out << " "; + Log.getLeft(I)->print(dbgs()); dbgs() << '\n'; + //printValue(Log.getLeft(I), true); + break; + case DC_left: + out << "< "; + Log.getLeft(I)->print(dbgs()); dbgs() << '\n'; + //printValue(Log.getLeft(I), true); + break; + case DC_right: + out << "> "; + Log.getRight(I)->print(dbgs()); dbgs() << '\n'; + //printValue(Log.getRight(I), false); + break; + } + //out << "\n"; + } +} diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.h b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.h new file mode 100644 index 0000000000..08c3afcbe1 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffConsumer.h @@ -0,0 +1,91 @@ +//===-- DiffConsumer.h - Difference Consumer --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the interface to the LLVM difference Consumer +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DIFF_DIFFCONSUMER_H +#define LLVM_TOOLS_LLVM_DIFF_DIFFCONSUMER_H + +#include "DiffLog.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +class StringRef; + class Module; + class Value; + class Function; + + /// The interface for consumers of difference data. + class Consumer { + virtual void anchor(); + public: + /// Record that a local context has been entered. Left and + /// Right are IR "containers" of some sort which are being + /// considered for structural equivalence: global variables, + /// functions, blocks, instructions, etc. + virtual void enterContext(const Value *Left, const Value *Right) = 0; + + /// Record that a local context has been exited. + virtual void exitContext() = 0; + + /// Record a difference within the current context. + virtual void log(StringRef Text) = 0; + + /// Record a formatted difference within the current context. + virtual void logf(const LogBuilder &Log) = 0; + + /// Record a line-by-line instruction diff. + virtual void logd(const DiffLogBuilder &Log) = 0; + + protected: + virtual ~Consumer() {} + }; + + class DiffConsumer : public Consumer { + private: + struct DiffContext { + DiffContext(const Value *L, const Value *R) + : L(L), R(R), Differences(false), IsFunction(isa<Function>(L)) {} + const Value *L; + const Value *R; + bool Differences; + bool IsFunction; + DenseMap<const Value *, unsigned> LNumbering; + DenseMap<const Value *, unsigned> RNumbering; + }; + + raw_ostream &out; + SmallVector<DiffContext, 5> contexts; + bool Differences; + unsigned Indent; + + void printValue(const Value *V, bool isL); + void header(); + void indent(); + + public: + DiffConsumer() + : out(errs()), Differences(false), Indent(0) {} + + void reset(); + bool hadDifferences() const; + void enterContext(const Value *L, const Value *R) override; + void exitContext() override; + void log(StringRef text) override; + void logf(const LogBuilder &Log) override; + void logd(const DiffLogBuilder &Log) override; + }; +} + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.cpp b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.cpp new file mode 100644 index 0000000000..d31a345d25 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.cpp @@ -0,0 +1,54 @@ +//===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the interface to the LLVM difference log builder. +// +//===----------------------------------------------------------------------===// + +#include "DiffLog.h" +#include "DiffConsumer.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +LogBuilder::~LogBuilder() { + if (consumer) + consumer->logf(*this); +} + +StringRef LogBuilder::getFormat() const { return Format; } + +unsigned LogBuilder::getNumArguments() const { return Arguments.size(); } +const Value *LogBuilder::getArgument(unsigned I) const { return Arguments[I]; } + +DiffLogBuilder::~DiffLogBuilder() { consumer.logd(*this); } + +void DiffLogBuilder::addMatch(const Instruction *L, const Instruction *R) { + Diff.push_back(DiffRecord(L, R)); +} +void DiffLogBuilder::addLeft(const Instruction *L) { + // HACK: VS 2010 has a bug in the stdlib that requires this. + Diff.push_back(DiffRecord(L, DiffRecord::second_type(nullptr))); +} +void DiffLogBuilder::addRight(const Instruction *R) { + // HACK: VS 2010 has a bug in the stdlib that requires this. + Diff.push_back(DiffRecord(DiffRecord::first_type(nullptr), R)); +} + +unsigned DiffLogBuilder::getNumLines() const { return Diff.size(); } + +DiffChange DiffLogBuilder::getLineKind(unsigned I) const { + return (Diff[I].first ? (Diff[I].second ? DC_match : DC_left) + : DC_right); +} +const Instruction *DiffLogBuilder::getLeft(unsigned I) const { + return Diff[I].first; +} +const Instruction *DiffLogBuilder::getRight(unsigned I) const { + return Diff[I].second; +} diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.h b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.h new file mode 100644 index 0000000000..d8b07b9711 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/DiffLog.h @@ -0,0 +1,83 @@ +//===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the interface to the LLVM difference log builder. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DIFF_DIFFLOG_H +#define LLVM_TOOLS_LLVM_DIFF_DIFFLOG_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { + class Instruction; + class Value; + class Consumer; + + /// Trichotomy assumption + enum DiffChange { DC_match, DC_left, DC_right }; + + /// A temporary-object class for building up log messages. + class LogBuilder { + Consumer *consumer; + + /// The use of a stored StringRef here is okay because + /// LogBuilder should be used only as a temporary, and as a + /// temporary it will be destructed before whatever temporary + /// might be initializing this format. + StringRef Format; + + SmallVector<const Value *, 4> Arguments; + + public: + LogBuilder(Consumer &c, StringRef Format) : consumer(&c), Format(Format) {} + LogBuilder(LogBuilder &&L) + : consumer(L.consumer), Format(L.Format), + Arguments(std::move(L.Arguments)) { + L.consumer = nullptr; + } + + LogBuilder &operator<<(const Value *V) { + Arguments.push_back(V); + return *this; + } + + ~LogBuilder(); + + StringRef getFormat() const; + unsigned getNumArguments() const; + const Value *getArgument(unsigned I) const; + }; + + /// A temporary-object class for building up diff messages. + class DiffLogBuilder { + typedef std::pair<const Instruction *, const Instruction *> DiffRecord; + SmallVector<DiffRecord, 20> Diff; + + Consumer &consumer; + + public: + DiffLogBuilder(Consumer &c) : consumer(c) {} + ~DiffLogBuilder(); + + void addMatch(const Instruction *L, const Instruction *R); + // HACK: VS 2010 has a bug in the stdlib that requires this. + void addLeft(const Instruction *L); + void addRight(const Instruction *R); + + unsigned getNumLines() const; + DiffChange getLineKind(unsigned I) const; + const Instruction *getLeft(unsigned I) const; + const Instruction *getRight(unsigned I) const; + }; + +} + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp b/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp new file mode 100644 index 0000000000..6573bf010c --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.cpp @@ -0,0 +1,1038 @@ +//===-- DifferenceEngine.cpp - Structural function/module comparison ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the implementation of the LLVM difference +// engine, which structurally compares global values within a module. +// +//===----------------------------------------------------------------------===// + +#include "DifferenceEngine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/type_traits.h" +#include <utility> + +using namespace llvm; + +namespace { + +/// A priority queue, implemented as a heap. +template <class T, class Sorter, unsigned InlineCapacity> +class PriorityQueue { + Sorter Precedes; + llvm::SmallVector<T, InlineCapacity> Storage; + +public: + PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {} + + /// Checks whether the heap is empty. + bool empty() const { return Storage.empty(); } + + /// Insert a new value on the heap. + void insert(const T &V) { + unsigned Index = Storage.size(); + Storage.push_back(V); + if (Index == 0) return; + + T *data = Storage.data(); + while (true) { + unsigned Target = (Index + 1) / 2 - 1; + if (!Precedes(data[Index], data[Target])) return; + std::swap(data[Index], data[Target]); + if (Target == 0) return; + Index = Target; + } + } + + /// Remove the minimum value in the heap. Only valid on a non-empty heap. + T remove_min() { + assert(!empty()); + T tmp = Storage[0]; + + unsigned NewSize = Storage.size() - 1; + if (NewSize) { + // Move the slot at the end to the beginning. + if (std::is_trivially_copyable<T>::value) + Storage[0] = Storage[NewSize]; + else + std::swap(Storage[0], Storage[NewSize]); + + // Bubble the root up as necessary. + unsigned Index = 0; + while (true) { + // With a 1-based index, the children would be Index*2 and Index*2+1. + unsigned R = (Index + 1) * 2; + unsigned L = R - 1; + + // If R is out of bounds, we're done after this in any case. + if (R >= NewSize) { + // If L is also out of bounds, we're done immediately. + if (L >= NewSize) break; + + // Otherwise, test whether we should swap L and Index. + if (Precedes(Storage[L], Storage[Index])) + std::swap(Storage[L], Storage[Index]); + break; + } + + // Otherwise, we need to compare with the smaller of L and R. + // Prefer R because it's closer to the end of the array. + unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R); + + // If Index is >= the min of L and R, then heap ordering is restored. + if (!Precedes(Storage[IndexToTest], Storage[Index])) + break; + + // Otherwise, keep bubbling up. + std::swap(Storage[IndexToTest], Storage[Index]); + Index = IndexToTest; + } + } + Storage.pop_back(); + + return tmp; + } +}; + +/// A function-scope difference engine. +class FunctionDifferenceEngine { + DifferenceEngine &Engine; + + // Some initializers may reference the variable we're currently checking. This + // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent + // recursing. + const Value *SavedLHS; + const Value *SavedRHS; + + // The current mapping from old local values to new local values. + DenseMap<const Value *, const Value *> Values; + + // The current mapping from old blocks to new blocks. + DenseMap<const BasicBlock *, const BasicBlock *> Blocks; + + // The tentative mapping from old local values while comparing a pair of + // basic blocks. Once the pair has been processed, the tentative mapping is + // committed to the Values map. + DenseSet<std::pair<const Value *, const Value *>> TentativeValues; + + // Equivalence Assumptions + // + // For basic blocks in loops, some values in phi nodes may depend on + // values from not yet processed basic blocks in the loop. When encountering + // such values, we optimistically asssume their equivalence and store this + // assumption in a BlockDiffCandidate for the pair of compared BBs. + // + // Once we have diffed all BBs, for every BlockDiffCandidate, we check all + // stored assumptions using the Values map that stores proven equivalences + // between the old and new values, and report a diff if an assumption cannot + // be proven to be true. + // + // Note that after having made an assumption, all further determined + // equivalences implicitly depend on that assumption. These will not be + // reverted or reported if the assumption proves to be false, because these + // are considered indirect diffs caused by earlier direct diffs. + // + // We aim to avoid false negatives in llvm-diff, that is, ensure that + // whenever no diff is reported, the functions are indeed equal. If + // assumptions were made, this is not entirely clear, because in principle we + // could end up with a circular proof where the proof of equivalence of two + // nodes is depending on the assumption of their equivalence. + // + // To see that assumptions do not add false negatives, note that if we do not + // report a diff, this means that there is an equivalence mapping between old + // and new values that is consistent with all assumptions made. The circular + // dependency that exists on an IR value level does not exist at run time, + // because the values selected by the phi nodes must always already have been + // computed. Hence, we can prove equivalence of the old and new functions by + // considering step-wise parallel execution, and incrementally proving + // equivalence of every new computed value. Another way to think about it is + // to imagine cloning the loop BBs for every iteration, turning the loops + // into (possibly infinite) DAGs, and proving equivalence by induction on the + // iteration, using the computed value mapping. + + // The class BlockDiffCandidate stores pairs which either have already been + // proven to differ, or pairs whose equivalence depends on assumptions to be + // verified later. + struct BlockDiffCandidate { + const BasicBlock *LBB; + const BasicBlock *RBB; + // Maps old values to assumed-to-be-equivalent new values + SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions; + // If set, we already know the blocks differ. + bool KnownToDiffer; + }; + + // List of block diff candidates in the order found by processing. + // We generate reports in this order. + // For every LBB, there may only be one corresponding RBB. + SmallVector<BlockDiffCandidate> BlockDiffCandidates; + // Maps LBB to the index of its BlockDiffCandidate, if existing. + DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices; + + // Note: Every LBB must always be queried together with the same RBB. + // The returned reference is not permanently valid and should not be stored. + BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB, + const BasicBlock *RBB) { + auto It = BlockDiffCandidateIndices.find(LBB); + // Check if LBB already has a diff candidate + if (It == BlockDiffCandidateIndices.end()) { + // Add new one + BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size(); + BlockDiffCandidates.push_back( + {LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false}); + return BlockDiffCandidates.back(); + } + // Use existing one + BlockDiffCandidate &Result = BlockDiffCandidates[It->second]; + assert(Result.RBB == RBB && "Inconsistent basic block pairing!"); + return Result; + } + + // Optionally passed to equivalence checker functions, so these can add + // assumptions in BlockDiffCandidates. Its presence controls whether + // assumptions are generated. + struct AssumptionContext { + // The two basic blocks that need the two compared values to be equivalent. + const BasicBlock *LBB; + const BasicBlock *RBB; + }; + + unsigned getUnprocPredCount(const BasicBlock *Block) const { + unsigned Count = 0; + for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E; + ++I) + if (!Blocks.count(*I)) Count++; + return Count; + } + + typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair; + + /// A type which sorts a priority queue by the number of unprocessed + /// predecessor blocks it has remaining. + /// + /// This is actually really expensive to calculate. + struct QueueSorter { + const FunctionDifferenceEngine &fde; + explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {} + + bool operator()(BlockPair &Old, BlockPair &New) { + return fde.getUnprocPredCount(Old.first) + < fde.getUnprocPredCount(New.first); + } + }; + + /// A queue of unified blocks to process. + PriorityQueue<BlockPair, QueueSorter, 20> Queue; + + /// Try to unify the given two blocks. Enqueues them for processing + /// if they haven't already been processed. + /// + /// Returns true if there was a problem unifying them. + bool tryUnify(const BasicBlock *L, const BasicBlock *R) { + const BasicBlock *&Ref = Blocks[L]; + + if (Ref) { + if (Ref == R) return false; + + Engine.logf("successor %l cannot be equivalent to %r; " + "it's already equivalent to %r") + << L << R << Ref; + return true; + } + + Ref = R; + Queue.insert(BlockPair(L, R)); + return false; + } + + /// Unifies two instructions, given that they're known not to have + /// structural differences. + void unify(const Instruction *L, const Instruction *R) { + DifferenceEngine::Context C(Engine, L, R); + + bool Result = diff(L, R, true, true, true); + assert(!Result && "structural differences second time around?"); + (void) Result; + if (!L->use_empty()) + Values[L] = R; + } + + void processQueue() { + while (!Queue.empty()) { + BlockPair Pair = Queue.remove_min(); + diff(Pair.first, Pair.second); + } + } + + void checkAndReportDiffCandidates() { + for (BlockDiffCandidate &BDC : BlockDiffCandidates) { + + // Check assumptions + for (const auto &[L, R] : BDC.EquivalenceAssumptions) { + auto It = Values.find(L); + if (It == Values.end() || It->second != R) { + BDC.KnownToDiffer = true; + break; + } + } + + // Run block diff if the BBs differ + if (BDC.KnownToDiffer) { + DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB); + runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin()); + } + } + } + + void diff(const BasicBlock *L, const BasicBlock *R) { + DifferenceEngine::Context C(Engine, L, R); + + BasicBlock::const_iterator LI = L->begin(), LE = L->end(); + BasicBlock::const_iterator RI = R->begin(); + + do { + assert(LI != LE && RI != R->end()); + const Instruction *LeftI = &*LI, *RightI = &*RI; + + // If the instructions differ, start the more sophisticated diff + // algorithm at the start of the block. + if (diff(LeftI, RightI, false, false, true)) { + TentativeValues.clear(); + // Register (L, R) as diffing pair. Note that we could directly emit a + // block diff here, but this way we ensure all diffs are emitted in one + // consistent order, independent of whether the diffs were detected + // immediately or via invalid assumptions. + getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true; + return; + } + + // Otherwise, tentatively unify them. + if (!LeftI->use_empty()) + TentativeValues.insert(std::make_pair(LeftI, RightI)); + + ++LI; + ++RI; + } while (LI != LE); // This is sufficient: we can't get equality of + // terminators if there are residual instructions. + + // Unify everything in the block, non-tentatively this time. + TentativeValues.clear(); + for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI) + unify(&*LI, &*RI); + } + + bool matchForBlockDiff(const Instruction *L, const Instruction *R); + void runBlockDiff(BasicBlock::const_iterator LI, + BasicBlock::const_iterator RI); + + bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) { + // FIXME: call attributes + AssumptionContext AC = {L.getParent(), R.getParent()}; + if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(), + &AC)) { + if (Complain) Engine.log("called functions differ"); + return true; + } + if (L.arg_size() != R.arg_size()) { + if (Complain) Engine.log("argument counts differ"); + return true; + } + for (unsigned I = 0, E = L.arg_size(); I != E; ++I) + if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) { + if (Complain) + Engine.logf("arguments %l and %r differ") + << L.getArgOperand(I) << R.getArgOperand(I); + return true; + } + return false; + } + + // If AllowAssumptions is enabled, whenever we encounter a pair of values + // that we cannot prove to be equivalent, we assume equivalence and store that + // assumption to be checked later in BlockDiffCandidates. + bool diff(const Instruction *L, const Instruction *R, bool Complain, + bool TryUnify, bool AllowAssumptions) { + // FIXME: metadata (if Complain is set) + AssumptionContext ACValue = {L->getParent(), R->getParent()}; + // nullptr AssumptionContext disables assumption generation. + const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr; + + // Different opcodes always imply different operations. + if (L->getOpcode() != R->getOpcode()) { + if (Complain) Engine.log("different instruction types"); + return true; + } + + if (isa<CmpInst>(L)) { + if (cast<CmpInst>(L)->getPredicate() + != cast<CmpInst>(R)->getPredicate()) { + if (Complain) Engine.log("different predicates"); + return true; + } + } else if (isa<CallInst>(L)) { + return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain); + } else if (isa<PHINode>(L)) { + const PHINode &LI = cast<PHINode>(*L); + const PHINode &RI = cast<PHINode>(*R); + + // This is really weird; type uniquing is broken? + if (LI.getType() != RI.getType()) { + if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) { + if (Complain) Engine.log("different phi types"); + return true; + } + } + + if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) { + if (Complain) + Engine.log("PHI node # of incoming values differ"); + return true; + } + + for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) { + if (TryUnify) + tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I)); + + if (!equivalentAsOperands(LI.getIncomingValue(I), + RI.getIncomingValue(I), AC)) { + if (Complain) + Engine.log("PHI node incoming values differ"); + return true; + } + } + + return false; + + // Terminators. + } else if (isa<InvokeInst>(L)) { + const InvokeInst &LI = cast<InvokeInst>(*L); + const InvokeInst &RI = cast<InvokeInst>(*R); + if (diffCallSites(LI, RI, Complain)) + return true; + + if (TryUnify) { + tryUnify(LI.getNormalDest(), RI.getNormalDest()); + tryUnify(LI.getUnwindDest(), RI.getUnwindDest()); + } + return false; + + } else if (isa<CallBrInst>(L)) { + const CallBrInst &LI = cast<CallBrInst>(*L); + const CallBrInst &RI = cast<CallBrInst>(*R); + if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) { + if (Complain) + Engine.log("callbr # of indirect destinations differ"); + return true; + } + + // Perform the "try unify" step so that we can equate the indirect + // destinations before checking the call site. + for (unsigned I = 0; I < LI.getNumIndirectDests(); I++) + tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I)); + + if (diffCallSites(LI, RI, Complain)) + return true; + + if (TryUnify) + tryUnify(LI.getDefaultDest(), RI.getDefaultDest()); + return false; + + } else if (isa<BranchInst>(L)) { + const BranchInst *LI = cast<BranchInst>(L); + const BranchInst *RI = cast<BranchInst>(R); + if (LI->isConditional() != RI->isConditional()) { + if (Complain) Engine.log("branch conditionality differs"); + return true; + } + + if (LI->isConditional()) { + if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { + if (Complain) Engine.log("branch conditions differ"); + return true; + } + if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1)); + } + if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0)); + return false; + + } else if (isa<IndirectBrInst>(L)) { + const IndirectBrInst *LI = cast<IndirectBrInst>(L); + const IndirectBrInst *RI = cast<IndirectBrInst>(R); + if (LI->getNumDestinations() != RI->getNumDestinations()) { + if (Complain) Engine.log("indirectbr # of destinations differ"); + return true; + } + + if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) { + if (Complain) Engine.log("indirectbr addresses differ"); + return true; + } + + if (TryUnify) { + for (unsigned i = 0; i < LI->getNumDestinations(); i++) { + tryUnify(LI->getDestination(i), RI->getDestination(i)); + } + } + return false; + + } else if (isa<SwitchInst>(L)) { + const SwitchInst *LI = cast<SwitchInst>(L); + const SwitchInst *RI = cast<SwitchInst>(R); + if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { + if (Complain) Engine.log("switch conditions differ"); + return true; + } + if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest()); + + bool Difference = false; + + DenseMap<const ConstantInt *, const BasicBlock *> LCases; + for (auto Case : LI->cases()) + LCases[Case.getCaseValue()] = Case.getCaseSuccessor(); + + for (auto Case : RI->cases()) { + const ConstantInt *CaseValue = Case.getCaseValue(); + const BasicBlock *LCase = LCases[CaseValue]; + if (LCase) { + if (TryUnify) + tryUnify(LCase, Case.getCaseSuccessor()); + LCases.erase(CaseValue); + } else if (Complain || !Difference) { + if (Complain) + Engine.logf("right switch has extra case %r") << CaseValue; + Difference = true; + } + } + if (!Difference) + for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator + I = LCases.begin(), + E = LCases.end(); + I != E; ++I) { + if (Complain) + Engine.logf("left switch has extra case %l") << I->first; + Difference = true; + } + return Difference; + } else if (isa<UnreachableInst>(L)) { + return false; + } + + if (L->getNumOperands() != R->getNumOperands()) { + if (Complain) Engine.log("instructions have different operand counts"); + return true; + } + + for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { + Value *LO = L->getOperand(I), *RO = R->getOperand(I); + if (!equivalentAsOperands(LO, RO, AC)) { + if (Complain) Engine.logf("operands %l and %r differ") << LO << RO; + return true; + } + } + + return false; + } + +public: + bool equivalentAsOperands(const Constant *L, const Constant *R, + const AssumptionContext *AC) { + // Use equality as a preliminary filter. + if (L == R) + return true; + + if (L->getValueID() != R->getValueID()) + return false; + + // Ask the engine about global values. + if (isa<GlobalValue>(L)) + return Engine.equivalentAsOperands(cast<GlobalValue>(L), + cast<GlobalValue>(R)); + + // Compare constant expressions structurally. + if (isa<ConstantExpr>(L)) + return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R), + AC); + + // Constants of the "same type" don't always actually have the same + // type; I don't know why. Just white-list them. + if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L)) + return true; + + // Block addresses only match if we've already encountered the + // block. FIXME: tentative matches? + if (isa<BlockAddress>(L)) + return Blocks[cast<BlockAddress>(L)->getBasicBlock()] + == cast<BlockAddress>(R)->getBasicBlock(); + + // If L and R are ConstantVectors, compare each element + if (isa<ConstantVector>(L)) { + const ConstantVector *CVL = cast<ConstantVector>(L); + const ConstantVector *CVR = cast<ConstantVector>(R); + if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements()) + return false; + for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) { + if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC)) + return false; + } + return true; + } + + // If L and R are ConstantArrays, compare the element count and types. + if (isa<ConstantArray>(L)) { + const ConstantArray *CAL = cast<ConstantArray>(L); + const ConstantArray *CAR = cast<ConstantArray>(R); + // Sometimes a type may be equivalent, but not uniquified---e.g. it may + // contain a GEP instruction. Do a deeper comparison of the types. + if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements()) + return false; + + for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) { + if (!equivalentAsOperands(CAL->getAggregateElement(I), + CAR->getAggregateElement(I), AC)) + return false; + } + + return true; + } + + // If L and R are ConstantStructs, compare each field and type. + if (isa<ConstantStruct>(L)) { + const ConstantStruct *CSL = cast<ConstantStruct>(L); + const ConstantStruct *CSR = cast<ConstantStruct>(R); + + const StructType *LTy = cast<StructType>(CSL->getType()); + const StructType *RTy = cast<StructType>(CSR->getType()); + + // The StructTypes should have the same attributes. Don't use + // isLayoutIdentical(), because that just checks the element pointers, + // which may not work here. + if (LTy->getNumElements() != RTy->getNumElements() || + LTy->isPacked() != RTy->isPacked()) + return false; + + for (unsigned I = 0; I < LTy->getNumElements(); I++) { + const Value *LAgg = CSL->getAggregateElement(I); + const Value *RAgg = CSR->getAggregateElement(I); + + if (LAgg == SavedLHS || RAgg == SavedRHS) { + if (LAgg != SavedLHS || RAgg != SavedRHS) + // If the left and right operands aren't both re-analyzing the + // variable, then the initialiers don't match, so report "false". + // Otherwise, we skip these operands.. + return false; + + continue; + } + + if (!equivalentAsOperands(LAgg, RAgg, AC)) { + return false; + } + } + + return true; + } + + return false; + } + + bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R, + const AssumptionContext *AC) { + if (L == R) + return true; + + if (L->getOpcode() != R->getOpcode()) + return false; + + switch (L->getOpcode()) { + case Instruction::ICmp: + case Instruction::FCmp: + if (L->getPredicate() != R->getPredicate()) + return false; + break; + + case Instruction::GetElementPtr: + // FIXME: inbounds? + break; + + default: + break; + } + + if (L->getNumOperands() != R->getNumOperands()) + return false; + + for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { + const auto *LOp = L->getOperand(I); + const auto *ROp = R->getOperand(I); + + if (LOp == SavedLHS || ROp == SavedRHS) { + if (LOp != SavedLHS || ROp != SavedRHS) + // If the left and right operands aren't both re-analyzing the + // variable, then the initialiers don't match, so report "false". + // Otherwise, we skip these operands.. + return false; + + continue; + } + + if (!equivalentAsOperands(LOp, ROp, AC)) + return false; + } + + return true; + } + + // There are cases where we cannot determine whether two values are + // equivalent, because it depends on not yet processed basic blocks -- see the + // documentation on assumptions. + // + // AC is the context in which we are currently performing a diff. + // When we encounter a pair of values for which we can neither prove + // equivalence nor the opposite, we do the following: + // * If AC is nullptr, we treat the pair as non-equivalent. + // * If AC is set, we add an assumption for the basic blocks given by AC, + // and treat the pair as equivalent. The assumption is checked later. + bool equivalentAsOperands(const Value *L, const Value *R, + const AssumptionContext *AC) { + // Fall out if the values have different kind. + // This possibly shouldn't take priority over oracles. + if (L->getValueID() != R->getValueID()) + return false; + + // Value subtypes: Argument, Constant, Instruction, BasicBlock, + // InlineAsm, MDNode, MDString, PseudoSourceValue + + if (isa<Constant>(L)) + return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC); + + if (isa<Instruction>(L)) { + auto It = Values.find(L); + if (It != Values.end()) + return It->second == R; + + if (TentativeValues.count(std::make_pair(L, R))) + return true; + + // L and R might be equivalent, this could depend on not yet processed + // basic blocks, so we cannot decide here. + if (AC) { + // Add an assumption, unless there is a conflict with an existing one + BlockDiffCandidate &BDC = + getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB); + auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R}); + if (!InsertionResult.second && InsertionResult.first->second != R) { + // We already have a conflicting equivalence assumption for L, so at + // least one must be wrong, and we know that there is a diff. + BDC.KnownToDiffer = true; + BDC.EquivalenceAssumptions.clear(); + return false; + } + // Optimistically assume equivalence, and check later once all BBs + // have been processed. + return true; + } + + // Assumptions disabled, so pessimistically assume non-equivalence. + return false; + } + + if (isa<Argument>(L)) + return Values[L] == R; + + if (isa<BasicBlock>(L)) + return Blocks[cast<BasicBlock>(L)] != R; + + // Pretend everything else is identical. + return true; + } + + // Avoid a gcc warning about accessing 'this' in an initializer. + FunctionDifferenceEngine *this_() { return this; } + +public: + FunctionDifferenceEngine(DifferenceEngine &Engine, + const Value *SavedLHS = nullptr, + const Value *SavedRHS = nullptr) + : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS), + Queue(QueueSorter(*this_())) {} + + void diff(const Function *L, const Function *R) { + assert(Values.empty() && "Multiple diffs per engine are not supported!"); + + if (L->arg_size() != R->arg_size()) + Engine.log("different argument counts"); + + // Map the arguments. + for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(), + RI = R->arg_begin(), RE = R->arg_end(); + LI != LE && RI != RE; ++LI, ++RI) + Values[&*LI] = &*RI; + + tryUnify(&*L->begin(), &*R->begin()); + processQueue(); + checkAndReportDiffCandidates(); + } +}; + +struct DiffEntry { + DiffEntry() : Cost(0) {} + + unsigned Cost; + llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange +}; + +bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L, + const Instruction *R) { + return !diff(L, R, false, false, false); +} + +void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart, + BasicBlock::const_iterator RStart) { + BasicBlock::const_iterator LE = LStart->getParent()->end(); + BasicBlock::const_iterator RE = RStart->getParent()->end(); + + unsigned NL = std::distance(LStart, LE); + + SmallVector<DiffEntry, 20> Paths1(NL+1); + SmallVector<DiffEntry, 20> Paths2(NL+1); + + DiffEntry *Cur = Paths1.data(); + DiffEntry *Next = Paths2.data(); + + const unsigned LeftCost = 2; + const unsigned RightCost = 2; + const unsigned MatchCost = 0; + + assert(TentativeValues.empty()); + + // Initialize the first column. + for (unsigned I = 0; I != NL+1; ++I) { + Cur[I].Cost = I * LeftCost; + for (unsigned J = 0; J != I; ++J) + Cur[I].Path.push_back(DC_left); + } + + for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) { + // Initialize the first row. + Next[0] = Cur[0]; + Next[0].Cost += RightCost; + Next[0].Path.push_back(DC_right); + + unsigned Index = 1; + for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) { + if (matchForBlockDiff(&*LI, &*RI)) { + Next[Index] = Cur[Index-1]; + Next[Index].Cost += MatchCost; + Next[Index].Path.push_back(DC_match); + TentativeValues.insert(std::make_pair(&*LI, &*RI)); + } else if (Next[Index-1].Cost <= Cur[Index].Cost) { + Next[Index] = Next[Index-1]; + Next[Index].Cost += LeftCost; + Next[Index].Path.push_back(DC_left); + } else { + Next[Index] = Cur[Index]; + Next[Index].Cost += RightCost; + Next[Index].Path.push_back(DC_right); + } + } + + std::swap(Cur, Next); + } + + // We don't need the tentative values anymore; everything from here + // on out should be non-tentative. + TentativeValues.clear(); + + SmallVectorImpl<char> &Path = Cur[NL].Path; + BasicBlock::const_iterator LI = LStart, RI = RStart; + + DiffLogBuilder Diff(Engine.getConsumer()); + + // Drop trailing matches. + while (Path.size() && Path.back() == DC_match) + Path.pop_back(); + + // Skip leading matches. + SmallVectorImpl<char>::iterator + PI = Path.begin(), PE = Path.end(); + while (PI != PE && *PI == DC_match) { + unify(&*LI, &*RI); + ++PI; + ++LI; + ++RI; + } + + for (; PI != PE; ++PI) { + switch (static_cast<DiffChange>(*PI)) { + case DC_match: + assert(LI != LE && RI != RE); + { + const Instruction *L = &*LI, *R = &*RI; + unify(L, R); + Diff.addMatch(L, R); + } + ++LI; ++RI; + break; + + case DC_left: + assert(LI != LE); + Diff.addLeft(&*LI); + ++LI; + break; + + case DC_right: + assert(RI != RE); + Diff.addRight(&*RI); + ++RI; + break; + } + } + + // Finishing unifying and complaining about the tails of the block, + // which should be matches all the way through. + while (LI != LE) { + assert(RI != RE); + unify(&*LI, &*RI); + ++LI; + ++RI; + } + + // If the terminators have different kinds, but one is an invoke and the + // other is an unconditional branch immediately following a call, unify + // the results and the destinations. + const Instruction *LTerm = LStart->getParent()->getTerminator(); + const Instruction *RTerm = RStart->getParent()->getTerminator(); + if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) { + if (cast<BranchInst>(LTerm)->isConditional()) return; + BasicBlock::const_iterator I = LTerm->getIterator(); + if (I == LStart->getParent()->begin()) return; + --I; + if (!isa<CallInst>(*I)) return; + const CallInst *LCall = cast<CallInst>(&*I); + const InvokeInst *RInvoke = cast<InvokeInst>(RTerm); + if (!equivalentAsOperands(LCall->getCalledOperand(), + RInvoke->getCalledOperand(), nullptr)) + return; + if (!LCall->use_empty()) + Values[LCall] = RInvoke; + tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest()); + } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) { + if (cast<BranchInst>(RTerm)->isConditional()) return; + BasicBlock::const_iterator I = RTerm->getIterator(); + if (I == RStart->getParent()->begin()) return; + --I; + if (!isa<CallInst>(*I)) return; + const CallInst *RCall = cast<CallInst>(I); + const InvokeInst *LInvoke = cast<InvokeInst>(LTerm); + if (!equivalentAsOperands(LInvoke->getCalledOperand(), + RCall->getCalledOperand(), nullptr)) + return; + if (!LInvoke->use_empty()) + Values[LInvoke] = RCall; + tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0)); + } +} +} + +void DifferenceEngine::Oracle::anchor() { } + +void DifferenceEngine::diff(const Function *L, const Function *R) { + Context C(*this, L, R); + + // FIXME: types + // FIXME: attributes and CC + // FIXME: parameter attributes + + // If both are declarations, we're done. + if (L->empty() && R->empty()) + return; + else if (L->empty()) + log("left function is declaration, right function is definition"); + else if (R->empty()) + log("right function is declaration, left function is definition"); + else + FunctionDifferenceEngine(*this).diff(L, R); +} + +void DifferenceEngine::diff(const Module *L, const Module *R) { + StringSet<> LNames; + SmallVector<std::pair<const Function *, const Function *>, 20> Queue; + + unsigned LeftAnonCount = 0; + unsigned RightAnonCount = 0; + + for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) { + const Function *LFn = &*I; + StringRef Name = LFn->getName(); + if (Name.empty()) { + ++LeftAnonCount; + continue; + } + + LNames.insert(Name); + + if (Function *RFn = R->getFunction(LFn->getName())) + Queue.push_back(std::make_pair(LFn, RFn)); + else + logf("function %l exists only in left module") << LFn; + } + + for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) { + const Function *RFn = &*I; + StringRef Name = RFn->getName(); + if (Name.empty()) { + ++RightAnonCount; + continue; + } + + if (!LNames.count(Name)) + logf("function %r exists only in right module") << RFn; + } + + if (LeftAnonCount != 0 || RightAnonCount != 0) { + SmallString<32> Tmp; + logf(("not comparing " + Twine(LeftAnonCount) + + " anonymous functions in the left module and " + + Twine(RightAnonCount) + " in the right module") + .toStringRef(Tmp)); + } + + for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator + I = Queue.begin(), + E = Queue.end(); + I != E; ++I) + diff(I->first, I->second); +} + +bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L, + const GlobalValue *R) { + if (globalValueOracle) return (*globalValueOracle)(L, R); + + if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) { + const GlobalVariable *GVL = cast<GlobalVariable>(L); + const GlobalVariable *GVR = cast<GlobalVariable>(R); + if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() && + GVR->hasLocalLinkage() && GVR->hasUniqueInitializer()) + return FunctionDifferenceEngine(*this, GVL, GVR) + .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(), + nullptr); + } + + return L->getName() == R->getName(); +} diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.h b/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.h new file mode 100644 index 0000000000..436a355663 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/DifferenceEngine.h @@ -0,0 +1,90 @@ +//===-- DifferenceEngine.h - Module comparator ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the interface to the LLVM difference engine, +// which structurally compares functions within a module. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DIFF_DIFFERENCEENGINE_H +#define LLVM_TOOLS_LLVM_DIFF_DIFFERENCEENGINE_H + +#include "DiffConsumer.h" +#include "DiffLog.h" +#include "llvm/ADT/StringRef.h" +#include <utility> + +namespace llvm { + class Function; + class GlobalValue; + class Instruction; + class LLVMContext; + class Module; + class Twine; + class Value; + + /// A class for performing structural comparisons of LLVM assembly. + class DifferenceEngine { + public: + /// A RAII object for recording the current context. + struct Context { + Context(DifferenceEngine &Engine, const Value *L, const Value *R) + : Engine(Engine) { + Engine.consumer.enterContext(L, R); + } + + ~Context() { + Engine.consumer.exitContext(); + } + + private: + DifferenceEngine &Engine; + }; + + /// An oracle for answering whether two values are equivalent as + /// operands. + class Oracle { + virtual void anchor(); + public: + virtual bool operator()(const Value *L, const Value *R) = 0; + + protected: + virtual ~Oracle() {} + }; + + DifferenceEngine(Consumer &consumer) + : consumer(consumer), globalValueOracle(nullptr) {} + + void diff(const Module *L, const Module *R); + void diff(const Function *L, const Function *R); + void log(StringRef text) { + consumer.log(text); + } + LogBuilder logf(StringRef text) { + return LogBuilder(consumer, text); + } + Consumer& getConsumer() const { return consumer; } + + /// Installs an oracle to decide whether two global values are + /// equivalent as operands. Without an oracle, global values are + /// considered equivalent as operands precisely when they have the + /// same name. + void setGlobalValueOracle(Oracle *oracle) { + globalValueOracle = oracle; + } + + /// Determines whether two global values are equivalent. + bool equivalentAsOperands(const GlobalValue *L, const GlobalValue *R); + + private: + Consumer &consumer; + Oracle *globalValueOracle; + }; +} + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-diff/lib/ya.make b/contrib/libs/llvm16/tools/llvm-diff/lib/ya.make new file mode 100644 index 0000000000..45a2dfc592 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/lib/ya.make @@ -0,0 +1,30 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm16 + contrib/libs/llvm16/include + contrib/libs/llvm16/lib/IR + contrib/libs/llvm16/lib/Support +) + +ADDINCL( + contrib/libs/llvm16/tools/llvm-diff/lib +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + DiffConsumer.cpp + DiffLog.cpp + DifferenceEngine.cpp +) + +END() diff --git a/contrib/libs/llvm16/tools/llvm-diff/llvm-diff.cpp b/contrib/libs/llvm16/tools/llvm-diff/llvm-diff.cpp new file mode 100644 index 0000000000..7349469c80 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/llvm-diff.cpp @@ -0,0 +1,96 @@ +//===-- llvm-diff.cpp - Module comparator command-line driver ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the command-line driver for the difference engine. +// +//===----------------------------------------------------------------------===// + +#include "lib/DiffLog.h" +#include "lib/DifferenceEngine.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/WithColor.h" +#include <string> +#include <utility> + + +using namespace llvm; + +/// Reads a module from a file. On error, messages are written to stderr +/// and null is returned. +static std::unique_ptr<Module> readModule(LLVMContext &Context, + StringRef Name) { + SMDiagnostic Diag; + std::unique_ptr<Module> M = parseIRFile(Name, Diag, Context); + if (!M) + Diag.print("llvm-diff", errs()); + return M; +} + +static void diffGlobal(DifferenceEngine &Engine, Module &L, Module &R, + StringRef Name) { + // Drop leading sigils from the global name. + if (Name.startswith("@")) Name = Name.substr(1); + + Function *LFn = L.getFunction(Name); + Function *RFn = R.getFunction(Name); + if (LFn && RFn) + Engine.diff(LFn, RFn); + else if (!LFn && !RFn) + errs() << "No function named @" << Name << " in either module\n"; + else if (!LFn) + errs() << "No function named @" << Name << " in left module\n"; + else + errs() << "No function named @" << Name << " in right module\n"; +} + +cl::OptionCategory DiffCategory("Diff Options"); + +static cl::opt<std::string> LeftFilename(cl::Positional, + cl::desc("<first file>"), cl::Required, + cl::cat(DiffCategory)); +static cl::opt<std::string> RightFilename(cl::Positional, + cl::desc("<second file>"), + cl::Required, cl::cat(DiffCategory)); +static cl::list<std::string> GlobalsToCompare(cl::Positional, + cl::desc("<globals to compare>"), + cl::cat(DiffCategory)); + +int main(int argc, char **argv) { + cl::HideUnrelatedOptions({&DiffCategory, &getColorCategory()}); + cl::ParseCommandLineOptions(argc, argv); + + LLVMContext Context; + + // Load both modules. Die if that fails. + std::unique_ptr<Module> LModule = readModule(Context, LeftFilename); + std::unique_ptr<Module> RModule = readModule(Context, RightFilename); + if (!LModule || !RModule) return 1; + + DiffConsumer Consumer; + DifferenceEngine Engine(Consumer); + + // If any global names were given, just diff those. + if (!GlobalsToCompare.empty()) { + for (unsigned I = 0, E = GlobalsToCompare.size(); I != E; ++I) + diffGlobal(Engine, *LModule, *RModule, GlobalsToCompare[I]); + + // Otherwise, diff everything in the module. + } else { + Engine.diff(LModule.get(), RModule.get()); + } + + return Consumer.hadDifferences(); +} diff --git a/contrib/libs/llvm16/tools/llvm-diff/ya.make b/contrib/libs/llvm16/tools/llvm-diff/ya.make new file mode 100644 index 0000000000..db858124e2 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-diff/ya.make @@ -0,0 +1,37 @@ +# Generated by devtools/yamaker. + +PROGRAM() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm16 + contrib/libs/llvm16/include + contrib/libs/llvm16/lib/AsmParser + contrib/libs/llvm16/lib/BinaryFormat + contrib/libs/llvm16/lib/Bitcode/Reader + contrib/libs/llvm16/lib/Bitstream/Reader + contrib/libs/llvm16/lib/Demangle + contrib/libs/llvm16/lib/IR + contrib/libs/llvm16/lib/IRReader + contrib/libs/llvm16/lib/Remarks + contrib/libs/llvm16/lib/Support + contrib/libs/llvm16/lib/TargetParser + contrib/libs/llvm16/tools/llvm-diff/lib +) + +ADDINCL( + contrib/libs/llvm16/tools/llvm-diff +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + llvm-diff.cpp +) + +END() |