author      shadchin <shadchin@yandex-team.ru>    2022-02-10 16:44:30 +0300
committer   Daniil Cherednik <dcherednik@yandex-team.ru>    2022-02-10 16:44:30 +0300
commit      2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree        012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Transforms/Scalar
parent      6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download    ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Scalar')
68 files changed, 9642 insertions(+), 9642 deletions(-)
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp index ce4e5e575f..5f605b8ad4 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp @@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() { bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) { // TODO -- use llvm::isInstructionTriviallyDead - if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) { + if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) { // Skip any value profile instrumentation calls if they are // instrumenting constants. if (isInstrumentsConstant(I)) @@ -643,7 +643,7 @@ void AggressiveDeadCodeElimination::computeReversePostOrder() { SmallPtrSet<BasicBlock*, 16> Visited; unsigned PostOrder = 0; for (auto &BB : F) { - if (!succ_empty(&BB)) + if (!succ_empty(&BB)) continue; for (BasicBlock *Block : inverse_post_order_ext(&BB,Visited)) BlockInfo[Block].PostOrder = PostOrder++; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index bccf94fc21..f57ee657c2 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -15,7 +15,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #define AA_NAME "alignment-from-assumptions" #define DEBUG_TYPE AA_NAME @@ -204,33 +204,33 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV, } bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I, - unsigned Idx, + unsigned Idx, Value *&AAPtr, const SCEV *&AlignSCEV, const SCEV *&OffSCEV) { - Type *Int64Ty = Type::getInt64Ty(I->getContext()); - OperandBundleUse AlignOB = I->getOperandBundleAt(Idx); - if (AlignOB.getTagName() != "align") + Type *Int64Ty = Type::getInt64Ty(I->getContext()); + OperandBundleUse AlignOB = I->getOperandBundleAt(Idx); + if (AlignOB.getTagName() != "align") return false; - assert(AlignOB.Inputs.size() >= 2); - AAPtr = AlignOB.Inputs[0].get(); - // TODO: Consider accumulating the offset to the base. - AAPtr = AAPtr->stripPointerCastsSameRepresentation(); - AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get()); - AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty); - if (AlignOB.Inputs.size() == 3) - OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get()); - else + assert(AlignOB.Inputs.size() >= 2); + AAPtr = AlignOB.Inputs[0].get(); + // TODO: Consider accumulating the offset to the base. 
+ AAPtr = AAPtr->stripPointerCastsSameRepresentation(); + AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get()); + AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty); + if (AlignOB.Inputs.size() == 3) + OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get()); + else OffSCEV = SE->getZero(Int64Ty); - OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty); + OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty); return true; } -bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, - unsigned Idx) { +bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, + unsigned Idx) { Value *AAPtr; const SCEV *AlignSCEV, *OffSCEV; - if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV)) + if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV)) return false; // Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't @@ -254,8 +254,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, while (!WorkList.empty()) { Instruction *J = WorkList.pop_back_val(); if (LoadInst *LI = dyn_cast<LoadInst>(J)) { - if (!isValidAssumeForContext(ACall, J, DT)) - continue; + if (!isValidAssumeForContext(ACall, J, DT)) + continue; Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, LI->getPointerOperand(), SE); if (NewAlignment > LI->getAlign()) { @@ -263,8 +263,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, ++NumLoadAlignChanged; } } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) { - if (!isValidAssumeForContext(ACall, J, DT)) - continue; + if (!isValidAssumeForContext(ACall, J, DT)) + continue; Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, SI->getPointerOperand(), SE); if (NewAlignment > SI->getAlign()) { @@ -272,8 +272,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, ++NumStoreAlignChanged; } } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) { - if (!isValidAssumeForContext(ACall, J, DT)) - continue; + if (!isValidAssumeForContext(ACall, J, DT)) + continue; Align NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE); @@ -305,7 +305,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, Visited.insert(J); for (User *UJ : J->users()) { Instruction *K = cast<Instruction>(UJ); - if (!Visited.count(K)) + if (!Visited.count(K)) WorkList.push_back(K); } } @@ -332,11 +332,11 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC, bool Changed = false; for (auto &AssumeVH : AC.assumptions()) - if (AssumeVH) { - CallInst *Call = cast<CallInst>(AssumeVH); - for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++) - Changed |= processAssumption(Call, Idx); - } + if (AssumeVH) { + CallInst *Call = cast<CallInst>(AssumeVH); + for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++) + Changed |= processAssumption(Call, Idx); + } return Changed; } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp index a02d88fe06..360c9b542e 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp @@ -1,90 +1,90 @@ -//===-- AnnotationRemarks.cpp - Generate remarks for annotated instrs. ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Generate remarks for instructions marked with !annotation. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar/AnnotationRemarks.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" - -using namespace llvm; -using namespace llvm::ore; - -#define DEBUG_TYPE "annotation-remarks" -#define REMARK_PASS DEBUG_TYPE - -static void runImpl(Function &F) { - if (!OptimizationRemarkEmitter::allowExtraAnalysis(F, REMARK_PASS)) - return; - - OptimizationRemarkEmitter ORE(&F); - // For now, just generate a summary of the annotated instructions. - MapVector<StringRef, unsigned> Mapping; - for (Instruction &I : instructions(F)) { - if (!I.hasMetadata(LLVMContext::MD_annotation)) - continue; - for (const MDOperand &Op : - I.getMetadata(LLVMContext::MD_annotation)->operands()) { - auto Iter = Mapping.insert({cast<MDString>(Op.get())->getString(), 0}); - Iter.first->second++; - } - } - - Instruction *IP = &*F.begin()->begin(); - for (const auto &KV : Mapping) - ORE.emit(OptimizationRemarkAnalysis(REMARK_PASS, "AnnotationSummary", IP) - << "Annotated " << NV("count", KV.second) << " instructions with " - << NV("type", KV.first)); -} - -namespace { - -struct AnnotationRemarksLegacy : public FunctionPass { - static char ID; - - AnnotationRemarksLegacy() : FunctionPass(ID) { - initializeAnnotationRemarksLegacyPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - runImpl(F); - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } -}; - -} // end anonymous namespace - -char AnnotationRemarksLegacy::ID = 0; - -INITIALIZE_PASS_BEGIN(AnnotationRemarksLegacy, "annotation-remarks", - "Annotation Remarks", false, false) -INITIALIZE_PASS_END(AnnotationRemarksLegacy, "annotation-remarks", - "Annotation Remarks", false, false) - -FunctionPass *llvm::createAnnotationRemarksLegacyPass() { - return new AnnotationRemarksLegacy(); -} - -PreservedAnalyses AnnotationRemarksPass::run(Function &F, - FunctionAnalysisManager &AM) { - runImpl(F); - return PreservedAnalyses::all(); -} +//===-- AnnotationRemarks.cpp - Generate remarks for annotated instrs. ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generate remarks for instructions marked with !annotation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/AnnotationRemarks.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; +using namespace llvm::ore; + +#define DEBUG_TYPE "annotation-remarks" +#define REMARK_PASS DEBUG_TYPE + +static void runImpl(Function &F) { + if (!OptimizationRemarkEmitter::allowExtraAnalysis(F, REMARK_PASS)) + return; + + OptimizationRemarkEmitter ORE(&F); + // For now, just generate a summary of the annotated instructions. + MapVector<StringRef, unsigned> Mapping; + for (Instruction &I : instructions(F)) { + if (!I.hasMetadata(LLVMContext::MD_annotation)) + continue; + for (const MDOperand &Op : + I.getMetadata(LLVMContext::MD_annotation)->operands()) { + auto Iter = Mapping.insert({cast<MDString>(Op.get())->getString(), 0}); + Iter.first->second++; + } + } + + Instruction *IP = &*F.begin()->begin(); + for (const auto &KV : Mapping) + ORE.emit(OptimizationRemarkAnalysis(REMARK_PASS, "AnnotationSummary", IP) + << "Annotated " << NV("count", KV.second) << " instructions with " + << NV("type", KV.first)); +} + +namespace { + +struct AnnotationRemarksLegacy : public FunctionPass { + static char ID; + + AnnotationRemarksLegacy() : FunctionPass(ID) { + initializeAnnotationRemarksLegacyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + runImpl(F); + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +} // end anonymous namespace + +char AnnotationRemarksLegacy::ID = 0; + +INITIALIZE_PASS_BEGIN(AnnotationRemarksLegacy, "annotation-remarks", + "Annotation Remarks", false, false) +INITIALIZE_PASS_END(AnnotationRemarksLegacy, "annotation-remarks", + "Annotation Remarks", false, false) + +FunctionPass *llvm::createAnnotationRemarksLegacyPass() { + return new AnnotationRemarksLegacy(); +} + +PreservedAnalyses AnnotationRemarksPass::run(Function &F, + FunctionAnalysisManager &AM) { + runImpl(F); + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp index 2eb94b721d..a9558f3f16 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -208,7 +208,7 @@ static bool canSplitCallSite(CallBase &CB, TargetTransformInfo &TTI) { // instructions before the call is less then DuplicationThreshold. The // instructions before the call will be duplicated in the split blocks and // corresponding uses will be updated. 
- InstructionCost Cost = 0; + InstructionCost Cost = 0; for (auto &InstBeforeCall : llvm::make_range(CallSiteBB->begin(), CB.getIterator())) { Cost += TTI.getInstructionCost(&InstBeforeCall, diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp index fdab74fc94..29197218f2 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -366,9 +366,9 @@ void ConstantHoistingPass::collectConstantCandidates( ConstInt->getValue(), ConstInt->getType(), TargetTransformInfo::TCK_SizeAndLatency); else - Cost = TTI->getIntImmCostInst( - Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType(), - TargetTransformInfo::TCK_SizeAndLatency, Inst); + Cost = TTI->getIntImmCostInst( + Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType(), + TargetTransformInfo::TCK_SizeAndLatency, Inst); // Ignore cheap integer constants. if (Cost > TargetTransformInfo::TCC_Basic) { @@ -418,9 +418,9 @@ void ConstantHoistingPass::collectConstantCandidates( // usually lowered to a load from constant pool. Such operation is unlikely // to be cheaper than compute it by <Base + Offset>, which can be lowered to // an ADD instruction or folded into Load/Store instruction. - int Cost = - TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, - TargetTransformInfo::TCK_SizeAndLatency, Inst); + int Cost = + TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, + TargetTransformInfo::TCK_SizeAndLatency, Inst); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; ConstCandMapType::iterator Itr; bool Inserted; @@ -951,7 +951,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, // base constant. if (!ConstIntCandVec.empty()) findBaseConstants(nullptr); - for (const auto &MapEntry : ConstGEPCandMap) + for (const auto &MapEntry : ConstGEPCandMap) if (!MapEntry.second.empty()) findBaseConstants(MapEntry.first); @@ -960,7 +960,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, bool MadeChange = false; if (!ConstIntInfoVec.empty()) MadeChange = emitBaseConstants(nullptr); - for (const auto &MapEntry : ConstGEPInfoMap) + for (const auto &MapEntry : ConstGEPInfoMap) if (!MapEntry.second.empty()) MadeChange |= emitBaseConstants(MapEntry.first); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp index 3b8af6f21c..e46462aa1f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1,407 +1,407 @@ -//===-- ConstraintElimination.cpp - Eliminate conds using constraints. ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Eliminate conditions based on constraints collected from dominating -// conditions. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar/ConstraintElimination.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ConstraintSystem.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/DebugCounter.h" -#include "llvm/Transforms/Scalar.h" - -using namespace llvm; -using namespace PatternMatch; - -#define DEBUG_TYPE "constraint-elimination" - -STATISTIC(NumCondsRemoved, "Number of instructions removed"); -DEBUG_COUNTER(EliminatedCounter, "conds-eliminated", - "Controls which conditions are eliminated"); - -static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max(); - -// Decomposes \p V into a vector of pairs of the form { c, X } where c * X. The -// sum of the pairs equals \p V. The first pair is the constant-factor and X -// must be nullptr. If the expression cannot be decomposed, returns an empty -// vector. -static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) { - if (auto *CI = dyn_cast<ConstantInt>(V)) { - if (CI->isNegative() || CI->uge(MaxConstraintValue)) - return {}; - return {{CI->getSExtValue(), nullptr}}; - } - auto *GEP = dyn_cast<GetElementPtrInst>(V); - if (GEP && GEP->getNumOperands() == 2) { - if (isa<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))) { - return {{cast<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1)) - ->getSExtValue(), - nullptr}, - {1, GEP->getPointerOperand()}}; - } - Value *Op0; - ConstantInt *CI; - if (match(GEP->getOperand(GEP->getNumOperands() - 1), - m_NUWShl(m_Value(Op0), m_ConstantInt(CI)))) - return {{0, nullptr}, - {1, GEP->getPointerOperand()}, - {std::pow(int64_t(2), CI->getSExtValue()), Op0}}; - if (match(GEP->getOperand(GEP->getNumOperands() - 1), - m_ZExt(m_NUWShl(m_Value(Op0), m_ConstantInt(CI))))) - return {{0, nullptr}, - {1, GEP->getPointerOperand()}, - {std::pow(int64_t(2), CI->getSExtValue()), Op0}}; - - return {{0, nullptr}, - {1, GEP->getPointerOperand()}, - {1, GEP->getOperand(GEP->getNumOperands() - 1)}}; - } - - Value *Op0; - Value *Op1; - ConstantInt *CI; - if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI)))) - return {{CI->getSExtValue(), nullptr}, {1, Op0}}; - if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1)))) - return {{0, nullptr}, {1, Op0}, {1, Op1}}; - - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI)))) - return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}}; - if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) - return {{0, nullptr}, {1, Op0}, {1, Op1}}; - - return {{0, nullptr}, {1, V}}; -} - -/// Turn a condition \p CmpI into a constraint vector, using indices from \p -/// Value2Index. If \p ShouldAdd is true, new indices are added for values not -/// yet in \p Value2Index. 
-static SmallVector<int64_t, 8> -getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, - DenseMap<Value *, unsigned> &Value2Index, bool ShouldAdd) { - int64_t Offset1 = 0; - int64_t Offset2 = 0; - - auto TryToGetIndex = [ShouldAdd, - &Value2Index](Value *V) -> Optional<unsigned> { - if (ShouldAdd) { - Value2Index.insert({V, Value2Index.size() + 1}); - return Value2Index[V]; - } - auto I = Value2Index.find(V); - if (I == Value2Index.end()) - return None; - return I->second; - }; - - if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE) - return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0, - Value2Index, ShouldAdd); - - // Only ULE and ULT predicates are supported at the moment. - if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT) - return {}; - - auto ADec = decompose(Op0); - auto BDec = decompose(Op1); - // Skip if decomposing either of the values failed. - if (ADec.empty() || BDec.empty()) - return {}; - - // Skip trivial constraints without any variables. - if (ADec.size() == 1 && BDec.size() == 1) - return {}; - - Offset1 = ADec[0].first; - Offset2 = BDec[0].first; - Offset1 *= -1; - - // Create iterator ranges that skip the constant-factor. - auto VariablesA = make_range(std::next(ADec.begin()), ADec.end()); - auto VariablesB = make_range(std::next(BDec.begin()), BDec.end()); - - // Check if each referenced value in the constraint is already in the system - // or can be added (if ShouldAdd is true). - for (const auto &KV : - concat<std::pair<int64_t, Value *>>(VariablesA, VariablesB)) - if (!TryToGetIndex(KV.second)) - return {}; - - // Build result constraint, by first adding all coefficients from A and then - // subtracting all coefficients from B. - SmallVector<int64_t, 8> R(Value2Index.size() + 1, 0); - for (const auto &KV : VariablesA) - R[Value2Index[KV.second]] += KV.first; - - for (const auto &KV : VariablesB) - R[Value2Index[KV.second]] -= KV.first; - - R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0); - return R; -} - -static SmallVector<int64_t, 8> -getConstraint(CmpInst *Cmp, DenseMap<Value *, unsigned> &Value2Index, - bool ShouldAdd) { - return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0), - Cmp->getOperand(1), Value2Index, ShouldAdd); -} - -namespace { -/// Represents either a condition that holds on entry to a block or a basic -/// block, with their respective Dominator DFS in and out numbers. -struct ConstraintOrBlock { - unsigned NumIn; - unsigned NumOut; - bool IsBlock; - bool Not; - union { - BasicBlock *BB; - CmpInst *Condition; - }; - - ConstraintOrBlock(DomTreeNode *DTN) - : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(true), - BB(DTN->getBlock()) {} - ConstraintOrBlock(DomTreeNode *DTN, CmpInst *Condition, bool Not) - : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(false), - Not(Not), Condition(Condition) {} -}; - -struct StackEntry { - unsigned NumIn; - unsigned NumOut; - CmpInst *Condition; - bool IsNot; - - StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot) - : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {} -}; -} // namespace - -static bool eliminateConstraints(Function &F, DominatorTree &DT) { - bool Changed = false; - DT.updateDFSNumbers(); - ConstraintSystem CS; - - SmallVector<ConstraintOrBlock, 64> WorkList; - - // First, collect conditions implied by branches and blocks with their - // Dominator DFS in and out numbers. 
- for (BasicBlock &BB : F) { - if (!DT.getNode(&BB)) - continue; - WorkList.emplace_back(DT.getNode(&BB)); - - auto *Br = dyn_cast<BranchInst>(BB.getTerminator()); - if (!Br || !Br->isConditional()) - continue; - - // If the condition is an OR of 2 compares and the false successor only has - // the current block as predecessor, queue both negated conditions for the - // false successor. - Value *Op0, *Op1; - if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) && - match(Op0, m_Cmp()) && match(Op1, m_Cmp())) { - BasicBlock *FalseSuccessor = Br->getSuccessor(1); - if (FalseSuccessor->getSinglePredecessor()) { - WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op0), - true); - WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op1), - true); - } - continue; - } - - // If the condition is an AND of 2 compares and the true successor only has - // the current block as predecessor, queue both conditions for the true - // successor. - if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) && - match(Op0, m_Cmp()) && match(Op1, m_Cmp())) { - BasicBlock *TrueSuccessor = Br->getSuccessor(0); - if (TrueSuccessor->getSinglePredecessor()) { - WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op0), - false); - WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op1), - false); - } - continue; - } - - auto *CmpI = dyn_cast<CmpInst>(Br->getCondition()); - if (!CmpI) - continue; - if (Br->getSuccessor(0)->getSinglePredecessor()) - WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false); - if (Br->getSuccessor(1)->getSinglePredecessor()) - WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true); - } - - // Next, sort worklist by dominance, so that dominating blocks and conditions - // come before blocks and conditions dominated by them. If a block and a - // condition have the same numbers, the condition comes before the block, as - // it holds on entry to the block. - sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) { - return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock); - }); - - // Finally, process ordered worklist and eliminate implied conditions. - SmallVector<StackEntry, 16> DFSInStack; - DenseMap<Value *, unsigned> Value2Index; - for (ConstraintOrBlock &CB : WorkList) { - // First, pop entries from the stack that are out-of-scope for CB. Remove - // the corresponding entry from the constraint system. - while (!DFSInStack.empty()) { - auto &E = DFSInStack.back(); - LLVM_DEBUG(dbgs() << "Top of stack : " << E.NumIn << " " << E.NumOut - << "\n"); - LLVM_DEBUG(dbgs() << "CB: " << CB.NumIn << " " << CB.NumOut << "\n"); - assert(E.NumIn <= CB.NumIn); - if (CB.NumOut <= E.NumOut) - break; - LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot - << "\n"); - DFSInStack.pop_back(); - CS.popLastConstraint(); - } - - LLVM_DEBUG({ - dbgs() << "Processing "; - if (CB.IsBlock) - dbgs() << *CB.BB; - else - dbgs() << *CB.Condition; - dbgs() << "\n"; - }); - - // For a block, check if any CmpInsts become known based on the current set - // of constraints. 
- if (CB.IsBlock) { - for (Instruction &I : *CB.BB) { - auto *Cmp = dyn_cast<CmpInst>(&I); - if (!Cmp) - continue; - auto R = getConstraint(Cmp, Value2Index, false); - if (R.empty() || R.size() == 1) - continue; - if (CS.isConditionImplied(R)) { - if (!DebugCounter::shouldExecute(EliminatedCounter)) - continue; - - LLVM_DEBUG(dbgs() << "Condition " << *Cmp - << " implied by dominating constraints\n"); - LLVM_DEBUG({ - for (auto &E : reverse(DFSInStack)) - dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n"; - }); - Cmp->replaceAllUsesWith( - ConstantInt::getTrue(F.getParent()->getContext())); - NumCondsRemoved++; - Changed = true; - } - if (CS.isConditionImplied(ConstraintSystem::negate(R))) { - if (!DebugCounter::shouldExecute(EliminatedCounter)) - continue; - - LLVM_DEBUG(dbgs() << "Condition !" << *Cmp - << " implied by dominating constraints\n"); - LLVM_DEBUG({ - for (auto &E : reverse(DFSInStack)) - dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n"; - }); - Cmp->replaceAllUsesWith( - ConstantInt::getFalse(F.getParent()->getContext())); - NumCondsRemoved++; - Changed = true; - } - } - continue; - } - - // Otherwise, add the condition to the system and stack, if we can transform - // it into a constraint. - auto R = getConstraint(CB.Condition, Value2Index, true); - if (R.empty()) - continue; - - LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n"); - if (CB.Not) - R = ConstraintSystem::negate(R); - - // If R has been added to the system, queue it for removal once it goes - // out-of-scope. - if (CS.addVariableRowFill(R)) - DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not); - } - - return Changed; -} - -PreservedAnalyses ConstraintEliminationPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - if (!eliminateConstraints(F, DT)) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<GlobalsAA>(); - PA.preserveSet<CFGAnalyses>(); - return PA; -} - -namespace { - -class ConstraintElimination : public FunctionPass { -public: - static char ID; - - ConstraintElimination() : FunctionPass(ID) { - initializeConstraintEliminationPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - return eliminateConstraints(F, DT); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } -}; - -} // end anonymous namespace - -char ConstraintElimination::ID = 0; - -INITIALIZE_PASS_BEGIN(ConstraintElimination, "constraint-elimination", - "Constraint Elimination", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) -INITIALIZE_PASS_END(ConstraintElimination, "constraint-elimination", - "Constraint Elimination", false, false) - -FunctionPass *llvm::createConstraintEliminationPass() { - return new ConstraintElimination(); -} +//===-- ConstraintElimination.cpp - Eliminate conds using constraints. ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Eliminate conditions based on constraints collected from dominating +// conditions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/ConstraintElimination.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstraintSystem.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "constraint-elimination" + +STATISTIC(NumCondsRemoved, "Number of instructions removed"); +DEBUG_COUNTER(EliminatedCounter, "conds-eliminated", + "Controls which conditions are eliminated"); + +static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max(); + +// Decomposes \p V into a vector of pairs of the form { c, X } where c * X. The +// sum of the pairs equals \p V. The first pair is the constant-factor and X +// must be nullptr. If the expression cannot be decomposed, returns an empty +// vector. +static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) { + if (auto *CI = dyn_cast<ConstantInt>(V)) { + if (CI->isNegative() || CI->uge(MaxConstraintValue)) + return {}; + return {{CI->getSExtValue(), nullptr}}; + } + auto *GEP = dyn_cast<GetElementPtrInst>(V); + if (GEP && GEP->getNumOperands() == 2) { + if (isa<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))) { + return {{cast<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1)) + ->getSExtValue(), + nullptr}, + {1, GEP->getPointerOperand()}}; + } + Value *Op0; + ConstantInt *CI; + if (match(GEP->getOperand(GEP->getNumOperands() - 1), + m_NUWShl(m_Value(Op0), m_ConstantInt(CI)))) + return {{0, nullptr}, + {1, GEP->getPointerOperand()}, + {std::pow(int64_t(2), CI->getSExtValue()), Op0}}; + if (match(GEP->getOperand(GEP->getNumOperands() - 1), + m_ZExt(m_NUWShl(m_Value(Op0), m_ConstantInt(CI))))) + return {{0, nullptr}, + {1, GEP->getPointerOperand()}, + {std::pow(int64_t(2), CI->getSExtValue()), Op0}}; + + return {{0, nullptr}, + {1, GEP->getPointerOperand()}, + {1, GEP->getOperand(GEP->getNumOperands() - 1)}}; + } + + Value *Op0; + Value *Op1; + ConstantInt *CI; + if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI)))) + return {{CI->getSExtValue(), nullptr}, {1, Op0}}; + if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1)))) + return {{0, nullptr}, {1, Op0}, {1, Op1}}; + + if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI)))) + return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}}; + if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) + return {{0, nullptr}, {1, Op0}, {1, Op1}}; + + return {{0, nullptr}, {1, V}}; +} + +/// Turn a condition \p CmpI into a constraint vector, using indices from \p +/// Value2Index. If \p ShouldAdd is true, new indices are added for values not +/// yet in \p Value2Index. 
+static SmallVector<int64_t, 8> +getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, + DenseMap<Value *, unsigned> &Value2Index, bool ShouldAdd) { + int64_t Offset1 = 0; + int64_t Offset2 = 0; + + auto TryToGetIndex = [ShouldAdd, + &Value2Index](Value *V) -> Optional<unsigned> { + if (ShouldAdd) { + Value2Index.insert({V, Value2Index.size() + 1}); + return Value2Index[V]; + } + auto I = Value2Index.find(V); + if (I == Value2Index.end()) + return None; + return I->second; + }; + + if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE) + return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0, + Value2Index, ShouldAdd); + + // Only ULE and ULT predicates are supported at the moment. + if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT) + return {}; + + auto ADec = decompose(Op0); + auto BDec = decompose(Op1); + // Skip if decomposing either of the values failed. + if (ADec.empty() || BDec.empty()) + return {}; + + // Skip trivial constraints without any variables. + if (ADec.size() == 1 && BDec.size() == 1) + return {}; + + Offset1 = ADec[0].first; + Offset2 = BDec[0].first; + Offset1 *= -1; + + // Create iterator ranges that skip the constant-factor. + auto VariablesA = make_range(std::next(ADec.begin()), ADec.end()); + auto VariablesB = make_range(std::next(BDec.begin()), BDec.end()); + + // Check if each referenced value in the constraint is already in the system + // or can be added (if ShouldAdd is true). + for (const auto &KV : + concat<std::pair<int64_t, Value *>>(VariablesA, VariablesB)) + if (!TryToGetIndex(KV.second)) + return {}; + + // Build result constraint, by first adding all coefficients from A and then + // subtracting all coefficients from B. + SmallVector<int64_t, 8> R(Value2Index.size() + 1, 0); + for (const auto &KV : VariablesA) + R[Value2Index[KV.second]] += KV.first; + + for (const auto &KV : VariablesB) + R[Value2Index[KV.second]] -= KV.first; + + R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0); + return R; +} + +static SmallVector<int64_t, 8> +getConstraint(CmpInst *Cmp, DenseMap<Value *, unsigned> &Value2Index, + bool ShouldAdd) { + return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0), + Cmp->getOperand(1), Value2Index, ShouldAdd); +} + +namespace { +/// Represents either a condition that holds on entry to a block or a basic +/// block, with their respective Dominator DFS in and out numbers. +struct ConstraintOrBlock { + unsigned NumIn; + unsigned NumOut; + bool IsBlock; + bool Not; + union { + BasicBlock *BB; + CmpInst *Condition; + }; + + ConstraintOrBlock(DomTreeNode *DTN) + : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(true), + BB(DTN->getBlock()) {} + ConstraintOrBlock(DomTreeNode *DTN, CmpInst *Condition, bool Not) + : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(false), + Not(Not), Condition(Condition) {} +}; + +struct StackEntry { + unsigned NumIn; + unsigned NumOut; + CmpInst *Condition; + bool IsNot; + + StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot) + : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {} +}; +} // namespace + +static bool eliminateConstraints(Function &F, DominatorTree &DT) { + bool Changed = false; + DT.updateDFSNumbers(); + ConstraintSystem CS; + + SmallVector<ConstraintOrBlock, 64> WorkList; + + // First, collect conditions implied by branches and blocks with their + // Dominator DFS in and out numbers. 
+ for (BasicBlock &BB : F) { + if (!DT.getNode(&BB)) + continue; + WorkList.emplace_back(DT.getNode(&BB)); + + auto *Br = dyn_cast<BranchInst>(BB.getTerminator()); + if (!Br || !Br->isConditional()) + continue; + + // If the condition is an OR of 2 compares and the false successor only has + // the current block as predecessor, queue both negated conditions for the + // false successor. + Value *Op0, *Op1; + if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) && + match(Op0, m_Cmp()) && match(Op1, m_Cmp())) { + BasicBlock *FalseSuccessor = Br->getSuccessor(1); + if (FalseSuccessor->getSinglePredecessor()) { + WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op0), + true); + WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op1), + true); + } + continue; + } + + // If the condition is an AND of 2 compares and the true successor only has + // the current block as predecessor, queue both conditions for the true + // successor. + if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) && + match(Op0, m_Cmp()) && match(Op1, m_Cmp())) { + BasicBlock *TrueSuccessor = Br->getSuccessor(0); + if (TrueSuccessor->getSinglePredecessor()) { + WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op0), + false); + WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op1), + false); + } + continue; + } + + auto *CmpI = dyn_cast<CmpInst>(Br->getCondition()); + if (!CmpI) + continue; + if (Br->getSuccessor(0)->getSinglePredecessor()) + WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false); + if (Br->getSuccessor(1)->getSinglePredecessor()) + WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true); + } + + // Next, sort worklist by dominance, so that dominating blocks and conditions + // come before blocks and conditions dominated by them. If a block and a + // condition have the same numbers, the condition comes before the block, as + // it holds on entry to the block. + sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) { + return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock); + }); + + // Finally, process ordered worklist and eliminate implied conditions. + SmallVector<StackEntry, 16> DFSInStack; + DenseMap<Value *, unsigned> Value2Index; + for (ConstraintOrBlock &CB : WorkList) { + // First, pop entries from the stack that are out-of-scope for CB. Remove + // the corresponding entry from the constraint system. + while (!DFSInStack.empty()) { + auto &E = DFSInStack.back(); + LLVM_DEBUG(dbgs() << "Top of stack : " << E.NumIn << " " << E.NumOut + << "\n"); + LLVM_DEBUG(dbgs() << "CB: " << CB.NumIn << " " << CB.NumOut << "\n"); + assert(E.NumIn <= CB.NumIn); + if (CB.NumOut <= E.NumOut) + break; + LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot + << "\n"); + DFSInStack.pop_back(); + CS.popLastConstraint(); + } + + LLVM_DEBUG({ + dbgs() << "Processing "; + if (CB.IsBlock) + dbgs() << *CB.BB; + else + dbgs() << *CB.Condition; + dbgs() << "\n"; + }); + + // For a block, check if any CmpInsts become known based on the current set + // of constraints. 
+ if (CB.IsBlock) { + for (Instruction &I : *CB.BB) { + auto *Cmp = dyn_cast<CmpInst>(&I); + if (!Cmp) + continue; + auto R = getConstraint(Cmp, Value2Index, false); + if (R.empty() || R.size() == 1) + continue; + if (CS.isConditionImplied(R)) { + if (!DebugCounter::shouldExecute(EliminatedCounter)) + continue; + + LLVM_DEBUG(dbgs() << "Condition " << *Cmp + << " implied by dominating constraints\n"); + LLVM_DEBUG({ + for (auto &E : reverse(DFSInStack)) + dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n"; + }); + Cmp->replaceAllUsesWith( + ConstantInt::getTrue(F.getParent()->getContext())); + NumCondsRemoved++; + Changed = true; + } + if (CS.isConditionImplied(ConstraintSystem::negate(R))) { + if (!DebugCounter::shouldExecute(EliminatedCounter)) + continue; + + LLVM_DEBUG(dbgs() << "Condition !" << *Cmp + << " implied by dominating constraints\n"); + LLVM_DEBUG({ + for (auto &E : reverse(DFSInStack)) + dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n"; + }); + Cmp->replaceAllUsesWith( + ConstantInt::getFalse(F.getParent()->getContext())); + NumCondsRemoved++; + Changed = true; + } + } + continue; + } + + // Otherwise, add the condition to the system and stack, if we can transform + // it into a constraint. + auto R = getConstraint(CB.Condition, Value2Index, true); + if (R.empty()) + continue; + + LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n"); + if (CB.Not) + R = ConstraintSystem::negate(R); + + // If R has been added to the system, queue it for removal once it goes + // out-of-scope. + if (CS.addVariableRowFill(R)) + DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not); + } + + return Changed; +} + +PreservedAnalyses ConstraintEliminationPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + if (!eliminateConstraints(F, DT)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<GlobalsAA>(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +namespace { + +class ConstraintElimination : public FunctionPass { +public: + static char ID; + + ConstraintElimination() : FunctionPass(ID) { + initializeConstraintEliminationPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + return eliminateConstraints(F, DT); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } +}; + +} // end anonymous namespace + +char ConstraintElimination::ID = 0; + +INITIALIZE_PASS_BEGIN(ConstraintElimination, "constraint-elimination", + "Constraint Elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) +INITIALIZE_PASS_END(ConstraintElimination, "constraint-elimination", + "Constraint Elimination", false, false) + +FunctionPass *llvm::createConstraintEliminationPass() { + return new ConstraintElimination(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b671d68031..c6a0c3ee7d 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -58,11 +58,11 @@ 
STATISTIC(NumMemAccess, "Number of memory access targets propagated"); STATISTIC(NumCmps, "Number of comparisons propagated"); STATISTIC(NumReturns, "Number of return values propagated"); STATISTIC(NumDeadCases, "Number of switch cases removed"); -STATISTIC(NumSDivSRemsNarrowed, - "Number of sdivs/srems whose width was decreased"); +STATISTIC(NumSDivSRemsNarrowed, + "Number of sdivs/srems whose width was decreased"); STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); -STATISTIC(NumUDivURemsNarrowed, - "Number of udivs/urems whose width was decreased"); +STATISTIC(NumUDivURemsNarrowed, + "Number of udivs/urems whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumSExt, "Number of sext converted to zext"); @@ -129,7 +129,7 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { if (S->getType()->isVectorTy()) return false; if (isa<Constant>(S->getCondition())) return false; - Constant *C = LVI->getConstant(S->getCondition(), S); + Constant *C = LVI->getConstant(S->getCondition(), S); if (!C) return false; ConstantInt *CI = dyn_cast<ConstantInt>(C); @@ -286,7 +286,7 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) { if (isa<Constant>(Pointer)) return false; - Constant *C = LVI->getConstant(Pointer, I); + Constant *C = LVI->getConstant(Pointer, I); if (!C) return false; ++NumMemAccess; @@ -305,8 +305,8 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { return false; LazyValueInfo::Tristate Result = - LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp, - /*UseBlockValue=*/true); + LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp, + /*UseBlockValue=*/true); if (Result == LazyValueInfo::Unknown) return false; @@ -342,9 +342,9 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); - LazyValueInfo::Tristate State = - LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, - /* UseBlockValue */ true); + LazyValueInfo::Tristate State = + LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, + /* UseBlockValue */ true); if (State == LazyValueInfo::False) { // This case never fires - remove it. @@ -388,8 +388,8 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, // See if we can prove that the given binary op intrinsic will not overflow. static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) { - ConstantRange LRange = LVI->getConstantRange(BO->getLHS(), BO); - ConstantRange RRange = LVI->getConstantRange(BO->getRHS(), BO); + ConstantRange LRange = LVI->getConstantRange(BO->getLHS(), BO); + ConstantRange RRange = LVI->getConstantRange(BO->getRHS(), BO); ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion( BO->getBinaryOp(), RRange, BO->getNoWrapKind()); return NWRegion.contains(LRange); @@ -504,8 +504,8 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { } } - bool Changed = false; - + bool Changed = false; + // Deopt bundle operands are intended to capture state with minimal // perturbance of the code otherwise. 
If we can find a constant value for // any such operand and remove a use of the original value, that's @@ -520,16 +520,16 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { if (V->getType()->isVectorTy()) continue; if (isa<Constant>(V)) continue; - Constant *C = LVI->getConstant(V, &CB); + Constant *C = LVI->getConstant(V, &CB); if (!C) continue; U.set(C); - Changed = true; + Changed = true; } } - SmallVector<unsigned, 4> ArgNos; - unsigned ArgNo = 0; - + SmallVector<unsigned, 4> ArgNos; + unsigned ArgNo = 0; + for (Value *V : CB.args()) { PointerType *Type = dyn_cast<PointerType>(V->getType()); // Try to mark pointer typed parameters as non-null. We skip the @@ -547,7 +547,7 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { assert(ArgNo == CB.arg_size() && "sanity check"); if (ArgNos.empty()) - return Changed; + return Changed; AttributeList AS = CB.getAttributes(); LLVMContext &Ctx = CB.getContext(); @@ -558,79 +558,79 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { return true; } -static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { - Constant *Zero = ConstantInt::get(V->getType(), 0); - auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI); - return Result == LazyValueInfo::True; -} - -static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { - Constant *Zero = ConstantInt::get(V->getType(), 0); - auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI); - return Result == LazyValueInfo::True; -} - -enum class Domain { NonNegative, NonPositive, Unknown }; - -Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { - if (isNonNegative(V, LVI, CxtI)) - return Domain::NonNegative; - if (isNonPositive(V, LVI, CxtI)) - return Domain::NonPositive; - return Domain::Unknown; -} - -/// Try to shrink a sdiv/srem's width down to the smallest power of two that's -/// sufficient to contain its operands. -static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { - assert(Instr->getOpcode() == Instruction::SDiv || - Instr->getOpcode() == Instruction::SRem); - if (Instr->getType()->isVectorTy()) - return false; - - // Find the smallest power of two bitwidth that's sufficient to hold Instr's - // operands. - unsigned OrigWidth = Instr->getType()->getIntegerBitWidth(); - - // What is the smallest bit width that can accomodate the entire value ranges - // of both of the operands? 
- std::array<Optional<ConstantRange>, 2> CRs; - unsigned MinSignedBits = 0; - for (auto I : zip(Instr->operands(), CRs)) { - std::get<1>(I) = LVI->getConstantRange(std::get<0>(I), Instr); - MinSignedBits = std::max(std::get<1>(I)->getMinSignedBits(), MinSignedBits); +static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + +static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + +enum class Domain { NonNegative, NonPositive, Unknown }; + +Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + if (isNonNegative(V, LVI, CxtI)) + return Domain::NonNegative; + if (isNonPositive(V, LVI, CxtI)) + return Domain::NonPositive; + return Domain::Unknown; +} + +/// Try to shrink a sdiv/srem's width down to the smallest power of two that's +/// sufficient to contain its operands. +static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::SDiv || + Instr->getOpcode() == Instruction::SRem); + if (Instr->getType()->isVectorTy()) + return false; + + // Find the smallest power of two bitwidth that's sufficient to hold Instr's + // operands. + unsigned OrigWidth = Instr->getType()->getIntegerBitWidth(); + + // What is the smallest bit width that can accomodate the entire value ranges + // of both of the operands? + std::array<Optional<ConstantRange>, 2> CRs; + unsigned MinSignedBits = 0; + for (auto I : zip(Instr->operands(), CRs)) { + std::get<1>(I) = LVI->getConstantRange(std::get<0>(I), Instr); + MinSignedBits = std::max(std::get<1>(I)->getMinSignedBits(), MinSignedBits); } - - // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can - // prove that such a combination is impossible, we need to bump the bitwidth. - if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) && - CRs[0]->contains( - APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth))) - ++MinSignedBits; - - // Don't shrink below 8 bits wide. - unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8); - - // NewWidth might be greater than OrigWidth if OrigWidth is not a power of - // two. - if (NewWidth >= OrigWidth) - return false; - - ++NumSDivSRemsNarrowed; - IRBuilder<> B{Instr}; - auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); - auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy, - Instr->getName() + ".lhs.trunc"); - auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy, - Instr->getName() + ".rhs.trunc"); - auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName()); - auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext"); - if (auto *BinOp = dyn_cast<BinaryOperator>(BO)) - if (BinOp->getOpcode() == Instruction::SDiv) - BinOp->setIsExact(Instr->isExact()); - - Instr->replaceAllUsesWith(Sext); - Instr->eraseFromParent(); + + // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can + // prove that such a combination is impossible, we need to bump the bitwidth. 
+ if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) && + CRs[0]->contains( + APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth))) + ++MinSignedBits; + + // Don't shrink below 8 bits wide. + unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8); + + // NewWidth might be greater than OrigWidth if OrigWidth is not a power of + // two. + if (NewWidth >= OrigWidth) + return false; + + ++NumSDivSRemsNarrowed; + IRBuilder<> B{Instr}; + auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); + auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy, + Instr->getName() + ".lhs.trunc"); + auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy, + Instr->getName() + ".rhs.trunc"); + auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName()); + auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext"); + if (auto *BinOp = dyn_cast<BinaryOperator>(BO)) + if (BinOp->getOpcode() == Instruction::SDiv) + BinOp->setIsExact(Instr->isExact()); + + Instr->replaceAllUsesWith(Sext); + Instr->eraseFromParent(); return true; } @@ -644,23 +644,23 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { // Find the smallest power of two bitwidth that's sufficient to hold Instr's // operands. - - // What is the smallest bit width that can accomodate the entire value ranges - // of both of the operands? - unsigned MaxActiveBits = 0; + + // What is the smallest bit width that can accomodate the entire value ranges + // of both of the operands? + unsigned MaxActiveBits = 0; for (Value *Operand : Instr->operands()) { - ConstantRange CR = LVI->getConstantRange(Operand, Instr); - MaxActiveBits = std::max(CR.getActiveBits(), MaxActiveBits); + ConstantRange CR = LVI->getConstantRange(Operand, Instr); + MaxActiveBits = std::max(CR.getActiveBits(), MaxActiveBits); } // Don't shrink below 8 bits wide. - unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8); - + unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8); + // NewWidth might be greater than OrigWidth if OrigWidth is not a power of // two. - if (NewWidth >= Instr->getType()->getIntegerBitWidth()) + if (NewWidth >= Instr->getType()->getIntegerBitWidth()) return false; - ++NumUDivURemsNarrowed; + ++NumUDivURemsNarrowed; IRBuilder<> B{Instr}; auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy, @@ -679,135 +679,135 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { } static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { - assert(SDI->getOpcode() == Instruction::SRem); - if (SDI->getType()->isVectorTy()) + assert(SDI->getOpcode() == Instruction::SRem); + if (SDI->getType()->isVectorTy()) return false; - struct Operand { - Value *V; - Domain D; - }; - std::array<Operand, 2> Ops; - - for (const auto I : zip(Ops, SDI->operands())) { - Operand &Op = std::get<0>(I); - Op.V = std::get<1>(I); - Op.D = getDomain(Op.V, LVI, SDI); - if (Op.D == Domain::Unknown) - return false; - } - - // We know domains of both of the operands! + struct Operand { + Value *V; + Domain D; + }; + std::array<Operand, 2> Ops; + + for (const auto I : zip(Ops, SDI->operands())) { + Operand &Op = std::get<0>(I); + Op.V = std::get<1>(I); + Op.D = getDomain(Op.V, LVI, SDI); + if (Op.D == Domain::Unknown) + return false; + } + + // We know domains of both of the operands! 
++NumSRems; - - // We need operands to be non-negative, so negate each one that isn't. - for (Operand &Op : Ops) { - if (Op.D == Domain::NonNegative) - continue; - auto *BO = - BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); - BO->setDebugLoc(SDI->getDebugLoc()); - Op.V = BO; - } - - auto *URem = - BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI); - URem->setDebugLoc(SDI->getDebugLoc()); - - Value *Res = URem; - - // If the divident was non-positive, we need to negate the result. - if (Ops[0].D == Domain::NonPositive) - Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); - - SDI->replaceAllUsesWith(Res); + + // We need operands to be non-negative, so negate each one that isn't. + for (Operand &Op : Ops) { + if (Op.D == Domain::NonNegative) + continue; + auto *BO = + BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + BO->setDebugLoc(SDI->getDebugLoc()); + Op.V = BO; + } + + auto *URem = + BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI); + URem->setDebugLoc(SDI->getDebugLoc()); + + Value *Res = URem; + + // If the divident was non-positive, we need to negate the result. + if (Ops[0].D == Domain::NonPositive) + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + + SDI->replaceAllUsesWith(Res); SDI->eraseFromParent(); - // Try to simplify our new urem. - processUDivOrURem(URem, LVI); + // Try to simplify our new urem. + processUDivOrURem(URem, LVI); return true; } /// See if LazyValueInfo's ability to exploit edge conditions or range -/// information is sufficient to prove the signs of both operands of this SDiv. -/// If this is the case, replace the SDiv with a UDiv. Even for local +/// information is sufficient to prove the signs of both operands of this SDiv. +/// If this is the case, replace the SDiv with a UDiv. Even for local /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { - assert(SDI->getOpcode() == Instruction::SDiv); - if (SDI->getType()->isVectorTy()) + assert(SDI->getOpcode() == Instruction::SDiv); + if (SDI->getType()->isVectorTy()) return false; - struct Operand { - Value *V; - Domain D; - }; - std::array<Operand, 2> Ops; - - for (const auto I : zip(Ops, SDI->operands())) { - Operand &Op = std::get<0>(I); - Op.V = std::get<1>(I); - Op.D = getDomain(Op.V, LVI, SDI); - if (Op.D == Domain::Unknown) - return false; - } - - // We know domains of both of the operands! + struct Operand { + Value *V; + Domain D; + }; + std::array<Operand, 2> Ops; + + for (const auto I : zip(Ops, SDI->operands())) { + Operand &Op = std::get<0>(I); + Op.V = std::get<1>(I); + Op.D = getDomain(Op.V, LVI, SDI); + if (Op.D == Domain::Unknown) + return false; + } + + // We know domains of both of the operands! ++NumSDivs; - - // We need operands to be non-negative, so negate each one that isn't. - for (Operand &Op : Ops) { - if (Op.D == Domain::NonNegative) - continue; - auto *BO = - BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); - BO->setDebugLoc(SDI->getDebugLoc()); - Op.V = BO; - } - - auto *UDiv = - BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI); - UDiv->setDebugLoc(SDI->getDebugLoc()); - UDiv->setIsExact(SDI->isExact()); - - Value *Res = UDiv; - - // If the operands had two different domains, we need to negate the result. 
- if (Ops[0].D != Ops[1].D) - Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); - - SDI->replaceAllUsesWith(Res); + + // We need operands to be non-negative, so negate each one that isn't. + for (Operand &Op : Ops) { + if (Op.D == Domain::NonNegative) + continue; + auto *BO = + BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + BO->setDebugLoc(SDI->getDebugLoc()); + Op.V = BO; + } + + auto *UDiv = + BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI); + UDiv->setDebugLoc(SDI->getDebugLoc()); + UDiv->setIsExact(SDI->isExact()); + + Value *Res = UDiv; + + // If the operands had two different domains, we need to negate the result. + if (Ops[0].D != Ops[1].D) + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + + SDI->replaceAllUsesWith(Res); SDI->eraseFromParent(); // Try to simplify our new udiv. - processUDivOrURem(UDiv, LVI); + processUDivOrURem(UDiv, LVI); return true; } -static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { - assert(Instr->getOpcode() == Instruction::SDiv || - Instr->getOpcode() == Instruction::SRem); - if (Instr->getType()->isVectorTy()) - return false; - - if (Instr->getOpcode() == Instruction::SDiv) - if (processSDiv(Instr, LVI)) - return true; - - if (Instr->getOpcode() == Instruction::SRem) - if (processSRem(Instr, LVI)) - return true; - - return narrowSDivOrSRem(Instr, LVI); -} - +static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::SDiv || + Instr->getOpcode() == Instruction::SRem); + if (Instr->getType()->isVectorTy()) + return false; + + if (Instr->getOpcode() == Instruction::SDiv) + if (processSDiv(Instr, LVI)) + return true; + + if (Instr->getOpcode() == Instruction::SRem) + if (processSRem(Instr, LVI)) + return true; + + return narrowSDivOrSRem(Instr, LVI); +} + static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; - if (!isNonNegative(SDI->getOperand(0), LVI, SDI)) + if (!isNonNegative(SDI->getOperand(0), LVI, SDI)) return false; ++NumAShrs; @@ -827,7 +827,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { Value *Base = SDI->getOperand(0); - if (!isNonNegative(Base, LVI, SDI)) + if (!isNonNegative(Base, LVI, SDI)) return false; ++NumSExt; @@ -858,8 +858,8 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { Value *LHS = BinOp->getOperand(0); Value *RHS = BinOp->getOperand(1); - ConstantRange LRange = LVI->getConstantRange(LHS, BinOp); - ConstantRange RRange = LVI->getConstantRange(RHS, BinOp); + ConstantRange LRange = LVI->getConstantRange(LHS, BinOp); + ConstantRange RRange = LVI->getConstantRange(RHS, BinOp); bool Changed = false; bool NewNUW = false, NewNSW = false; @@ -895,7 +895,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { // We can only replace the AND with LHS based on range info if the range does // not include undef. 
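Aside: processSDiv and processSRem above turn signed division and remainder into their unsigned counterparts once LazyValueInfo has proven the sign of each operand. The pass negates every operand that is not known non-negative, emits udiv or urem, and then negates the result when the operand domains differ (sdiv) or when the dividend was non-positive (srem). The plain C++ sketch below checks that fix-up on concrete values; sdivViaUdiv and sremViaUrem are invented names, the domains are taken from the runtime values rather than proven statically, and the INT64_MIN corner case is ignored. The processAnd hunk continues right after this aside.

#include <cassert>
#include <cstdint>

enum class Domain { NonNegative, NonPositive };

static Domain domainOf(int64_t V) {
  return V >= 0 ? Domain::NonNegative : Domain::NonPositive;
}

// Make an operand non-negative, as the pass does with CreateNeg.
// (Assumes V != INT64_MIN, where negation would overflow.)
static uint64_t makeNonNegative(int64_t V) {
  return V >= 0 ? static_cast<uint64_t>(V) : static_cast<uint64_t>(-V);
}

static int64_t sdivViaUdiv(int64_t A, int64_t B) {
  uint64_t Q = makeNonNegative(A) / makeNonNegative(B);
  // If the operands had two different domains, negate the result.
  return domainOf(A) != domainOf(B) ? -static_cast<int64_t>(Q)
                                    : static_cast<int64_t>(Q);
}

static int64_t sremViaUrem(int64_t A, int64_t B) {
  uint64_t R = makeNonNegative(A) % makeNonNegative(B);
  // If the dividend was non-positive, negate the result.
  return domainOf(A) == Domain::NonPositive ? -static_cast<int64_t>(R)
                                            : static_cast<int64_t>(R);
}

int main() {
  assert(sdivViaUdiv(-7, 2) == -7 / 2);  // -3
  assert(sremViaUrem(-7, 2) == -7 % 2);  // -1
  assert(sdivViaUdiv(7, -2) == 7 / -2);  // -3
  assert(sremViaUrem(7, -2) == 7 % -2);  //  1
  return 0;
}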
ConstantRange LRange = - LVI->getConstantRange(LHS, BinOp, /*UndefAllowed=*/false); + LVI->getConstantRange(LHS, BinOp, /*UndefAllowed=*/false); if (!LRange.getUnsignedMax().ule(RHS->getValue())) return false; @@ -907,7 +907,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { - if (Constant *C = LVI->getConstant(V, At)) + if (Constant *C = LVI->getConstant(V, At)) return C; // TODO: The following really should be sunk inside LVI's core algorithm, or @@ -962,7 +962,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, break; case Instruction::SRem: case Instruction::SDiv: - BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI); + BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI); break; case Instruction::UDiv: case Instruction::URem: @@ -1031,18 +1031,18 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F)); PreservedAnalyses PA; - if (!Changed) { - PA = PreservedAnalyses::all(); - } else { - PA.preserve<GlobalsAA>(); - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LazyValueAnalysis>(); - } - - // Keeping LVI alive is expensive, both because it uses a lot of memory, and - // because invalidating values in LVI is expensive. While CVP does preserve - // LVI, we know that passes after JumpThreading+CVP will not need the result - // of this analysis, so we forcefully discard it early. - PA.abandon<LazyValueAnalysis>(); + if (!Changed) { + PA = PreservedAnalyses::all(); + } else { + PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LazyValueAnalysis>(); + } + + // Keeping LVI alive is expensive, both because it uses a lot of memory, and + // because invalidating values in LVI is expensive. While CVP does preserve + // LVI, we know that passes after JumpThreading+CVP will not need the result + // of this analysis, so we forcefully discard it early. 
+ PA.abandon<LazyValueAnalysis>(); return PA; } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp index d55adf7c2d..5826d9dc96 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp @@ -69,18 +69,18 @@ Pass *llvm::createRedundantDbgInstEliminationPass() { return new RedundantDbgInstElimination(); } -PreservedAnalyses -RedundantDbgInstEliminationPass::run(Function &F, FunctionAnalysisManager &AM) { - bool Changed = false; - for (auto &BB : F) - Changed |= RemoveRedundantDbgInstrs(&BB); - if (!Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; -} - +PreservedAnalyses +RedundantDbgInstEliminationPass::run(Function &F, FunctionAnalysisManager &AM) { + bool Changed = false; + for (auto &BB : F) + Changed |= RemoveRedundantDbgInstrs(&BB); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} + //===--------------------------------------------------------------------===// // DeadCodeElimination pass implementation // @@ -143,7 +143,7 @@ static bool eliminateDeadCode(Function &F, TargetLibraryInfo *TLI) { } PreservedAnalyses DCEPass::run(Function &F, FunctionAnalysisManager &AM) { - if (!eliminateDeadCode(F, &AM.getResult<TargetLibraryAnalysis>(F))) + if (!eliminateDeadCode(F, &AM.getResult<TargetLibraryAnalysis>(F))) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -162,14 +162,14 @@ struct DCELegacyPass : public FunctionPass { if (skipFunction(F)) return false; - TargetLibraryInfo *TLI = - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); return eliminateDeadCode(F, TLI); } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.setPreservesCFG(); } }; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp index 2979225c60..e57b1d974b 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -87,10 +87,10 @@ STATISTIC(NumModifiedStores, "Number of stores modified"); STATISTIC(NumCFGChecks, "Number of stores modified"); STATISTIC(NumCFGTries, "Number of stores modified"); STATISTIC(NumCFGSuccess, "Number of stores modified"); -STATISTIC(NumGetDomMemoryDefPassed, - "Number of times a valid candidate is returned from getDomMemoryDef"); -STATISTIC(NumDomMemDefChecks, - "Number iterations check for reads in getDomMemoryDef"); +STATISTIC(NumGetDomMemoryDefPassed, + "Number of times a valid candidate is returned from getDomMemoryDef"); +STATISTIC(NumDomMemDefChecks, + "Number iterations check for reads in getDomMemoryDef"); DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated."); @@ -106,42 +106,42 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::desc("Enable partial store merging in DSE")); static cl::opt<bool> - EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden, + EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden, cl::desc("Use the new MemorySSA-backed DSE.")); static cl::opt<unsigned> - MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, + 
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 100)")); -static cl::opt<unsigned> MemorySSAUpwardsStepLimit( - "dse-memoryssa-walklimit", cl::init(90), cl::Hidden, - cl::desc("The maximum number of steps while walking upwards to find " - "MemoryDefs that may be killed (default = 90)")); - -static cl::opt<unsigned> MemorySSAPartialStoreLimit( - "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, - cl::desc("The maximum number candidates that only partially overwrite the " - "killing MemoryDef to consider" - " (default = 5)")); - +static cl::opt<unsigned> MemorySSAUpwardsStepLimit( + "dse-memoryssa-walklimit", cl::init(90), cl::Hidden, + cl::desc("The maximum number of steps while walking upwards to find " + "MemoryDefs that may be killed (default = 90)")); + +static cl::opt<unsigned> MemorySSAPartialStoreLimit( + "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, + cl::desc("The maximum number candidates that only partially overwrite the " + "killing MemoryDef to consider" + " (default = 5)")); + static cl::opt<unsigned> MemorySSADefsPerBlockLimit( "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)")); -static cl::opt<unsigned> MemorySSASameBBStepCost( - "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, - cl::desc( - "The cost of a step in the same basic block as the killing MemoryDef" - "(default = 1)")); - -static cl::opt<unsigned> - MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), - cl::Hidden, - cl::desc("The cost of a step in a different basic " - "block than the killing MemoryDef" - "(default = 5)")); - +static cl::opt<unsigned> MemorySSASameBBStepCost( + "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, + cl::desc( + "The cost of a step in the same basic block as the killing MemoryDef" + "(default = 1)")); + +static cl::opt<unsigned> + MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), + cl::Hidden, + cl::desc("The cost of a step in a different basic " + "block than the killing MemoryDef" + "(default = 5)")); + static cl::opt<unsigned> MemorySSAPathCheckLimit( "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " @@ -229,13 +229,13 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: - case Intrinsic::memcpy_inline: + case Intrinsic::memcpy_inline: case Intrinsic::memcpy_element_unordered_atomic: case Intrinsic::memmove_element_unordered_atomic: case Intrinsic::memset_element_unordered_atomic: case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: - case Intrinsic::masked_store: + case Intrinsic::masked_store: return true; } } @@ -259,23 +259,23 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, /// Return a Location stored to by the specified instruction. If isRemovable /// returns true, this function and getLocForRead completely describe the memory /// operations for this instruction. -static MemoryLocation getLocForWrite(Instruction *Inst, - const TargetLibraryInfo &TLI) { +static MemoryLocation getLocForWrite(Instruction *Inst, + const TargetLibraryInfo &TLI) { if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return MemoryLocation::get(SI); - // memcpy/memmove/memset. 
- if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) - return MemoryLocation::getForDest(MI); + // memcpy/memmove/memset. + if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) + return MemoryLocation::getForDest(MI); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { switch (II->getIntrinsicID()) { default: return MemoryLocation(); // Unhandled intrinsic. case Intrinsic::init_trampoline: - return MemoryLocation::getAfter(II->getArgOperand(0)); - case Intrinsic::masked_store: - return MemoryLocation::getForArgument(II, 1, TLI); + return MemoryLocation::getAfter(II->getArgOperand(0)); + case Intrinsic::masked_store: + return MemoryLocation::getForArgument(II, 1, TLI); case Intrinsic::lifetime_end: { uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); return MemoryLocation(II->getArgOperand(1), Len); @@ -285,7 +285,7 @@ static MemoryLocation getLocForWrite(Instruction *Inst, if (auto *CB = dyn_cast<CallBase>(Inst)) // All the supported TLI functions so far happen to have dest as their // first argument. - return MemoryLocation::getAfter(CB->getArgOperand(0)); + return MemoryLocation::getAfter(CB->getArgOperand(0)); return MemoryLocation(); } @@ -322,13 +322,13 @@ static bool isRemovable(Instruction *I) { case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: - case Intrinsic::memcpy_inline: + case Intrinsic::memcpy_inline: // Don't remove volatile memory intrinsics. return !cast<MemIntrinsic>(II)->isVolatile(); case Intrinsic::memcpy_element_unordered_atomic: case Intrinsic::memmove_element_unordered_atomic: case Intrinsic::memset_element_unordered_atomic: - case Intrinsic::masked_store: + case Intrinsic::masked_store: return true; } } @@ -374,10 +374,10 @@ static bool isShortenableAtTheBeginning(Instruction *I) { } /// Return the pointer that is being written to. -static Value *getStoredPointerOperand(Instruction *I, - const TargetLibraryInfo &TLI) { +static Value *getStoredPointerOperand(Instruction *I, + const TargetLibraryInfo &TLI) { //TODO: factor this to reuse getLocForWrite - MemoryLocation Loc = getLocForWrite(I, TLI); + MemoryLocation Loc = getLocForWrite(I, TLI); assert(Loc.Ptr && "unable to find pointer written for analyzable instruction?"); // TODO: most APIs don't expect const Value * @@ -403,59 +403,59 @@ enum OverwriteResult { OW_Complete, OW_End, OW_PartialEarlierWithFullLater, - OW_MaybePartial, + OW_MaybePartial, OW_Unknown }; } // end anonymous namespace -/// Check if two instruction are masked stores that completely -/// overwrite one another. More specifically, \p Later has to -/// overwrite \p Earlier. -template <typename AATy> -static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later, - const Instruction *Earlier, - AATy &AA) { - const auto *IIL = dyn_cast<IntrinsicInst>(Later); - const auto *IIE = dyn_cast<IntrinsicInst>(Earlier); - if (IIL == nullptr || IIE == nullptr) - return OW_Unknown; - if (IIL->getIntrinsicID() != Intrinsic::masked_store || - IIE->getIntrinsicID() != Intrinsic::masked_store) - return OW_Unknown; - // Pointers. - Value *LP = IIL->getArgOperand(1)->stripPointerCasts(); - Value *EP = IIE->getArgOperand(1)->stripPointerCasts(); - if (LP != EP && !AA.isMustAlias(LP, EP)) - return OW_Unknown; - // Masks. - // TODO: check that Later's mask is a superset of the Earlier's mask. 
- if (IIL->getArgOperand(3) != IIE->getArgOperand(3)) - return OW_Unknown; - return OW_Complete; -} - -/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI -/// instruction) completely overwrites a store to the 'Earlier' location. -/// (by \p EarlierI instruction). -/// Return OW_MaybePartial if \p Later does not completely overwrite -/// \p Earlier, but they both write to the same underlying object. In that -/// case, use isPartialOverwrite to check if \p Later partially overwrites -/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined. -template <typename AATy> -static OverwriteResult -isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, - const MemoryLocation &Later, const MemoryLocation &Earlier, - const DataLayout &DL, const TargetLibraryInfo &TLI, - int64_t &EarlierOff, int64_t &LaterOff, AATy &AA, - const Function *F) { +/// Check if two instruction are masked stores that completely +/// overwrite one another. More specifically, \p Later has to +/// overwrite \p Earlier. +template <typename AATy> +static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later, + const Instruction *Earlier, + AATy &AA) { + const auto *IIL = dyn_cast<IntrinsicInst>(Later); + const auto *IIE = dyn_cast<IntrinsicInst>(Earlier); + if (IIL == nullptr || IIE == nullptr) + return OW_Unknown; + if (IIL->getIntrinsicID() != Intrinsic::masked_store || + IIE->getIntrinsicID() != Intrinsic::masked_store) + return OW_Unknown; + // Pointers. + Value *LP = IIL->getArgOperand(1)->stripPointerCasts(); + Value *EP = IIE->getArgOperand(1)->stripPointerCasts(); + if (LP != EP && !AA.isMustAlias(LP, EP)) + return OW_Unknown; + // Masks. + // TODO: check that Later's mask is a superset of the Earlier's mask. + if (IIL->getArgOperand(3) != IIE->getArgOperand(3)) + return OW_Unknown; + return OW_Complete; +} + +/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI +/// instruction) completely overwrites a store to the 'Earlier' location. +/// (by \p EarlierI instruction). +/// Return OW_MaybePartial if \p Later does not completely overwrite +/// \p Earlier, but they both write to the same underlying object. In that +/// case, use isPartialOverwrite to check if \p Later partially overwrites +/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined. +template <typename AATy> +static OverwriteResult +isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, + const MemoryLocation &Later, const MemoryLocation &Earlier, + const DataLayout &DL, const TargetLibraryInfo &TLI, + int64_t &EarlierOff, int64_t &LaterOff, AATy &AA, + const Function *F) { // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). - if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) { - // Masked stores have imprecise locations, but we can reason about them - // to some extent. - return isMaskedStoreOverwrite(LaterI, EarlierI, AA); - } + if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) { + // Masked stores have imprecise locations, but we can reason about them + // to some extent. + return isMaskedStoreOverwrite(LaterI, EarlierI, AA); + } const uint64_t LaterSize = Later.Size.getValue(); const uint64_t EarlierSize = Earlier.Size.getValue(); @@ -474,7 +474,7 @@ isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval/inalloca argument). 
If so, then it clearly // overwrites any other store to the same object. - const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2); + const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2); // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. @@ -499,59 +499,59 @@ isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, if (BP1 != BP2) return OW_Unknown; - // The later access completely overlaps the earlier store if and only if - // both start and end of the earlier one is "inside" the later one: - // |<->|--earlier--|<->| - // |-------later-------| - // Accesses may overlap if and only if start of one of them is "inside" - // another one: - // |<->|--earlier--|<----->| - // |-------later-------| - // OR - // |----- earlier -----| - // |<->|---later---|<----->| + // The later access completely overlaps the earlier store if and only if + // both start and end of the earlier one is "inside" the later one: + // |<->|--earlier--|<->| + // |-------later-------| + // Accesses may overlap if and only if start of one of them is "inside" + // another one: + // |<->|--earlier--|<----->| + // |-------later-------| + // OR + // |----- earlier -----| + // |<->|---later---|<----->| // // We have to be careful here as *Off is signed while *.Size is unsigned. - // Check if the earlier access starts "not before" the later one. - if (EarlierOff >= LaterOff) { - // If the earlier access ends "not after" the later access then the earlier - // one is completely overwritten by the later one. - if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) - return OW_Complete; - // If start of the earlier access is "before" end of the later access then - // accesses overlap. - else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize) - return OW_MaybePartial; - } - // If start of the later access is "before" end of the earlier access then - // accesses overlap. - else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) { - return OW_MaybePartial; - } - - // Can reach here only if accesses are known not to overlap. There is no - // dedicated code to indicate no overlap so signal "unknown". - return OW_Unknown; -} - -/// Return 'OW_Complete' if a store to the 'Later' location completely -/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the -/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the -/// beginning of the 'Earlier' location is overwritten by 'Later'. -/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was -/// overwritten by a latter (smaller) store which doesn't write outside the big -/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. -/// NOTE: This function must only be called if both \p Later and \p Earlier -/// write to the same underlying object with valid \p EarlierOff and \p -/// LaterOff. -static OverwriteResult isPartialOverwrite(const MemoryLocation &Later, - const MemoryLocation &Earlier, - int64_t EarlierOff, int64_t LaterOff, - Instruction *DepWrite, - InstOverlapIntervalsTy &IOL) { - const uint64_t LaterSize = Later.Size.getValue(); - const uint64_t EarlierSize = Earlier.Size.getValue(); + // Check if the earlier access starts "not before" the later one. + if (EarlierOff >= LaterOff) { + // If the earlier access ends "not after" the later access then the earlier + // one is completely overwritten by the later one. 
+ if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) + return OW_Complete; + // If start of the earlier access is "before" end of the later access then + // accesses overlap. + else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize) + return OW_MaybePartial; + } + // If start of the later access is "before" end of the earlier access then + // accesses overlap. + else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) { + return OW_MaybePartial; + } + + // Can reach here only if accesses are known not to overlap. There is no + // dedicated code to indicate no overlap so signal "unknown". + return OW_Unknown; +} + +/// Return 'OW_Complete' if a store to the 'Later' location completely +/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the +/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the +/// beginning of the 'Earlier' location is overwritten by 'Later'. +/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was +/// overwritten by a latter (smaller) store which doesn't write outside the big +/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. +/// NOTE: This function must only be called if both \p Later and \p Earlier +/// write to the same underlying object with valid \p EarlierOff and \p +/// LaterOff. +static OverwriteResult isPartialOverwrite(const MemoryLocation &Later, + const MemoryLocation &Earlier, + int64_t EarlierOff, int64_t LaterOff, + Instruction *DepWrite, + InstOverlapIntervalsTy &IOL) { + const uint64_t LaterSize = Later.Size.getValue(); + const uint64_t EarlierSize = Earlier.Size.getValue(); // We may now overlap, although the overlap is not complete. There might also // be other incomplete overlaps, and together, they might cover the complete // earlier write. @@ -718,10 +718,10 @@ static bool isPossibleSelfRead(Instruction *Inst, /// modified between the first and the second instruction. /// Precondition: Second instruction must be dominated by the first /// instruction. -template <typename AATy> -static bool -memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA, - const DataLayout &DL, DominatorTree *DT) { +template <typename AATy> +static bool +memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA, + const DataLayout &DL, DominatorTree *DT) { // Do a backwards scan through the CFG from SecondI to FirstI. Look for // instructions which can modify the memory location accessed by SecondI. // @@ -770,7 +770,7 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA, for (; BI != EI; ++BI) { Instruction *I = &*BI; if (I->mayWriteToMemory() && I != SecondI) - if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr)))) + if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr)))) return false; } if (B != FirstBB) { @@ -826,7 +826,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA, MapVector<Instruction *, bool> &ThrowableInst) { bool MadeChange = false; - MemoryLocation Loc = MemoryLocation::getAfter(F->getOperand(0)); + MemoryLocation Loc = MemoryLocation::getAfter(F->getOperand(0)); SmallVector<BasicBlock *, 16> Blocks; Blocks.push_back(F->getParent()); @@ -844,7 +844,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA, break; Value *DepPointer = - getUnderlyingObject(getStoredPointerOperand(Dependency, *TLI)); + getUnderlyingObject(getStoredPointerOperand(Dependency, *TLI)); // Check for aliasing. 
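Aside: the isOverwrite hunks above classify a later store against an earlier one purely from byte offsets and precise sizes relative to the same underlying object. The self-contained function below reproduces that interval logic for the three outcomes this particular check distinguishes (OW_Complete, OW_MaybePartial, OW_Unknown); the name classify is ad hoc, and the refinement done later by isPartialOverwrite is not modelled. The handleFree hunk resumes right after this aside.

#include <cstdint>
#include <iostream>

enum OverwriteResult { OW_Complete, OW_MaybePartial, OW_Unknown };

// Classify a later write [LaterOff, LaterOff+LaterSize) against an earlier
// write [EarlierOff, EarlierOff+EarlierSize), both offsets relative to the
// same base object.
static OverwriteResult classify(int64_t EarlierOff, uint64_t EarlierSize,
                                int64_t LaterOff, uint64_t LaterSize) {
  if (EarlierOff >= LaterOff) {
    // The earlier access starts inside the later one.
    if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
      return OW_Complete;     // earlier store is fully covered
    if (uint64_t(EarlierOff - LaterOff) < LaterSize)
      return OW_MaybePartial; // the accesses overlap at the front
  } else if (uint64_t(LaterOff - EarlierOff) < EarlierSize) {
    return OW_MaybePartial;   // the later access starts inside the earlier one
  }
  return OW_Unknown;          // no overlap that DSE can exploit here
}

int main() {
  std::cout << classify(4, 4, 0, 16) << "\n";  // 0: OW_Complete
  std::cout << classify(0, 16, 8, 16) << "\n"; // 1: OW_MaybePartial
  std::cout << classify(0, 4, 8, 4) << "\n";   // 2: OW_Unknown
  return 0;
}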
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) @@ -884,7 +884,7 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc, const DataLayout &DL, AliasAnalysis *AA, const TargetLibraryInfo *TLI, const Function *F) { - const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr); + const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr); // A constant can't be in the dead pointer set. if (isa<Constant>(UnderlyingPointer)) @@ -937,7 +937,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. for (Argument &AI : BB.getParent()->args()) - if (AI.hasPassPointeeByValueCopyAttr()) + if (AI.hasPassPointeeByValueCopyAttr()) DeadStackObjects.insert(&AI); const DataLayout &DL = BB.getModule()->getDataLayout(); @@ -950,7 +950,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { // See through pointer-to-pointer bitcasts SmallVector<const Value *, 4> Pointers; - getUnderlyingObjects(getStoredPointerOperand(&*BBI, *TLI), Pointers); + getUnderlyingObjects(getStoredPointerOperand(&*BBI, *TLI), Pointers); // Stores to stack values are valid candidates for removal. bool AllDead = true; @@ -1069,8 +1069,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, } static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, - uint64_t &EarlierSize, int64_t LaterOffset, - uint64_t LaterSize, bool IsOverwriteEnd) { + uint64_t &EarlierSize, int64_t LaterOffset, + uint64_t LaterSize, bool IsOverwriteEnd) { // TODO: base this on the target vector size so that if the earlier // store was too small to get vector writes anyway then its likely // a good idea to shorten it @@ -1125,23 +1125,23 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, static bool tryToShortenEnd(Instruction *EarlierWrite, OverlapIntervalsTy &IntervalMap, - int64_t &EarlierStart, uint64_t &EarlierSize) { + int64_t &EarlierStart, uint64_t &EarlierSize) { if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) return false; OverlapIntervalsTy::iterator OII = --IntervalMap.end(); int64_t LaterStart = OII->second; - uint64_t LaterSize = OII->first - LaterStart; - - assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); - - if (LaterStart > EarlierStart && - // Note: "LaterStart - EarlierStart" is known to be positive due to - // preceding check. - (uint64_t)(LaterStart - EarlierStart) < EarlierSize && - // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to - // be non negative due to preceding checks. - LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) { + uint64_t LaterSize = OII->first - LaterStart; + + assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); + + if (LaterStart > EarlierStart && + // Note: "LaterStart - EarlierStart" is known to be positive due to + // preceding check. + (uint64_t)(LaterStart - EarlierStart) < EarlierSize && + // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to + // be non negative due to preceding checks. 
+ LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) { if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, LaterSize, true)) { IntervalMap.erase(OII); @@ -1153,23 +1153,23 @@ static bool tryToShortenEnd(Instruction *EarlierWrite, static bool tryToShortenBegin(Instruction *EarlierWrite, OverlapIntervalsTy &IntervalMap, - int64_t &EarlierStart, uint64_t &EarlierSize) { + int64_t &EarlierStart, uint64_t &EarlierSize) { if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) return false; OverlapIntervalsTy::iterator OII = IntervalMap.begin(); int64_t LaterStart = OII->second; - uint64_t LaterSize = OII->first - LaterStart; - - assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); - - if (LaterStart <= EarlierStart && - // Note: "EarlierStart - LaterStart" is known to be non negative due to - // preceding check. - LaterSize > (uint64_t)(EarlierStart - LaterStart)) { - // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be - // positive due to preceding checks. - assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize && + uint64_t LaterSize = OII->first - LaterStart; + + assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); + + if (LaterStart <= EarlierStart && + // Note: "EarlierStart - LaterStart" is known to be non negative due to + // preceding check. + LaterSize > (uint64_t)(EarlierStart - LaterStart)) { + // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be + // positive due to preceding checks. + assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize && "Should have been handled as OW_Complete"); if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, LaterSize, false)) { @@ -1180,18 +1180,18 @@ static bool tryToShortenBegin(Instruction *EarlierWrite, return false; } -static bool removePartiallyOverlappedStores(const DataLayout &DL, - InstOverlapIntervalsTy &IOL, - const TargetLibraryInfo &TLI) { +static bool removePartiallyOverlappedStores(const DataLayout &DL, + InstOverlapIntervalsTy &IOL, + const TargetLibraryInfo &TLI) { bool Changed = false; for (auto OI : IOL) { Instruction *EarlierWrite = OI.first; - MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI); + MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI); assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); const Value *Ptr = Loc.Ptr->stripPointerCasts(); int64_t EarlierStart = 0; - uint64_t EarlierSize = Loc.Size.getValue(); + uint64_t EarlierSize = Loc.Size.getValue(); GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); OverlapIntervalsTy &IntervalMap = OI.second; Changed |= @@ -1221,7 +1221,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { if (SI->getPointerOperand() == DepLoad->getPointerOperand() && isRemovable(SI) && - memoryIsNotModifiedBetween(DepLoad, SI, *AA, DL, DT)) { + memoryIsNotModifiedBetween(DepLoad, SI, *AA, DL, DT)) { LLVM_DEBUG( dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " @@ -1237,10 +1237,10 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { Instruction *UnderlyingPointer = - dyn_cast<Instruction>(getUnderlyingObject(SI->getPointerOperand())); + dyn_cast<Instruction>(getUnderlyingObject(SI->getPointerOperand())); if 
(UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && - memoryIsNotModifiedBetween(UnderlyingPointer, SI, *AA, DL, DT)) { + memoryIsNotModifiedBetween(UnderlyingPointer, SI, *AA, DL, DT)) { LLVM_DEBUG( dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); @@ -1253,10 +1253,10 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, return false; } -template <typename AATy> -static Constant *tryToMergePartialOverlappingStores( - StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset, - int64_t DepWriteOffset, const DataLayout &DL, AATy &AA, DominatorTree *DT) { +template <typename AATy> +static Constant *tryToMergePartialOverlappingStores( + StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset, + int64_t DepWriteOffset, const DataLayout &DL, AATy &AA, DominatorTree *DT) { if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) && @@ -1347,7 +1347,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, continue; // Figure out what location is being stored to. - MemoryLocation Loc = getLocForWrite(Inst, *TLI); + MemoryLocation Loc = getLocForWrite(Inst, *TLI); // If we didn't get a useful location, fail. if (!Loc.Ptr) @@ -1371,7 +1371,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, Instruction *DepWrite = InstDep.getInst(); if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) break; - MemoryLocation DepLoc = getLocForWrite(DepWrite, *TLI); + MemoryLocation DepLoc = getLocForWrite(DepWrite, *TLI); // If we didn't get a useful location, or if it isn't a size, bail out. if (!DepLoc.Ptr) break; @@ -1391,7 +1391,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, // to it is dead along the unwind edge. Otherwise, we need to preserve // the store. if (LastThrowing && DepWrite->comesBefore(LastThrowing)) { - const Value *Underlying = getUnderlyingObject(DepLoc.Ptr); + const Value *Underlying = getUnderlyingObject(DepLoc.Ptr); bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying); if (!IsStoreDeadOnUnwind) { // We're looking for a call to an allocation function @@ -1413,13 +1413,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, - DepWriteOffset, InstWriteOffset, *AA, + OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, + DepWriteOffset, InstWriteOffset, *AA, BB.getParent()); - if (OR == OW_MaybePartial) - OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset, - DepWrite, IOL); - + if (OR == OW_MaybePartial) + OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset, + DepWrite, IOL); + if (OR == OW_Complete) { LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); @@ -1440,8 +1440,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, "when partial-overwrite " "tracking is enabled"); // The overwrite result is known, so these must be known, too. 
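Aside: tryToShortenEnd and tryToShortenBegin above trim an earlier store once a later store is known to have rewritten its tail or its head. The sketch below performs only the interval arithmetic on (start, size) pairs; the real code additionally requires a shortenable intrinsic (the memset/memcpy family) and rewrites the intrinsic's length and destination, and the guard that replaces the "handled as OW_Complete" assertion here is a simplification. The eliminateDeadStores hunk resumes below with the tryToShorten call itself.

#include <cstdint>
#include <iostream>
#include <utility>

// (start, size) in bytes of the part of the earlier store that is still live.
using Range = std::pair<int64_t, uint64_t>;

// A later write [LaterStart, LaterStart+LaterSize) overwrote the tail of the
// earlier store: keep only the live prefix.
static bool shortenEnd(Range &Earlier, int64_t LaterStart, uint64_t LaterSize) {
  int64_t EarlierStart = Earlier.first;
  uint64_t EarlierSize = Earlier.second;
  if (LaterStart > EarlierStart &&
      uint64_t(LaterStart - EarlierStart) < EarlierSize &&
      LaterSize >= EarlierSize - uint64_t(LaterStart - EarlierStart)) {
    Earlier.second = uint64_t(LaterStart - EarlierStart);
    return true;
  }
  return false;
}

// The later write overwrote the head of the earlier store: drop that prefix.
static bool shortenBegin(Range &Earlier, int64_t LaterStart,
                         uint64_t LaterSize) {
  int64_t EarlierStart = Earlier.first;
  uint64_t EarlierSize = Earlier.second;
  if (LaterStart <= EarlierStart &&
      LaterSize > uint64_t(EarlierStart - LaterStart) &&
      LaterSize - uint64_t(EarlierStart - LaterStart) < EarlierSize) {
    uint64_t Covered = LaterSize - uint64_t(EarlierStart - LaterStart);
    Earlier.first += Covered;
    Earlier.second -= Covered;
    return true;
  }
  return false;
}

int main() {
  Range R{0, 16};        // earlier memset of 16 bytes at offset 0
  shortenEnd(R, 8, 8);   // later store covers bytes [8, 16)
  std::cout << R.first << " " << R.second << "\n"; // 0 8
  shortenBegin(R, 0, 4); // another store covers bytes [0, 4)
  std::cout << R.first << " " << R.second << "\n"; // 4 4
  return 0;
}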
- uint64_t EarlierSize = DepLoc.Size.getValue(); - uint64_t LaterSize = Loc.Size.getValue(); + uint64_t EarlierSize = DepLoc.Size.getValue(); + uint64_t LaterSize = Loc.Size.getValue(); bool IsOverwriteEnd = (OR == OW_End); MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, InstWriteOffset, LaterSize, IsOverwriteEnd); @@ -1450,7 +1450,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, auto *Earlier = dyn_cast<StoreInst>(DepWrite); auto *Later = dyn_cast<StoreInst>(Inst); if (Constant *C = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, DL, *AA, + Earlier, Later, InstWriteOffset, DepWriteOffset, DL, *AA, DT)) { auto *SI = new StoreInst( C, Earlier->getPointerOperand(), false, Earlier->getAlign(), @@ -1497,7 +1497,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, } if (EnablePartialOverwriteTracking) - MadeChange |= removePartiallyOverlappedStores(DL, IOL, *TLI); + MadeChange |= removePartiallyOverlappedStores(DL, IOL, *TLI); // If this block ends in a return, unwind, or unreachable, all allocas are // dead at its end, which means stores to them are also dead. @@ -1531,21 +1531,21 @@ namespace { // in between both MemoryDefs. A bit more concretely: // // For all MemoryDefs StartDef: -// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking +// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking // upwards. -// 2. Check that there are no reads between EarlierAccess and the StartDef by -// checking all uses starting at EarlierAccess and walking until we see -// StartDef. -// 3. For each found CurrentDef, check that: -// 1. There are no barrier instructions between CurrentDef and StartDef (like +// 2. Check that there are no reads between EarlierAccess and the StartDef by +// checking all uses starting at EarlierAccess and walking until we see +// StartDef. +// 3. For each found CurrentDef, check that: +// 1. There are no barrier instructions between CurrentDef and StartDef (like // throws or stores with ordering constraints). -// 2. StartDef is executed whenever CurrentDef is executed. -// 3. StartDef completely overwrites CurrentDef. -// 4. Erase CurrentDef from the function and MemorySSA. +// 2. StartDef is executed whenever CurrentDef is executed. +// 3. StartDef completely overwrites CurrentDef. +// 4. Erase CurrentDef from the function and MemorySSA. -// Returns true if \p I is an intrisnic that does not read or write memory. -bool isNoopIntrinsic(Instruction *I) { - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { +// Returns true if \p I is an intrisnic that does not read or write memory. +bool isNoopIntrinsic(Instruction *I) { + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: @@ -1588,7 +1588,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { return true; // Skip intrinsics that do not really read or modify memory. - if (isNoopIntrinsic(D->getMemoryInst())) + if (isNoopIntrinsic(D->getMemoryInst())) return true; return false; @@ -1597,21 +1597,21 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { struct DSEState { Function &F; AliasAnalysis &AA; - - /// The single BatchAA instance that is used to cache AA queries. It will - /// not be invalidated over the whole run. This is safe, because: - /// 1. Only memory writes are removed, so the alias cache for memory - /// locations remains valid. - /// 2. 
No new instructions are added (only instructions removed), so cached - /// information for a deleted value cannot be accessed by a re-used new - /// value pointer. - BatchAAResults BatchAA; - + + /// The single BatchAA instance that is used to cache AA queries. It will + /// not be invalidated over the whole run. This is safe, because: + /// 1. Only memory writes are removed, so the alias cache for memory + /// locations remains valid. + /// 2. No new instructions are added (only instructions removed), so cached + /// information for a deleted value cannot be accessed by a re-used new + /// value pointer. + BatchAAResults BatchAA; + MemorySSA &MSSA; DominatorTree &DT; PostDominatorTree &PDT; const TargetLibraryInfo &TLI; - const DataLayout &DL; + const DataLayout &DL; // All MemoryDefs that potentially could kill other MemDefs. SmallVector<MemoryDef *, 64> MemDefs; @@ -1619,11 +1619,11 @@ struct DSEState { SmallPtrSet<MemoryAccess *, 4> SkipStores; // Keep track of all of the objects that are invisible to the caller before // the function returns. - // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet; - DenseMap<const Value *, bool> InvisibleToCallerBeforeRet; + // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet; + DenseMap<const Value *, bool> InvisibleToCallerBeforeRet; // Keep track of all of the objects that are invisible to the caller after // the function returns. - DenseMap<const Value *, bool> InvisibleToCallerAfterRet; + DenseMap<const Value *, bool> InvisibleToCallerAfterRet; // Keep track of blocks with throwing instructions not modeled in MemorySSA. SmallPtrSet<BasicBlock *, 16> ThrowingBlocks; // Post-order numbers for each basic block. Used to figure out if memory @@ -1636,8 +1636,8 @@ struct DSEState { DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) - : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), - DL(F.getParent()->getDataLayout()) {} + : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), + DL(F.getParent()->getDataLayout()) {} static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, @@ -1663,48 +1663,48 @@ struct DSEState { // Treat byval or inalloca arguments the same as Allocas, stores to them are // dead at the end of the function. for (Argument &AI : F.args()) - if (AI.hasPassPointeeByValueCopyAttr()) { + if (AI.hasPassPointeeByValueCopyAttr()) { // For byval, the caller doesn't know the address of the allocation. 
if (AI.hasByValAttr()) - State.InvisibleToCallerBeforeRet.insert({&AI, true}); - State.InvisibleToCallerAfterRet.insert({&AI, true}); + State.InvisibleToCallerBeforeRet.insert({&AI, true}); + State.InvisibleToCallerAfterRet.insert({&AI, true}); } return State; } - bool isInvisibleToCallerAfterRet(const Value *V) { - if (isa<AllocaInst>(V)) - return true; - auto I = InvisibleToCallerAfterRet.insert({V, false}); - if (I.second) { - if (!isInvisibleToCallerBeforeRet(V)) { - I.first->second = false; - } else { - auto *Inst = dyn_cast<Instruction>(V); - if (Inst && isAllocLikeFn(Inst, &TLI)) - I.first->second = !PointerMayBeCaptured(V, true, false); - } - } - return I.first->second; - } - - bool isInvisibleToCallerBeforeRet(const Value *V) { - if (isa<AllocaInst>(V)) - return true; - auto I = InvisibleToCallerBeforeRet.insert({V, false}); - if (I.second) { - auto *Inst = dyn_cast<Instruction>(V); - if (Inst && isAllocLikeFn(Inst, &TLI)) - // NOTE: This could be made more precise by PointerMayBeCapturedBefore - // with the killing MemoryDef. But we refrain from doing so for now to - // limit compile-time and this does not cause any changes to the number - // of stores removed on a large test set in practice. - I.first->second = !PointerMayBeCaptured(V, false, true); - } - return I.first->second; - } - + bool isInvisibleToCallerAfterRet(const Value *V) { + if (isa<AllocaInst>(V)) + return true; + auto I = InvisibleToCallerAfterRet.insert({V, false}); + if (I.second) { + if (!isInvisibleToCallerBeforeRet(V)) { + I.first->second = false; + } else { + auto *Inst = dyn_cast<Instruction>(V); + if (Inst && isAllocLikeFn(Inst, &TLI)) + I.first->second = !PointerMayBeCaptured(V, true, false); + } + } + return I.first->second; + } + + bool isInvisibleToCallerBeforeRet(const Value *V) { + if (isa<AllocaInst>(V)) + return true; + auto I = InvisibleToCallerBeforeRet.insert({V, false}); + if (I.second) { + auto *Inst = dyn_cast<Instruction>(V); + if (Inst && isAllocLikeFn(Inst, &TLI)) + // NOTE: This could be made more precise by PointerMayBeCapturedBefore + // with the killing MemoryDef. But we refrain from doing so for now to + // limit compile-time and this does not cause any changes to the number + // of stores removed on a large test set in practice. + I.first->second = !PointerMayBeCaptured(V, false, true); + } + return I.first->second; + } + Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const { if (!I->mayWriteToMemory()) return None; @@ -1713,11 +1713,11 @@ struct DSEState { return {MemoryLocation::getForDest(MTI)}; if (auto *CB = dyn_cast<CallBase>(I)) { - // If the functions may write to memory we do not know about, bail out. - if (!CB->onlyAccessesArgMemory() && - !CB->onlyAccessesInaccessibleMemOrArgMem()) - return None; - + // If the functions may write to memory we do not know about, bail out. 
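Aside: isInvisibleToCallerBeforeRet and isInvisibleToCallerAfterRet above memoize a fairly expensive capture query per underlying object, using the insert-a-pessimistic-default-then-fill-on-first-use idiom on a DenseMap. The sketch below shows the same idiom with std::unordered_map; mayBeCaptured is a stand-in for PointerMayBeCaptured, not a real API, and string keys replace Value pointers purely for readability. The getLocForWriteEx hunk continues right after this aside.

#include <iostream>
#include <string>
#include <unordered_map>

// Stand-in for the expensive query (PointerMayBeCaptured in the pass).
static bool mayBeCaptured(const std::string &Obj) {
  std::cout << "  (running capture check for " << Obj << ")\n";
  return Obj == "escaping_malloc";
}

static std::unordered_map<std::string, bool> InvisibleBeforeRet;

// Insert a pessimistic default; run the expensive check only the first time
// an object is seen, mirroring the DenseMap::insert pattern in DSEState.
static bool isInvisibleToCallerBeforeRet(const std::string &Obj) {
  auto I = InvisibleBeforeRet.insert({Obj, false});
  if (I.second)
    I.first->second = !mayBeCaptured(Obj);
  return I.first->second;
}

int main() {
  std::cout << isInvisibleToCallerBeforeRet("local_malloc") << "\n";    // check runs, 1
  std::cout << isInvisibleToCallerBeforeRet("local_malloc") << "\n";    // cached, 1
  std::cout << isInvisibleToCallerBeforeRet("escaping_malloc") << "\n"; // check runs, 0
  return 0;
}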
+ if (!CB->onlyAccessesArgMemory() && + !CB->onlyAccessesInaccessibleMemOrArgMem()) + return None; + LibFunc LF; if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) { switch (LF) { @@ -1725,29 +1725,29 @@ struct DSEState { case LibFunc_strncpy: case LibFunc_strcat: case LibFunc_strncat: - return {MemoryLocation::getAfter(CB->getArgOperand(0))}; + return {MemoryLocation::getAfter(CB->getArgOperand(0))}; default: break; } } - switch (CB->getIntrinsicID()) { - case Intrinsic::init_trampoline: - return {MemoryLocation::getAfter(CB->getArgOperand(0))}; - case Intrinsic::masked_store: - return {MemoryLocation::getForArgument(CB, 1, TLI)}; - default: - break; - } + switch (CB->getIntrinsicID()) { + case Intrinsic::init_trampoline: + return {MemoryLocation::getAfter(CB->getArgOperand(0))}; + case Intrinsic::masked_store: + return {MemoryLocation::getForArgument(CB, 1, TLI)}; + default: + break; + } return None; } return MemoryLocation::getOrNone(I); } - /// Returns true if \p UseInst completely overwrites \p DefLoc - /// (stored by \p DefInst). - bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst, - Instruction *UseInst) { + /// Returns true if \p UseInst completely overwrites \p DefLoc + /// (stored by \p DefInst). + bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst, + Instruction *UseInst) { // UseInst has a MemoryDef associated in MemorySSA. It's possible for a // MemoryDef to not write to memory, e.g. a volatile load is modeled as a // MemoryDef. @@ -1759,10 +1759,10 @@ struct DSEState { return false; int64_t InstWriteOffset, DepWriteOffset; - if (auto CC = getLocForWriteEx(UseInst)) - return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset, - InstWriteOffset, BatchAA, &F) == OW_Complete; - return false; + if (auto CC = getLocForWriteEx(UseInst)) + return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset, + InstWriteOffset, BatchAA, &F) == OW_Complete; + return false; } /// Returns true if \p Def is not read before returning from the function. @@ -1793,12 +1793,12 @@ struct DSEState { } MemoryAccess *UseAccess = WorkList[I]; - // Simply adding the users of MemoryPhi to the worklist is not enough, - // because we might miss read clobbers in different iterations of a loop, - // for example. - // TODO: Add support for phi translation to handle the loop case. - if (isa<MemoryPhi>(UseAccess)) - return false; + // Simply adding the users of MemoryPhi to the worklist is not enough, + // because we might miss read clobbers in different iterations of a loop, + // for example. + // TODO: Add support for phi translation to handle the loop case. + if (isa<MemoryPhi>(UseAccess)) + return false; // TODO: Checking for aliasing is expensive. Consider reducing the amount // of times this is called and/or caching it. @@ -1827,8 +1827,8 @@ struct DSEState { if (auto *CB = dyn_cast<CallBase>(I)) { if (isFreeCall(I, &TLI)) - return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)), - true)}; + return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)), + true)}; } return None; @@ -1842,10 +1842,10 @@ struct DSEState { isFreeCall(I, &TLI); } - /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from - /// instruction \p AccessI. - bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI, - Instruction *MaybeTerm) { + /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from + /// instruction \p AccessI. 
+ bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI, + Instruction *MaybeTerm) { Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc = getLocForTerminator(MaybeTerm); @@ -1854,31 +1854,31 @@ struct DSEState { // If the terminator is a free-like call, all accesses to the underlying // object can be considered terminated. - if (getUnderlyingObject(Loc.Ptr) != - getUnderlyingObject(MaybeTermLoc->first.Ptr)) - return false; - - auto TermLoc = MaybeTermLoc->first; + if (getUnderlyingObject(Loc.Ptr) != + getUnderlyingObject(MaybeTermLoc->first.Ptr)) + return false; + + auto TermLoc = MaybeTermLoc->first; if (MaybeTermLoc->second) { - const Value *LocUO = getUnderlyingObject(Loc.Ptr); - return BatchAA.isMustAlias(TermLoc.Ptr, LocUO); + const Value *LocUO = getUnderlyingObject(Loc.Ptr); + return BatchAA.isMustAlias(TermLoc.Ptr, LocUO); } - int64_t InstWriteOffset, DepWriteOffset; - return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI, - DepWriteOffset, InstWriteOffset, BatchAA, - &F) == OW_Complete; + int64_t InstWriteOffset, DepWriteOffset; + return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI, + DepWriteOffset, InstWriteOffset, BatchAA, + &F) == OW_Complete; } // Returns true if \p Use may read from \p DefLoc. - bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) { - if (isNoopIntrinsic(UseInst)) - return false; - - // Monotonic or weaker atomic stores can be re-ordered and do not need to be - // treated as read clobber. - if (auto SI = dyn_cast<StoreInst>(UseInst)) - return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic); - + bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) { + if (isNoopIntrinsic(UseInst)) + return false; + + // Monotonic or weaker atomic stores can be re-ordered and do not need to be + // treated as read clobber. + if (auto SI = dyn_cast<StoreInst>(UseInst)) + return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic); + if (!UseInst->mayReadFromMemory()) return false; @@ -1886,246 +1886,246 @@ struct DSEState { if (CB->onlyAccessesInaccessibleMemory()) return false; - // NOTE: For calls, the number of stores removed could be slightly improved - // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to - // be expensive compared to the benefits in practice. For now, avoid more - // expensive analysis to limit compile-time. - return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); + // NOTE: For calls, the number of stores removed could be slightly improved + // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to + // be expensive compared to the benefits in practice. For now, avoid more + // expensive analysis to limit compile-time. + return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); } - /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible - /// loop. In particular, this guarantees that it only references a single - /// MemoryLocation during execution of the containing function. 
- bool IsGuaranteedLoopInvariant(Value *Ptr) { - auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) { - Ptr = Ptr->stripPointerCasts(); - if (auto *I = dyn_cast<Instruction>(Ptr)) { - if (isa<AllocaInst>(Ptr)) - return true; - - if (isAllocLikeFn(I, &TLI)) - return true; - - return false; - } - return true; - }; - - Ptr = Ptr->stripPointerCasts(); - if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { - return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) && - GEP->hasAllConstantIndices(); - } - return IsGuaranteedLoopInvariantBase(Ptr); - } - - // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with - // no read access between them or on any other path to a function exit block - // if \p DefLoc is not accessible after the function returns. If there is no - // such MemoryDef, return None. The returned value may not (completely) - // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing - // MemoryUse (read). + /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible + /// loop. In particular, this guarantees that it only references a single + /// MemoryLocation during execution of the containing function. + bool IsGuaranteedLoopInvariant(Value *Ptr) { + auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) { + Ptr = Ptr->stripPointerCasts(); + if (auto *I = dyn_cast<Instruction>(Ptr)) { + if (isa<AllocaInst>(Ptr)) + return true; + + if (isAllocLikeFn(I, &TLI)) + return true; + + return false; + } + return true; + }; + + Ptr = Ptr->stripPointerCasts(); + if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { + return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) && + GEP->hasAllConstantIndices(); + } + return IsGuaranteedLoopInvariantBase(Ptr); + } + + // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with + // no read access between them or on any other path to a function exit block + // if \p DefLoc is not accessible after the function returns. If there is no + // such MemoryDef, return None. The returned value may not (completely) + // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing + // MemoryUse (read). Optional<MemoryAccess *> - getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, - const MemoryLocation &DefLoc, const Value *DefUO, - unsigned &ScanLimit, unsigned &WalkerStepLimit, - bool IsMemTerm, unsigned &PartialLimit) { - if (ScanLimit == 0 || WalkerStepLimit == 0) { - LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); - return None; - } - - MemoryAccess *Current = StartAccess; - Instruction *KillingI = KillingDef->getMemoryInst(); + getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, + const MemoryLocation &DefLoc, const Value *DefUO, + unsigned &ScanLimit, unsigned &WalkerStepLimit, + bool IsMemTerm, unsigned &PartialLimit) { + if (ScanLimit == 0 || WalkerStepLimit == 0) { + LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); + return None; + } + + MemoryAccess *Current = StartAccess; + Instruction *KillingI = KillingDef->getMemoryInst(); bool StepAgain; - LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); - - // Find the next clobbering Mod access for DefLoc, starting at StartAccess. - Optional<MemoryLocation> CurrentLoc; + LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); + + // Find the next clobbering Mod access for DefLoc, starting at StartAccess. 
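Aside: getDomMemoryDef, whose body follows in the next chunk, walks upwards from the killing MemoryDef under several budgets: a scan limit, a walker step limit, and per-step costs that make steps within the killing block cheaper than cross-block steps. With the defaults declared earlier in this file (dse-memoryssa-walklimit=90, samebb-cost=1, otherbb-cost=5), on the order of 90 same-block steps fit into one walk but fewer than 20 cross-block steps do. The fragment below models only that accounting over an abstract chain of defs; the Def struct and findCandidate are invented for illustration.

#include <cstddef>
#include <iostream>
#include <vector>

struct Def {
  int Block;        // basic block id of this MemoryDef
  bool Interesting; // would DSE consider it a candidate?
};

// Walk the chain of defining accesses, charging 1 per step inside the killing
// block and 5 per step elsewhere, and give up when the budget runs out.
static int findCandidate(const std::vector<Def> &Chain, int KillingBlock,
                         unsigned WalkerStepLimit = 90) {
  for (std::size_t I = 0; I < Chain.size(); ++I) {
    unsigned StepCost = Chain[I].Block == KillingBlock ? 1u : 5u;
    if (WalkerStepLimit <= StepCost)
      return -1; // hit the walker step limit
    WalkerStepLimit -= StepCost;
    if (Chain[I].Interesting)
      return static_cast<int>(I);
  }
  return -1;
}

int main() {
  std::vector<Def> Chain(30, Def{1, false}); // 30 cross-block, boring defs
  Chain.push_back(Def{0, true});             // candidate in the killing block
  // 30 cross-block steps would cost 150 > 90, so the walk gives up first.
  std::cout << findCandidate(Chain, /*KillingBlock=*/0) << "\n"; // -1
  return 0;
}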
+ Optional<MemoryLocation> CurrentLoc; do { StepAgain = false; - LLVM_DEBUG({ - dbgs() << " visiting " << *Current; - if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current)) - dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst() - << ")"; - dbgs() << "\n"; - }); - + LLVM_DEBUG({ + dbgs() << " visiting " << *Current; + if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current)) + dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst() + << ")"; + dbgs() << "\n"; + }); + // Reached TOP. - if (MSSA.isLiveOnEntryDef(Current)) { - LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n"); + if (MSSA.isLiveOnEntryDef(Current)) { + LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n"); return None; - } - - // Cost of a step. Accesses in the same block are more likely to be valid - // candidates for elimination, hence consider them cheaper. - unsigned StepCost = KillingDef->getBlock() == Current->getBlock() - ? MemorySSASameBBStepCost - : MemorySSAOtherBBStepCost; - if (WalkerStepLimit <= StepCost) { - LLVM_DEBUG(dbgs() << " ... hit walker step limit\n"); - return None; - } - WalkerStepLimit -= StepCost; - - // Return for MemoryPhis. They cannot be eliminated directly and the - // caller is responsible for traversing them. + } + + // Cost of a step. Accesses in the same block are more likely to be valid + // candidates for elimination, hence consider them cheaper. + unsigned StepCost = KillingDef->getBlock() == Current->getBlock() + ? MemorySSASameBBStepCost + : MemorySSAOtherBBStepCost; + if (WalkerStepLimit <= StepCost) { + LLVM_DEBUG(dbgs() << " ... hit walker step limit\n"); + return None; + } + WalkerStepLimit -= StepCost; + + // Return for MemoryPhis. They cannot be eliminated directly and the + // caller is responsible for traversing them. if (isa<MemoryPhi>(Current)) { - LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n"); - return Current; - } - - // Below, check if CurrentDef is a valid candidate to be eliminated by - // KillingDef. If it is not, check the next candidate. - MemoryDef *CurrentDef = cast<MemoryDef>(Current); - Instruction *CurrentI = CurrentDef->getMemoryInst(); - - if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - continue; - } - - // Before we try to remove anything, check for any extra throwing - // instructions that block us from DSEing - if (mayThrowBetween(KillingI, CurrentI, DefUO)) { - LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); - return None; - } - - // Check for anything that looks like it will be a barrier to further - // removal - if (isDSEBarrier(DefUO, CurrentI)) { - LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); - return None; + LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n"); + return Current; } - - // If Current is known to be on path that reads DefLoc or is a read - // clobber, bail out, as the path is not profitable. We skip this check - // for intrinsic calls, because the code knows how to handle memcpy - // intrinsics. - if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI)) - return None; - - // Quick check if there are direct uses that are read-clobbers. - if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) { - if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser())) - return !MSSA.dominates(StartAccess, UseOrDef) && - isReadClobber(DefLoc, UseOrDef->getMemoryInst()); - return false; - })) { - LLVM_DEBUG(dbgs() << " ... 
found a read clobber\n"); + + // Below, check if CurrentDef is a valid candidate to be eliminated by + // KillingDef. If it is not, check the next candidate. + MemoryDef *CurrentDef = cast<MemoryDef>(Current); + Instruction *CurrentI = CurrentDef->getMemoryInst(); + + if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + continue; + } + + // Before we try to remove anything, check for any extra throwing + // instructions that block us from DSEing + if (mayThrowBetween(KillingI, CurrentI, DefUO)) { + LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); return None; - } - - // If Current cannot be analyzed or is not removable, check the next - // candidate. - if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) { + } + + // Check for anything that looks like it will be a barrier to further + // removal + if (isDSEBarrier(DefUO, CurrentI)) { + LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); + return None; + } + + // If Current is known to be on path that reads DefLoc or is a read + // clobber, bail out, as the path is not profitable. We skip this check + // for intrinsic calls, because the code knows how to handle memcpy + // intrinsics. + if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI)) + return None; + + // Quick check if there are direct uses that are read-clobbers. + if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) { + if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser())) + return !MSSA.dominates(StartAccess, UseOrDef) && + isReadClobber(DefLoc, UseOrDef->getMemoryInst()); + return false; + })) { + LLVM_DEBUG(dbgs() << " ... found a read clobber\n"); + return None; + } + + // If Current cannot be analyzed or is not removable, check the next + // candidate. + if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) { StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - continue; + Current = CurrentDef->getDefiningAccess(); + continue; } - // If Current does not have an analyzable write location, skip it - CurrentLoc = getLocForWriteEx(CurrentI); - if (!CurrentLoc) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - continue; - } - - // AliasAnalysis does not account for loops. Limit elimination to - // candidates for which we can guarantee they always store to the same - // memory location and not multiple locations in a loop. - if (Current->getBlock() != KillingDef->getBlock() && - !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - WalkerStepLimit -= 1; - continue; - } - - if (IsMemTerm) { - // If the killing def is a memory terminator (e.g. lifetime.end), check - // the next candidate if the current Current does not write the same - // underlying object as the terminator. - if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - } - continue; - } else { - int64_t InstWriteOffset, DepWriteOffset; - auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI, - DepWriteOffset, InstWriteOffset, BatchAA, &F); - // If Current does not write to the same object as KillingDef, check - // the next candidate. - if (OR == OW_Unknown) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - } else if (OR == OW_MaybePartial) { - // If KillingDef only partially overwrites Current, check the next - // candidate if the partial step limit is exceeded. 
This aggressively - // limits the number of candidates for partial store elimination, - // which are less likely to be removable in the end. - if (PartialLimit <= 1) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - WalkerStepLimit -= 1; - continue; - } - PartialLimit -= 1; - } - } + // If Current does not have an analyzable write location, skip it + CurrentLoc = getLocForWriteEx(CurrentI); + if (!CurrentLoc) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + continue; + } + + // AliasAnalysis does not account for loops. Limit elimination to + // candidates for which we can guarantee they always store to the same + // memory location and not multiple locations in a loop. + if (Current->getBlock() != KillingDef->getBlock() && + !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + WalkerStepLimit -= 1; + continue; + } + + if (IsMemTerm) { + // If the killing def is a memory terminator (e.g. lifetime.end), check + // the next candidate if the current Current does not write the same + // underlying object as the terminator. + if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + } + continue; + } else { + int64_t InstWriteOffset, DepWriteOffset; + auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI, + DepWriteOffset, InstWriteOffset, BatchAA, &F); + // If Current does not write to the same object as KillingDef, check + // the next candidate. + if (OR == OW_Unknown) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + } else if (OR == OW_MaybePartial) { + // If KillingDef only partially overwrites Current, check the next + // candidate if the partial step limit is exceeded. This aggressively + // limits the number of candidates for partial store elimination, + // which are less likely to be removable in the end. + if (PartialLimit <= 1) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + WalkerStepLimit -= 1; + continue; + } + PartialLimit -= 1; + } + } } while (StepAgain); // Accesses to objects accessible after the function returns can only be // eliminated if the access is killed along all paths to the exit. Collect // the blocks with killing (=completely overwriting MemoryDefs) and check if - // they cover all paths from EarlierAccess to any function exit. - SmallPtrSet<Instruction *, 16> KillingDefs; - KillingDefs.insert(KillingDef->getMemoryInst()); - MemoryAccess *EarlierAccess = Current; - Instruction *EarlierMemInst = - cast<MemoryDef>(EarlierAccess)->getMemoryInst(); - LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " (" - << *EarlierMemInst << ")\n"); + // they cover all paths from EarlierAccess to any function exit. + SmallPtrSet<Instruction *, 16> KillingDefs; + KillingDefs.insert(KillingDef->getMemoryInst()); + MemoryAccess *EarlierAccess = Current; + Instruction *EarlierMemInst = + cast<MemoryDef>(EarlierAccess)->getMemoryInst(); + LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " (" + << *EarlierMemInst << ")\n"); SmallSetVector<MemoryAccess *, 32> WorkList; auto PushMemUses = [&WorkList](MemoryAccess *Acc) { for (Use &U : Acc->uses()) WorkList.insert(cast<MemoryAccess>(U.getUser())); }; - PushMemUses(EarlierAccess); - - // Optimistically collect all accesses for reads. If we do not find any - // read clobbers, add them to the cache. 
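// Illustrative standalone sketch of the budgeted walk that just ended above:
// each step to an older write is charged against a budget, with steps inside
// the killing block cheaper than cross-block steps, and the walk gives up
// once the budget runs out. Toy data only; ToyDef/findCandidate are
// placeholder names, not MemorySSA.
#include <optional>
#include <vector>

struct ToyDef {
  int Block;        // basic block id of the write
  bool Skippable;   // e.g. no-op intrinsics the walk may step over
};

static std::optional<int> findCandidate(const std::vector<ToyDef> &Defs,
                                        int KillingBlock, unsigned Budget,
                                        unsigned SameBBCost = 1,
                                        unsigned OtherBBCost = 5) {
  // Walk from the most recent write (back of the vector) toward older ones.
  for (int I = (int)Defs.size() - 1; I >= 0; --I) {
    unsigned Cost = Defs[I].Block == KillingBlock ? SameBBCost : OtherBBCost;
    if (Budget < Cost)
      return std::nullopt;   // hit the step limit: stop looking
    Budget -= Cost;
    if (!Defs[I].Skippable)
      return I;              // first write worth analysing for elimination
  }
  return std::nullopt;       // walked off the top ("live on entry")
}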
- SmallPtrSet<MemoryAccess *, 16> KnownNoReads; - if (!EarlierMemInst->mayReadFromMemory()) - KnownNoReads.insert(EarlierAccess); - // Check if EarlierDef may be read. + PushMemUses(EarlierAccess); + + // Optimistically collect all accesses for reads. If we do not find any + // read clobbers, add them to the cache. + SmallPtrSet<MemoryAccess *, 16> KnownNoReads; + if (!EarlierMemInst->mayReadFromMemory()) + KnownNoReads.insert(EarlierAccess); + // Check if EarlierDef may be read. for (unsigned I = 0; I < WorkList.size(); I++) { MemoryAccess *UseAccess = WorkList[I]; LLVM_DEBUG(dbgs() << " " << *UseAccess); - // Bail out if the number of accesses to check exceeds the scan limit. - if (ScanLimit < (WorkList.size() - I)) { + // Bail out if the number of accesses to check exceeds the scan limit. + if (ScanLimit < (WorkList.size() - I)) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); return None; } - --ScanLimit; - NumDomMemDefChecks++; - KnownNoReads.insert(UseAccess); + --ScanLimit; + NumDomMemDefChecks++; + KnownNoReads.insert(UseAccess); if (isa<MemoryPhi>(UseAccess)) { - if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) { - return DT.properlyDominates(KI->getParent(), - UseAccess->getBlock()); - })) { - LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n"); - continue; - } + if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) { + return DT.properlyDominates(KI->getParent(), + UseAccess->getBlock()); + })) { + LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n"); + continue; + } LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n"); PushMemUses(UseAccess); continue; @@ -2134,45 +2134,45 @@ struct DSEState { Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst(); LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); - if (any_of(KillingDefs, [this, UseInst](Instruction *KI) { - return DT.dominates(KI, UseInst); - })) { - LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n"); + if (any_of(KillingDefs, [this, UseInst](Instruction *KI) { + return DT.dominates(KI, UseInst); + })) { + LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n"); continue; } // A memory terminator kills all preceeding MemoryDefs and all succeeding // MemoryAccesses. We do not have to check it's users. - if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) { - LLVM_DEBUG( - dbgs() - << " ... skipping, memterminator invalidates following accesses\n"); + if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) { + LLVM_DEBUG( + dbgs() + << " ... skipping, memterminator invalidates following accesses\n"); continue; - } - - if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) { - LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n"); - PushMemUses(UseAccess); - continue; - } - - if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { - LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); - return None; - } - + } + + if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) { + LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n"); + PushMemUses(UseAccess); + continue; + } + + if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { + LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); + return None; + } + // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. - if (isReadClobber(*CurrentLoc, UseInst)) { + if (isReadClobber(*CurrentLoc, UseInst)) { LLVM_DEBUG(dbgs() << " ... 
found read clobber\n"); return None; } - // For the KillingDef and EarlierAccess we only have to check if it reads - // the memory location. + // For the KillingDef and EarlierAccess we only have to check if it reads + // the memory location. // TODO: It would probably be better to check for self-reads before // calling the function. - if (KillingDef == UseAccess || EarlierAccess == UseAccess) { + if (KillingDef == UseAccess || EarlierAccess == UseAccess) { LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); continue; } @@ -2181,23 +2181,23 @@ struct DSEState { // the original location. Otherwise we have to check uses of *all* // MemoryDefs we discover, including non-aliasing ones. Otherwise we might // miss cases like the following - // 1 = Def(LoE) ; <----- EarlierDef stores [0,1] + // 1 = Def(LoE) ; <----- EarlierDef stores [0,1] // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. // (The Use points to the *first* Def it may alias) // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) { - if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { - if (!isInvisibleToCallerAfterRet(DefUO) && - UseAccess != EarlierAccess) { + if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { + if (!isInvisibleToCallerAfterRet(DefUO) && + UseAccess != EarlierAccess) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); if (PostOrderNumbers.find(MaybeKillingBlock)->second < - PostOrderNumbers.find(EarlierAccess->getBlock())->second) { + PostOrderNumbers.find(EarlierAccess->getBlock())->second) { - LLVM_DEBUG(dbgs() - << " ... found killing def " << *UseInst << "\n"); - KillingDefs.insert(UseInst); + LLVM_DEBUG(dbgs() + << " ... found killing def " << *UseInst << "\n"); + KillingDefs.insert(UseInst); } } } else @@ -2206,15 +2206,15 @@ struct DSEState { } // For accesses to locations visible after the function returns, make sure - // that the location is killed (=overwritten) along all paths from - // EarlierAccess to the exit. - if (!isInvisibleToCallerAfterRet(DefUO)) { - SmallPtrSet<BasicBlock *, 16> KillingBlocks; - for (Instruction *KD : KillingDefs) - KillingBlocks.insert(KD->getParent()); + // that the location is killed (=overwritten) along all paths from + // EarlierAccess to the exit. + if (!isInvisibleToCallerAfterRet(DefUO)) { + SmallPtrSet<BasicBlock *, 16> KillingBlocks; + for (Instruction *KD : KillingDefs) + KillingBlocks.insert(KD->getParent()); assert(!KillingBlocks.empty() && "Expected at least a single killing block"); - + // Find the common post-dominator of all killing blocks. BasicBlock *CommonPred = *KillingBlocks.begin(); for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end(); @@ -2225,17 +2225,17 @@ struct DSEState { } // If CommonPred is in the set of killing blocks, just check if it - // post-dominates EarlierAccess. + // post-dominates EarlierAccess. if (KillingBlocks.count(CommonPred)) { - if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) - return {EarlierAccess}; + if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) + return {EarlierAccess}; return None; } - // If the common post-dominator does not post-dominate EarlierAccess, - // there is a path from EarlierAccess to an exit not going through a - // killing block. 
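// Illustrative standalone sketch of the bounded use scan above: breadth-first
// walk of everything that may observe the earlier write, aborted once the
// scan limit is exceeded or a potential reader of the location shows up.
// ToyAccess/noReaderWithinLimit are placeholders, not MemorySSA accesses.
#include <cstddef>
#include <vector>

struct ToyAccess {
  bool MayReadLoc = false;    // stands in for isReadClobber(...)
  std::vector<int> Users;     // indices of accesses that use this one
};

// Returns true when no reader was found within the budget, i.e. the earlier
// write is still a candidate for elimination.
static bool noReaderWithinLimit(const std::vector<ToyAccess> &Accs,
                                int EarlierIdx, unsigned ScanLimit) {
  std::vector<int> Worklist = {EarlierIdx};
  std::vector<bool> Seen(Accs.size(), false);
  Seen[EarlierIdx] = true;
  for (std::size_t I = 0; I < Worklist.size(); ++I) {
    if (ScanLimit < Worklist.size() - I)
      return false;           // too many accesses to check: give up
    --ScanLimit;
    const ToyAccess &A = Accs[Worklist[I]];
    if (I != 0 && A.MayReadLoc)
      return false;           // a reader keeps the earlier write alive
    for (int U : A.Users)
      if (!Seen[U]) {
        Seen[U] = true;
        Worklist.push_back(U);
      }
  }
  return true;
}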
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) { + // If the common post-dominator does not post-dominate EarlierAccess, + // there is a path from EarlierAccess to an exit not going through a + // killing block. + if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) { SetVector<BasicBlock *> WorkList; // If CommonPred is null, there are multiple exits from the function. @@ -2248,17 +2248,17 @@ struct DSEState { NumCFGTries++; // Check if all paths starting from an exit node go through one of the - // killing blocks before reaching EarlierAccess. + // killing blocks before reaching EarlierAccess. for (unsigned I = 0; I < WorkList.size(); I++) { NumCFGChecks++; BasicBlock *Current = WorkList[I]; if (KillingBlocks.count(Current)) continue; - if (Current == EarlierAccess->getBlock()) + if (Current == EarlierAccess->getBlock()) return None; - // EarlierAccess is reachable from the entry, so we don't have to - // explore unreachable blocks further. + // EarlierAccess is reachable from the entry, so we don't have to + // explore unreachable blocks further. if (!DT.isReachableFromEntry(Current)) continue; @@ -2269,14 +2269,14 @@ struct DSEState { return None; } NumCFGSuccess++; - return {EarlierAccess}; + return {EarlierAccess}; } return None; } - // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is - // potentially dead. - return {EarlierAccess}; + // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is + // potentially dead. + return {EarlierAccess}; } // Delete dead memory defs @@ -2321,11 +2321,11 @@ struct DSEState { // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may // throw are handled during the walk from one def to the next. bool mayThrowBetween(Instruction *SI, Instruction *NI, - const Value *SILocUnd) { + const Value *SILocUnd) { // First see if we can ignore it by using the fact that SI is an // alloca/alloca like object that is not visible to the caller during // execution of the function. - if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd)) + if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd)) return false; if (SI->getParent() == NI->getParent()) @@ -2338,10 +2338,10 @@ struct DSEState { // * A memory instruction that may throw and \p SI accesses a non-stack // object. // * Atomic stores stronger that monotonic. - bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) { + bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) { // If NI may throw it acts as a barrier, unless we are to an alloca/alloca // like object that does not escape. 
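// Illustrative standalone sketch of the path check above: walking backwards
// from the exit blocks (or from the killing blocks' common post-dominator),
// every path must hit a killing block before it can reach the block of the
// earlier write; otherwise the write is still visible on some path to an
// exit. ToyCFG is a placeholder adjacency list, not llvm::BasicBlock or the
// post-dominator tree.
#include <cstddef>
#include <vector>

struct ToyCFG {
  std::vector<std::vector<int>> Preds; // predecessor lists per block id
};

static bool killedOnAllPathsToExit(const ToyCFG &CFG,
                                   std::vector<int> StartBlocks,
                                   const std::vector<bool> &IsKillingBlock,
                                   int EarlierBlock) {
  std::vector<bool> Seen(CFG.Preds.size(), false);
  std::vector<int> Worklist = std::move(StartBlocks);
  for (std::size_t I = 0; I < Worklist.size(); ++I) {
    int B = Worklist[I];
    if (Seen[B])
      continue;
    Seen[B] = true;
    if (IsKillingBlock[B])
      continue;               // overwritten on this path: nothing escapes here
    if (B == EarlierBlock)
      return false;           // an exit is reachable without a killing store
    for (int P : CFG.Preds[B])
      Worklist.push_back(P);
  }
  return true;
}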
- if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd)) + if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd)) return true; // If NI is an atomic load/store stronger than monotonic, do not try to @@ -2351,11 +2351,11 @@ struct DSEState { return isStrongerThanMonotonic(LI->getOrdering()); if (auto *SI = dyn_cast<StoreInst>(NI)) return isStrongerThanMonotonic(SI->getOrdering()); - if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI)) - return isStrongerThanMonotonic(ARMW->getOrdering()); - if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI)) - return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) || - isStrongerThanMonotonic(CmpXchg->getFailureOrdering()); + if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI)) + return isStrongerThanMonotonic(ARMW->getOrdering()); + if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI)) + return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) || + isStrongerThanMonotonic(CmpXchg->getFailureOrdering()); llvm_unreachable("other instructions should be skipped in MemorySSA"); } return false; @@ -2370,31 +2370,31 @@ struct DSEState { << "Trying to eliminate MemoryDefs at the end of the function\n"); for (int I = MemDefs.size() - 1; I >= 0; I--) { MemoryDef *Def = MemDefs[I]; - if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst())) - continue; - - Instruction *DefI = Def->getMemoryInst(); - SmallVector<const Value *, 4> Pointers; - auto DefLoc = getLocForWriteEx(DefI); - if (!DefLoc) - continue; - - // NOTE: Currently eliminating writes at the end of a function is limited - // to MemoryDefs with a single underlying object, to save compile-time. In - // practice it appears the case with multiple underlying objects is very - // uncommon. If it turns out to be important, we can use - // getUnderlyingObjects here instead. - const Value *UO = getUnderlyingObject(DefLoc->Ptr); - if (!UO || !isInvisibleToCallerAfterRet(UO)) + if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst())) continue; + Instruction *DefI = Def->getMemoryInst(); + SmallVector<const Value *, 4> Pointers; + auto DefLoc = getLocForWriteEx(DefI); + if (!DefLoc) + continue; + + // NOTE: Currently eliminating writes at the end of a function is limited + // to MemoryDefs with a single underlying object, to save compile-time. In + // practice it appears the case with multiple underlying objects is very + // uncommon. If it turns out to be important, we can use + // getUnderlyingObjects here instead. + const Value *UO = getUnderlyingObject(DefLoc->Ptr); + if (!UO || !isInvisibleToCallerAfterRet(UO)) + continue; + if (isWriteAtEndOfFunction(Def)) { // See through pointer-to-pointer bitcasts LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " "of the function\n"); - deleteDeadInstruction(DefI); - ++NumFastStores; - MadeChange = true; + deleteDeadInstruction(DefI); + ++NumFastStores; + MadeChange = true; } } return MadeChange; @@ -2402,53 +2402,53 @@ struct DSEState { /// \returns true if \p Def is a no-op store, either because it /// directly stores back a loaded value or stores zero to a calloced object. - bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc, - const Value *DefUO) { + bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc, + const Value *DefUO) { StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst()); if (!Store) return false; if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) { if (LoadI->getPointerOperand() == Store->getOperand(1)) { - // Get the defining access for the load. 
+ // Get the defining access for the load. auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess(); - // Fast path: the defining accesses are the same. - if (LoadAccess == Def->getDefiningAccess()) - return true; - - // Look through phi accesses. Recursively scan all phi accesses by - // adding them to a worklist. Bail when we run into a memory def that - // does not match LoadAccess. - SetVector<MemoryAccess *> ToCheck; - MemoryAccess *Current = - MSSA.getWalker()->getClobberingMemoryAccess(Def); - // We don't want to bail when we run into the store memory def. But, - // the phi access may point to it. So, pretend like we've already - // checked it. - ToCheck.insert(Def); - ToCheck.insert(Current); - // Start at current (1) to simulate already having checked Def. - for (unsigned I = 1; I < ToCheck.size(); ++I) { - Current = ToCheck[I]; - if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) { - // Check all the operands. - for (auto &Use : PhiAccess->incoming_values()) - ToCheck.insert(cast<MemoryAccess>(&Use)); - continue; - } - - // If we found a memory def, bail. This happens when we have an - // unrelated write in between an otherwise noop store. - assert(isa<MemoryDef>(Current) && - "Only MemoryDefs should reach here."); - // TODO: Skip no alias MemoryDefs that have no aliasing reads. - // We are searching for the definition of the store's destination. - // So, if that is the same definition as the load, then this is a - // noop. Otherwise, fail. - if (LoadAccess != Current) - return false; - } - return true; + // Fast path: the defining accesses are the same. + if (LoadAccess == Def->getDefiningAccess()) + return true; + + // Look through phi accesses. Recursively scan all phi accesses by + // adding them to a worklist. Bail when we run into a memory def that + // does not match LoadAccess. + SetVector<MemoryAccess *> ToCheck; + MemoryAccess *Current = + MSSA.getWalker()->getClobberingMemoryAccess(Def); + // We don't want to bail when we run into the store memory def. But, + // the phi access may point to it. So, pretend like we've already + // checked it. + ToCheck.insert(Def); + ToCheck.insert(Current); + // Start at current (1) to simulate already having checked Def. + for (unsigned I = 1; I < ToCheck.size(); ++I) { + Current = ToCheck[I]; + if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) { + // Check all the operands. + for (auto &Use : PhiAccess->incoming_values()) + ToCheck.insert(cast<MemoryAccess>(&Use)); + continue; + } + + // If we found a memory def, bail. This happens when we have an + // unrelated write in between an otherwise noop store. + assert(isa<MemoryDef>(Current) && + "Only MemoryDefs should reach here."); + // TODO: Skip no alias MemoryDefs that have no aliasing reads. + // We are searching for the definition of the store's destination. + // So, if that is the same definition as the load, then this is a + // noop. Otherwise, fail. 
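// Illustrative standalone sketch of the no-op-store check above: a store of a
// value loaded from the same address is redundant when every definition that
// can reach the store (looking through phi merge points) is exactly the
// definition the load read from. ToyMemAccess is a placeholder access graph,
// not MemorySSA; index 0 models the function-entry state.
#include <cstddef>
#include <vector>

struct ToyMemAccess {
  bool IsPhi = false;
  std::vector<int> Incoming; // phi inputs (access indices)
  int DefiningAccess = 0;    // for defs: the write this one is based on
};

static bool storeIsNoop(const std::vector<ToyMemAccess> &Accs, int StoreIdx,
                        int LoadDefIdx) {
  // Fast path: the store and the load see the very same defining write.
  if (Accs[StoreIdx].DefiningAccess == LoadDefIdx)
    return true;
  // Otherwise chase phis; any real write other than LoadDefIdx disqualifies.
  std::vector<int> ToCheck = {StoreIdx, Accs[StoreIdx].DefiningAccess};
  std::vector<bool> Seen(Accs.size(), false);
  Seen[StoreIdx] = true;     // pretend the store itself was already checked
  for (std::size_t I = 1; I < ToCheck.size(); ++I) {
    int Cur = ToCheck[I];
    if (Seen[Cur])
      continue;
    Seen[Cur] = true;
    if (Accs[Cur].IsPhi) {
      for (int In : Accs[Cur].Incoming)
        ToCheck.push_back(In);
      continue;
    }
    if (Cur != LoadDefIdx)
      return false;          // unrelated write in between: not a no-op
  }
  return true;
}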
+ if (LoadAccess != Current) + return false; + } + return true; } } @@ -2482,7 +2482,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, continue; Instruction *SI = KillingDef->getMemoryInst(); - Optional<MemoryLocation> MaybeSILoc; + Optional<MemoryLocation> MaybeSILoc; if (State.isMemTerminatorInst(SI)) MaybeSILoc = State.getLocForTerminator(SI).map( [](const std::pair<MemoryLocation, bool> &P) { return P.first; }); @@ -2496,23 +2496,23 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, } MemoryLocation SILoc = *MaybeSILoc; assert(SILoc.Ptr && "SILoc should not be null"); - const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); + const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); MemoryAccess *Current = KillingDef; LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *KillingDef << " (" << *SI << ")\n"); - unsigned ScanLimit = MemorySSAScanLimit; - unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; - unsigned PartialLimit = MemorySSAPartialStoreLimit; + unsigned ScanLimit = MemorySSAScanLimit; + unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; + unsigned PartialLimit = MemorySSAPartialStoreLimit; // Worklist of MemoryAccesses that may be killed by KillingDef. SetVector<MemoryAccess *> ToCheck; - if (SILocUnd) - ToCheck.insert(KillingDef->getDefiningAccess()); - - bool Shortend = false; - bool IsMemTerm = State.isMemTerminatorInst(SI); + if (SILocUnd) + ToCheck.insert(KillingDef->getDefiningAccess()); + + bool Shortend = false; + bool IsMemTerm = State.isMemTerminatorInst(SI); // Check if MemoryAccesses in the worklist are killed by KillingDef. for (unsigned I = 0; I < ToCheck.size(); I++) { Current = ToCheck[I]; @@ -2520,22 +2520,22 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, continue; Optional<MemoryAccess *> Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, - IsMemTerm, PartialLimit); + KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, + IsMemTerm, PartialLimit); if (!Next) { LLVM_DEBUG(dbgs() << " finished walk\n"); continue; } - MemoryAccess *EarlierAccess = *Next; - LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); - if (isa<MemoryPhi>(EarlierAccess)) { + MemoryAccess *EarlierAccess = *Next; + LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); + if (isa<MemoryPhi>(EarlierAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); - for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) { + for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) { MemoryAccess *IncomingAccess = cast<MemoryAccess>(V); BasicBlock *IncomingBlock = IncomingAccess->getBlock(); - BasicBlock *PhiBlock = EarlierAccess->getBlock(); + BasicBlock *PhiBlock = EarlierAccess->getBlock(); // We only consider incoming MemoryAccesses that come before the // MemoryPhi. 
Otherwise we could discover candidates that do not @@ -2546,20 +2546,20 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, } continue; } - auto *NextDef = cast<MemoryDef>(EarlierAccess); + auto *NextDef = cast<MemoryDef>(EarlierAccess); Instruction *NI = NextDef->getMemoryInst(); LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); ToCheck.insert(NextDef->getDefiningAccess()); - NumGetDomMemoryDefPassed++; + NumGetDomMemoryDefPassed++; if (!DebugCounter::shouldExecute(MemorySSACounter)) continue; MemoryLocation NILoc = *State.getLocForWriteEx(NI); - if (IsMemTerm) { - const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); - if (SILocUnd != NIUnd) + if (IsMemTerm) { + const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); + if (SILocUnd != NIUnd) continue; LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI << "\n KILLER: " << *SI << '\n'); @@ -2569,43 +2569,43 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, } else { // Check if NI overwrites SI. int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = - isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset, - InstWriteOffset, State.BatchAA, &F); - if (OR == OW_MaybePartial) { - auto Iter = State.IOLs.insert( - std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( - NI->getParent(), InstOverlapIntervalsTy())); - auto &IOL = Iter.first->second; - OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset, - NI, IOL); - } + OverwriteResult OR = + isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset, + InstWriteOffset, State.BatchAA, &F); + if (OR == OW_MaybePartial) { + auto Iter = State.IOLs.insert( + std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( + NI->getParent(), InstOverlapIntervalsTy())); + auto &IOL = Iter.first->second; + OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset, + NI, IOL); + } if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { auto *Earlier = dyn_cast<StoreInst>(NI); auto *Later = dyn_cast<StoreInst>(SI); - // We are re-using tryToMergePartialOverlappingStores, which requires - // Earlier to domiante Later. - // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. - if (Earlier && Later && DT.dominates(Earlier, Later)) { - if (Constant *Merged = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL, - State.BatchAA, &DT)) { - - // Update stored value of earlier store to merged constant. - Earlier->setOperand(0, Merged); - ++NumModifiedStores; - MadeChange = true; - - Shortend = true; - // Remove later store and remove any outstanding overlap intervals - // for the updated store. - State.deleteDeadInstruction(Later); - auto I = State.IOLs.find(Earlier->getParent()); - if (I != State.IOLs.end()) - I->second.erase(Earlier); - break; - } + // We are re-using tryToMergePartialOverlappingStores, which requires + // Earlier to domiante Later. + // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. + if (Earlier && Later && DT.dominates(Earlier, Later)) { + if (Constant *Merged = tryToMergePartialOverlappingStores( + Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL, + State.BatchAA, &DT)) { + + // Update stored value of earlier store to merged constant. + Earlier->setOperand(0, Merged); + ++NumModifiedStores; + MadeChange = true; + + Shortend = true; + // Remove later store and remove any outstanding overlap intervals + // for the updated store. 
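// Illustrative standalone sketch of the merge step above: when a later narrow
// constant store lands entirely inside an earlier wide constant store, the
// two constants can be folded into a single value for the earlier store and
// the later store deleted. Plain byte buffers stand in for llvm::Constant;
// mergeOverlappingStores is a placeholder name.
#include <cstdint>
#include <optional>
#include <vector>

static std::optional<std::vector<uint8_t>>
mergeOverlappingStores(std::vector<uint8_t> EarlierBytes, int64_t EarlierOff,
                       const std::vector<uint8_t> &LaterBytes,
                       int64_t LaterOff) {
  int64_t Begin = LaterOff - EarlierOff;
  int64_t End = Begin + (int64_t)LaterBytes.size();
  // Only the fully-contained case is handled; anything else is left to the
  // general overwrite logic.
  if (Begin < 0 || End > (int64_t)EarlierBytes.size())
    return std::nullopt;
  for (int64_t I = Begin; I < End; ++I)
    EarlierBytes[I] = LaterBytes[I - Begin];
  return EarlierBytes;       // new constant for the earlier (dominating) store
}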
+ State.deleteDeadInstruction(Later); + auto I = State.IOLs.find(Earlier->getParent()); + if (I != State.IOLs.end()) + I->second.erase(Earlier); + break; + } } } @@ -2618,21 +2618,21 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, } } } - - // Check if the store is a no-op. - if (!Shortend && isRemovable(SI) && - State.storeIsNoop(KillingDef, SILoc, SILocUnd)) { - LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n'); - State.deleteDeadInstruction(SI); - NumRedundantStores++; - MadeChange = true; - continue; - } + + // Check if the store is a no-op. + if (!Shortend && isRemovable(SI) && + State.storeIsNoop(KillingDef, SILoc, SILocUnd)) { + LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n'); + State.deleteDeadInstruction(SI); + NumRedundantStores++; + MadeChange = true; + continue; + } } if (EnablePartialOverwriteTracking) for (auto &KV : State.IOLs) - MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI); + MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI); MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); return MadeChange; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp index 3c6c444d66..10cf0580f8 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp @@ -151,8 +151,8 @@ static DivRemWorklistTy getWorklist(Function &F) { // rare than division. for (auto &RemPair : RemMap) { // Find the matching division instruction from the division map. - auto It = DivMap.find(RemPair.first); - if (It == DivMap.end()) + auto It = DivMap.find(RemPair.first); + if (It == DivMap.end()) continue; // We have a matching pair of div/rem instructions. @@ -160,7 +160,7 @@ static DivRemWorklistTy getWorklist(Function &F) { Instruction *RemInst = RemPair.second; // Place it in the worklist. - Worklist.emplace_back(It->second, RemInst); + Worklist.emplace_back(It->second, RemInst); } return Worklist; @@ -315,14 +315,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // %rem = sub %x, %mul // %rem = undef - undef = undef // If X is not frozen, %rem becomes undef after transformation. // TODO: We need a undef-specific checking function in ValueTracking - if (!isGuaranteedNotToBeUndefOrPoison(X, nullptr, DivInst, &DT)) { + if (!isGuaranteedNotToBeUndefOrPoison(X, nullptr, DivInst, &DT)) { auto *FrX = new FreezeInst(X, X->getName() + ".frozen", DivInst); DivInst->setOperand(0, FrX); Sub->setOperand(0, FrX); } // Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0, // but %rem in tgt can be one of many integer values. - if (!isGuaranteedNotToBeUndefOrPoison(Y, nullptr, DivInst, &DT)) { + if (!isGuaranteedNotToBeUndefOrPoison(Y, nullptr, DivInst, &DT)) { auto *FrY = new FreezeInst(Y, Y->getName() + ".frozen", DivInst); DivInst->setOperand(1, FrY); Mul->setOperand(1, FrY); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp index 180a82917f..dc144ff173 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp @@ -154,7 +154,7 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, std::swap(A, B); } - // Match canonical forms of min/max. We are not using ValueTracking's + // Match canonical forms of min/max. 
We are not using ValueTracking's // more powerful matchSelectPattern() because it may rely on instruction flags // such as "nsw". That would be incompatible with the current hashing // mechanism that may remove flags to increase the likelihood of CSE. @@ -176,11 +176,11 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break; case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break; case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break; - // Non-strict inequalities. - case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break; - case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break; - case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break; - case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break; + // Non-strict inequalities. + case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break; + case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break; + case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break; + case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break; default: break; } @@ -219,7 +219,7 @@ static unsigned getHashValueImpl(SimpleValue Val) { SelectPatternFlavor SPF; Value *Cond, *A, *B; if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) { - // Hash min/max (cmp + select) to allow for commuted operands. + // Hash min/max (cmp + select) to allow for commuted operands. // Min/max may also have non-canonical compare predicate (eg, the compare for // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the // compare. @@ -269,17 +269,17 @@ static unsigned getHashValueImpl(SimpleValue Val) { isa<FreezeInst>(Inst)) && "Invalid/unknown instruction"); - // Handle intrinsics with commutative operands. - // TODO: Extend this to handle intrinsics with >2 operands where the 1st - // 2 operands are commutative. - auto *II = dyn_cast<IntrinsicInst>(Inst); - if (II && II->isCommutative() && II->getNumArgOperands() == 2) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (LHS > RHS) - std::swap(LHS, RHS); - return hash_combine(II->getOpcode(), LHS, RHS); - } - + // Handle intrinsics with commutative operands. + // TODO: Extend this to handle intrinsics with >2 operands where the 1st + // 2 operands are commutative. + auto *II = dyn_cast<IntrinsicInst>(Inst); + if (II && II->isCommutative() && II->getNumArgOperands() == 2) { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + if (LHS > RHS) + std::swap(LHS, RHS); + return hash_combine(II->getOpcode(), LHS, RHS); + } + // Mix in the opcode. return hash_combine( Inst->getOpcode(), @@ -332,16 +332,16 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) { LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate(); } - // TODO: Extend this for >2 args by matching the trailing N-2 args. - auto *LII = dyn_cast<IntrinsicInst>(LHSI); - auto *RII = dyn_cast<IntrinsicInst>(RHSI); - if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() && - LII->isCommutative() && LII->getNumArgOperands() == 2) { - return LII->getArgOperand(0) == RII->getArgOperand(1) && - LII->getArgOperand(1) == RII->getArgOperand(0); - } - - // Min/max can occur with commuted operands, non-canonical predicates, + // TODO: Extend this for >2 args by matching the trailing N-2 args. 
+ auto *LII = dyn_cast<IntrinsicInst>(LHSI); + auto *RII = dyn_cast<IntrinsicInst>(RHSI); + if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() && + LII->isCommutative() && LII->getNumArgOperands() == 2) { + return LII->getArgOperand(0) == RII->getArgOperand(1) && + LII->getArgOperand(1) == RII->getArgOperand(0); + } + + // Min/max can occur with commuted operands, non-canonical predicates, // and/or non-canonical operands. // Selects can be non-trivially equivalent via inverted conditions and swaps. SelectPatternFlavor LSPF, RSPF; @@ -372,7 +372,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) { // This intentionally does NOT handle patterns with a double-negation in // the sense of not + not, because doing so could result in values // comparing - // as equal that hash differently in the min/max cases like: + // as equal that hash differently in the min/max cases like: // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y // ^ hashes as min ^ would not hash as min // In the context of the EarlyCSE pass, however, such cases never reach @@ -627,11 +627,11 @@ private: StackNode &operator=(const StackNode &) = delete; // Accessors. - unsigned currentGeneration() const { return CurrentGeneration; } - unsigned childGeneration() const { return ChildGeneration; } + unsigned currentGeneration() const { return CurrentGeneration; } + unsigned childGeneration() const { return ChildGeneration; } void childGeneration(unsigned generation) { ChildGeneration = generation; } DomTreeNode *node() { return Node; } - DomTreeNode::const_iterator childIter() const { return ChildIter; } + DomTreeNode::const_iterator childIter() const { return ChildIter; } DomTreeNode *nextChild() { DomTreeNode *child = *ChildIter; @@ -639,8 +639,8 @@ private: return child; } - DomTreeNode::const_iterator end() const { return EndIter; } - bool isProcessed() const { return Processed; } + DomTreeNode::const_iterator end() const { return EndIter; } + bool isProcessed() const { return Processed; } void process() { Processed = true; } private: @@ -659,60 +659,60 @@ private: public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) : Inst(Inst) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - IntrID = II->getIntrinsicID(); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + IntrID = II->getIntrinsicID(); if (TTI.getTgtMemIntrinsic(II, Info)) - return; - if (isHandledNonTargetIntrinsic(IntrID)) { - switch (IntrID) { - case Intrinsic::masked_load: - Info.PtrVal = Inst->getOperand(0); - Info.MatchingId = Intrinsic::masked_load; - Info.ReadMem = true; - Info.WriteMem = false; - Info.IsVolatile = false; - break; - case Intrinsic::masked_store: - Info.PtrVal = Inst->getOperand(1); - // Use the ID of masked load as the "matching id". This will - // prevent matching non-masked loads/stores with masked ones - // (which could be done), but at the moment, the code here - // does not support matching intrinsics with non-intrinsics, - // so keep the MatchingIds specific to masked instructions - // for now (TODO). 
- Info.MatchingId = Intrinsic::masked_load; - Info.ReadMem = false; - Info.WriteMem = true; - Info.IsVolatile = false; - break; - } - } - } + return; + if (isHandledNonTargetIntrinsic(IntrID)) { + switch (IntrID) { + case Intrinsic::masked_load: + Info.PtrVal = Inst->getOperand(0); + Info.MatchingId = Intrinsic::masked_load; + Info.ReadMem = true; + Info.WriteMem = false; + Info.IsVolatile = false; + break; + case Intrinsic::masked_store: + Info.PtrVal = Inst->getOperand(1); + // Use the ID of masked load as the "matching id". This will + // prevent matching non-masked loads/stores with masked ones + // (which could be done), but at the moment, the code here + // does not support matching intrinsics with non-intrinsics, + // so keep the MatchingIds specific to masked instructions + // for now (TODO). + Info.MatchingId = Intrinsic::masked_load; + Info.ReadMem = false; + Info.WriteMem = true; + Info.IsVolatile = false; + break; + } + } + } } - Instruction *get() { return Inst; } - const Instruction *get() const { return Inst; } - + Instruction *get() { return Inst; } + const Instruction *get() const { return Inst; } + bool isLoad() const { - if (IntrID != 0) - return Info.ReadMem; + if (IntrID != 0) + return Info.ReadMem; return isa<LoadInst>(Inst); } bool isStore() const { - if (IntrID != 0) - return Info.WriteMem; + if (IntrID != 0) + return Info.WriteMem; return isa<StoreInst>(Inst); } bool isAtomic() const { - if (IntrID != 0) + if (IntrID != 0) return Info.Ordering != AtomicOrdering::NotAtomic; return Inst->isAtomic(); } bool isUnordered() const { - if (IntrID != 0) + if (IntrID != 0) return Info.isUnordered(); if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { @@ -725,7 +725,7 @@ private: } bool isVolatile() const { - if (IntrID != 0) + if (IntrID != 0) return Info.IsVolatile; if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { @@ -750,85 +750,85 @@ private: // field in the MemIntrinsicInfo structure. That field contains // non-negative values only. int getMatchingId() const { - if (IntrID != 0) - return Info.MatchingId; + if (IntrID != 0) + return Info.MatchingId; return -1; } Value *getPointerOperand() const { - if (IntrID != 0) - return Info.PtrVal; + if (IntrID != 0) + return Info.PtrVal; return getLoadStorePointerOperand(Inst); } bool mayReadFromMemory() const { - if (IntrID != 0) - return Info.ReadMem; + if (IntrID != 0) + return Info.ReadMem; return Inst->mayReadFromMemory(); } bool mayWriteToMemory() const { - if (IntrID != 0) - return Info.WriteMem; + if (IntrID != 0) + return Info.WriteMem; return Inst->mayWriteToMemory(); } private: - Intrinsic::ID IntrID = 0; + Intrinsic::ID IntrID = 0; MemIntrinsicInfo Info; Instruction *Inst; }; - // This function is to prevent accidentally passing a non-target - // intrinsic ID to TargetTransformInfo. - static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) { - switch (ID) { - case Intrinsic::masked_load: - case Intrinsic::masked_store: - return true; - } - return false; - } - static bool isHandledNonTargetIntrinsic(const Value *V) { - if (auto *II = dyn_cast<IntrinsicInst>(V)) - return isHandledNonTargetIntrinsic(II->getIntrinsicID()); - return false; - } - + // This function is to prevent accidentally passing a non-target + // intrinsic ID to TargetTransformInfo. 
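// Illustrative standalone sketch of the descriptor idea above: wrap ordinary
// loads/stores and the handled masked intrinsics behind one small interface
// so the CSE logic can ask "does it read or write, what address, which
// matching id" without caring about the concrete instruction kind. ToyMemOp
// is a placeholder, not llvm::IntrinsicInst or TargetTransformInfo.
enum class ToyMemKind { Load, Store, MaskedLoad, MaskedStore };

struct ToyMemOp {
  ToyMemKind Kind;
  const void *Ptr;           // pointer operand
  bool Volatile = false;

  bool isLoad() const {
    return Kind == ToyMemKind::Load || Kind == ToyMemKind::MaskedLoad;
  }
  bool isStore() const {
    return Kind == ToyMemKind::Store || Kind == ToyMemKind::MaskedStore;
  }
  // Masked operations only ever match other masked operations; plain
  // loads/stores use a separate id, mirroring the MatchingId convention.
  int matchingId() const {
    return (Kind == ToyMemKind::MaskedLoad || Kind == ToyMemKind::MaskedStore)
               ? 1
               : -1;
  }
  const void *pointerOperand() const { return Ptr; }
};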
+ static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::masked_load: + case Intrinsic::masked_store: + return true; + } + return false; + } + static bool isHandledNonTargetIntrinsic(const Value *V) { + if (auto *II = dyn_cast<IntrinsicInst>(V)) + return isHandledNonTargetIntrinsic(II->getIntrinsicID()); + return false; + } + bool processNode(DomTreeNode *Node); bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI, const BasicBlock *BB, const BasicBlock *Pred); - Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst, - unsigned CurrentGeneration); - - bool overridingStores(const ParseMemoryInst &Earlier, - const ParseMemoryInst &Later); - + Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst, + unsigned CurrentGeneration); + + bool overridingStores(const ParseMemoryInst &Earlier, + const ParseMemoryInst &Later); + Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const { if (auto *LI = dyn_cast<LoadInst>(Inst)) return LI; if (auto *SI = dyn_cast<StoreInst>(Inst)) return SI->getValueOperand(); assert(isa<IntrinsicInst>(Inst) && "Instruction not supported"); - auto *II = cast<IntrinsicInst>(Inst); - if (isHandledNonTargetIntrinsic(II->getIntrinsicID())) - return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType); - return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType); - } - - Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II, - Type *ExpectedType) const { - switch (II->getIntrinsicID()) { - case Intrinsic::masked_load: - return II; - case Intrinsic::masked_store: - return II->getOperand(0); - } - return nullptr; + auto *II = cast<IntrinsicInst>(Inst); + if (isHandledNonTargetIntrinsic(II->getIntrinsicID())) + return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType); + return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType); } + Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II, + Type *ExpectedType) const { + switch (II->getIntrinsicID()) { + case Intrinsic::masked_load: + return II; + case Intrinsic::masked_store: + return II->getOperand(0); + } + return nullptr; + } + /// Return true if the instruction is known to only operate on memory /// provably invariant in the given "generation". bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt); @@ -836,101 +836,101 @@ private: bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration, Instruction *EarlierInst, Instruction *LaterInst); - bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier, - const IntrinsicInst *Later) { - auto IsSubmask = [](const Value *Mask0, const Value *Mask1) { - // Is Mask0 a submask of Mask1? 
- if (Mask0 == Mask1) - return true; - if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1)) - return false; - auto *Vec0 = dyn_cast<ConstantVector>(Mask0); - auto *Vec1 = dyn_cast<ConstantVector>(Mask1); - if (!Vec0 || !Vec1) - return false; - assert(Vec0->getType() == Vec1->getType() && - "Masks should have the same type"); - for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) { - Constant *Elem0 = Vec0->getOperand(i); - Constant *Elem1 = Vec1->getOperand(i); - auto *Int0 = dyn_cast<ConstantInt>(Elem0); - if (Int0 && Int0->isZero()) - continue; - auto *Int1 = dyn_cast<ConstantInt>(Elem1); - if (Int1 && !Int1->isZero()) - continue; - if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1)) - return false; - if (Elem0 == Elem1) - continue; - return false; - } - return true; - }; - auto PtrOp = [](const IntrinsicInst *II) { - if (II->getIntrinsicID() == Intrinsic::masked_load) - return II->getOperand(0); - if (II->getIntrinsicID() == Intrinsic::masked_store) - return II->getOperand(1); - llvm_unreachable("Unexpected IntrinsicInst"); - }; - auto MaskOp = [](const IntrinsicInst *II) { - if (II->getIntrinsicID() == Intrinsic::masked_load) - return II->getOperand(2); - if (II->getIntrinsicID() == Intrinsic::masked_store) - return II->getOperand(3); - llvm_unreachable("Unexpected IntrinsicInst"); - }; - auto ThruOp = [](const IntrinsicInst *II) { - if (II->getIntrinsicID() == Intrinsic::masked_load) - return II->getOperand(3); - llvm_unreachable("Unexpected IntrinsicInst"); - }; - - if (PtrOp(Earlier) != PtrOp(Later)) - return false; - - Intrinsic::ID IDE = Earlier->getIntrinsicID(); - Intrinsic::ID IDL = Later->getIntrinsicID(); - // We could really use specific intrinsic classes for masked loads - // and stores in IntrinsicInst.h. - if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) { - // Trying to replace later masked load with the earlier one. - // Check that the pointers are the same, and - // - masks and pass-throughs are the same, or - // - replacee's pass-through is "undef" and replacer's mask is a - // super-set of the replacee's mask. - if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later)) - return true; - if (!isa<UndefValue>(ThruOp(Later))) - return false; - return IsSubmask(MaskOp(Later), MaskOp(Earlier)); - } - if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) { - // Trying to replace a load of a stored value with the store's value. - // Check that the pointers are the same, and - // - load's mask is a subset of store's mask, and - // - load's pass-through is "undef". - if (!IsSubmask(MaskOp(Later), MaskOp(Earlier))) - return false; - return isa<UndefValue>(ThruOp(Later)); - } - if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) { - // Trying to remove a store of the loaded value. - // Check that the pointers are the same, and - // - store's mask is a subset of the load's mask. - return IsSubmask(MaskOp(Later), MaskOp(Earlier)); - } - if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) { - // Trying to remove a dead store (earlier). - // Check that the pointers are the same, - // - the to-be-removed store's mask is a subset of the other store's - // mask. - return IsSubmask(MaskOp(Earlier), MaskOp(Later)); - } - return false; - } - + bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier, + const IntrinsicInst *Later) { + auto IsSubmask = [](const Value *Mask0, const Value *Mask1) { + // Is Mask0 a submask of Mask1? 
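// Illustrative standalone sketch of the submask test above: Mask0 is a
// submask of Mask1 if every lane enabled in Mask0 is also enabled in Mask1;
// a lane whose value is unknown (undef in IR) is handled conservatively.
// ToyMask/isSubmask are placeholders, not llvm::ConstantVector.
#include <cstddef>
#include <optional>
#include <vector>

using ToyMask = std::vector<std::optional<bool>>; // nullopt models undef

static bool isSubmask(const ToyMask &Mask0, const ToyMask &Mask1) {
  if (Mask0.size() != Mask1.size())
    return false;
  for (std::size_t I = 0, E = Mask0.size(); I != E; ++I) {
    if (Mask0[I] && !*Mask0[I])
      continue;              // lane disabled in Mask0: trivially covered
    if (Mask1[I] && *Mask1[I])
      continue;              // lane enabled in Mask1: covers Mask0's lane
    return false;            // undef lane, or enabled only in Mask0
  }
  return true;
}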
+ if (Mask0 == Mask1) + return true; + if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1)) + return false; + auto *Vec0 = dyn_cast<ConstantVector>(Mask0); + auto *Vec1 = dyn_cast<ConstantVector>(Mask1); + if (!Vec0 || !Vec1) + return false; + assert(Vec0->getType() == Vec1->getType() && + "Masks should have the same type"); + for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) { + Constant *Elem0 = Vec0->getOperand(i); + Constant *Elem1 = Vec1->getOperand(i); + auto *Int0 = dyn_cast<ConstantInt>(Elem0); + if (Int0 && Int0->isZero()) + continue; + auto *Int1 = dyn_cast<ConstantInt>(Elem1); + if (Int1 && !Int1->isZero()) + continue; + if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1)) + return false; + if (Elem0 == Elem1) + continue; + return false; + } + return true; + }; + auto PtrOp = [](const IntrinsicInst *II) { + if (II->getIntrinsicID() == Intrinsic::masked_load) + return II->getOperand(0); + if (II->getIntrinsicID() == Intrinsic::masked_store) + return II->getOperand(1); + llvm_unreachable("Unexpected IntrinsicInst"); + }; + auto MaskOp = [](const IntrinsicInst *II) { + if (II->getIntrinsicID() == Intrinsic::masked_load) + return II->getOperand(2); + if (II->getIntrinsicID() == Intrinsic::masked_store) + return II->getOperand(3); + llvm_unreachable("Unexpected IntrinsicInst"); + }; + auto ThruOp = [](const IntrinsicInst *II) { + if (II->getIntrinsicID() == Intrinsic::masked_load) + return II->getOperand(3); + llvm_unreachable("Unexpected IntrinsicInst"); + }; + + if (PtrOp(Earlier) != PtrOp(Later)) + return false; + + Intrinsic::ID IDE = Earlier->getIntrinsicID(); + Intrinsic::ID IDL = Later->getIntrinsicID(); + // We could really use specific intrinsic classes for masked loads + // and stores in IntrinsicInst.h. + if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) { + // Trying to replace later masked load with the earlier one. + // Check that the pointers are the same, and + // - masks and pass-throughs are the same, or + // - replacee's pass-through is "undef" and replacer's mask is a + // super-set of the replacee's mask. + if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later)) + return true; + if (!isa<UndefValue>(ThruOp(Later))) + return false; + return IsSubmask(MaskOp(Later), MaskOp(Earlier)); + } + if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) { + // Trying to replace a load of a stored value with the store's value. + // Check that the pointers are the same, and + // - load's mask is a subset of store's mask, and + // - load's pass-through is "undef". + if (!IsSubmask(MaskOp(Later), MaskOp(Earlier))) + return false; + return isa<UndefValue>(ThruOp(Later)); + } + if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) { + // Trying to remove a store of the loaded value. + // Check that the pointers are the same, and + // - store's mask is a subset of the load's mask. + return IsSubmask(MaskOp(Later), MaskOp(Earlier)); + } + if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) { + // Trying to remove a dead store (earlier). + // Check that the pointers are the same, + // - the to-be-removed store's mask is a subset of the other store's + // mask. + return IsSubmask(MaskOp(Earlier), MaskOp(Later)); + } + return false; + } + void removeMSSA(Instruction &Inst) { if (!MSSA) return; @@ -1033,14 +1033,14 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst, auto *TorF = (BI->getSuccessor(0) == BB) ? 
ConstantInt::getTrue(BB->getContext()) : ConstantInt::getFalse(BB->getContext()); - auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS, - Value *&RHS) { - if (Opcode == Instruction::And && - match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) - return true; - else if (Opcode == Instruction::Or && - match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) - return true; + auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS, + Value *&RHS) { + if (Opcode == Instruction::And && + match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) + return true; + else if (Opcode == Instruction::Or && + match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) + return true; return false; }; // If the condition is AND operation, we can propagate its operands into the @@ -1071,9 +1071,9 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst, } } - Value *LHS, *RHS; - if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS)) - for (auto &Op : { LHS, RHS }) + Value *LHS, *RHS; + if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS)) + for (auto &Op : { LHS, RHS }) if (Instruction *OPI = dyn_cast<Instruction>(Op)) if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second) WorkList.push_back(OPI); @@ -1082,86 +1082,86 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst, return MadeChanges; } -Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst, - unsigned CurrentGeneration) { - if (InVal.DefInst == nullptr) - return nullptr; - if (InVal.MatchingId != MemInst.getMatchingId()) - return nullptr; - // We don't yet handle removing loads with ordering of any kind. - if (MemInst.isVolatile() || !MemInst.isUnordered()) - return nullptr; - // We can't replace an atomic load with one which isn't also atomic. - if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic()) - return nullptr; - // The value V returned from this function is used differently depending - // on whether MemInst is a load or a store. If it's a load, we will replace - // MemInst with V, if it's a store, we will check if V is the same as the - // available value. - bool MemInstMatching = !MemInst.isLoad(); - Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst; - Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get(); - - // For stores check the result values before checking memory generation - // (otherwise isSameMemGeneration may crash). - Value *Result = MemInst.isStore() - ? getOrCreateResult(Matching, Other->getType()) - : nullptr; - if (MemInst.isStore() && InVal.DefInst != Result) - return nullptr; - - // Deal with non-target memory intrinsics. - bool MatchingNTI = isHandledNonTargetIntrinsic(Matching); - bool OtherNTI = isHandledNonTargetIntrinsic(Other); - if (OtherNTI != MatchingNTI) - return nullptr; - if (OtherNTI && MatchingNTI) { - if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst), - cast<IntrinsicInst>(MemInst.get()))) - return nullptr; - } - - if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) && - !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst, - MemInst.get())) - return nullptr; - - if (!Result) - Result = getOrCreateResult(Matching, Other->getType()); - return Result; -} - -bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier, - const ParseMemoryInst &Later) { - // Can we remove Earlier store because of Later store? 
- - assert(Earlier.isUnordered() && !Earlier.isVolatile() && - "Violated invariant"); - if (Earlier.getPointerOperand() != Later.getPointerOperand()) - return false; - if (Earlier.getMatchingId() != Later.getMatchingId()) - return false; - // At the moment, we don't remove ordered stores, but do remove - // unordered atomic stores. There's no special requirement (for - // unordered atomics) about removing atomic stores only in favor of - // other atomic stores since we were going to execute the non-atomic - // one anyway and the atomic one might never have become visible. - if (!Earlier.isUnordered() || !Later.isUnordered()) - return false; - - // Deal with non-target memory intrinsics. - bool ENTI = isHandledNonTargetIntrinsic(Earlier.get()); - bool LNTI = isHandledNonTargetIntrinsic(Later.get()); - if (ENTI && LNTI) - return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()), - cast<IntrinsicInst>(Later.get())); - - // Because of the check above, at least one of them is false. - // For now disallow matching intrinsics with non-intrinsics, - // so assume that the stores match if neither is an intrinsic. - return ENTI == LNTI; -} - +Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst, + unsigned CurrentGeneration) { + if (InVal.DefInst == nullptr) + return nullptr; + if (InVal.MatchingId != MemInst.getMatchingId()) + return nullptr; + // We don't yet handle removing loads with ordering of any kind. + if (MemInst.isVolatile() || !MemInst.isUnordered()) + return nullptr; + // We can't replace an atomic load with one which isn't also atomic. + if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic()) + return nullptr; + // The value V returned from this function is used differently depending + // on whether MemInst is a load or a store. If it's a load, we will replace + // MemInst with V, if it's a store, we will check if V is the same as the + // available value. + bool MemInstMatching = !MemInst.isLoad(); + Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst; + Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get(); + + // For stores check the result values before checking memory generation + // (otherwise isSameMemGeneration may crash). + Value *Result = MemInst.isStore() + ? getOrCreateResult(Matching, Other->getType()) + : nullptr; + if (MemInst.isStore() && InVal.DefInst != Result) + return nullptr; + + // Deal with non-target memory intrinsics. + bool MatchingNTI = isHandledNonTargetIntrinsic(Matching); + bool OtherNTI = isHandledNonTargetIntrinsic(Other); + if (OtherNTI != MatchingNTI) + return nullptr; + if (OtherNTI && MatchingNTI) { + if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst), + cast<IntrinsicInst>(MemInst.get()))) + return nullptr; + } + + if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) && + !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst, + MemInst.get())) + return nullptr; + + if (!Result) + Result = getOrCreateResult(Matching, Other->getType()); + return Result; +} + +bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier, + const ParseMemoryInst &Later) { + // Can we remove Earlier store because of Later store? 
+ + assert(Earlier.isUnordered() && !Earlier.isVolatile() && + "Violated invariant"); + if (Earlier.getPointerOperand() != Later.getPointerOperand()) + return false; + if (Earlier.getMatchingId() != Later.getMatchingId()) + return false; + // At the moment, we don't remove ordered stores, but do remove + // unordered atomic stores. There's no special requirement (for + // unordered atomics) about removing atomic stores only in favor of + // other atomic stores since we were going to execute the non-atomic + // one anyway and the atomic one might never have become visible. + if (!Earlier.isUnordered() || !Later.isUnordered()) + return false; + + // Deal with non-target memory intrinsics. + bool ENTI = isHandledNonTargetIntrinsic(Earlier.get()); + bool LNTI = isHandledNonTargetIntrinsic(Later.get()); + if (ENTI && LNTI) + return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()), + cast<IntrinsicInst>(Later.get())); + + // Because of the check above, at least one of them is false. + // For now disallow matching intrinsics with non-intrinsics, + // so assume that the stores match if neither is an intrinsic. + return ENTI == LNTI; +} + bool EarlyCSE::processNode(DomTreeNode *Node) { bool Changed = false; BasicBlock *BB = Node->getBlock(); @@ -1232,14 +1232,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } - // Likewise, noalias intrinsics don't actually write. - if (match(&Inst, - m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) { - LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst - << '\n'); - continue; - } - + // Likewise, noalias intrinsics don't actually write. + if (match(&Inst, + m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) { + LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst + << '\n'); + continue; + } + // Skip sideeffect intrinsics, for the same reason as assume intrinsics. if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) { LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n'); @@ -1386,21 +1386,21 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // we can assume the current load loads the same value as the dominating // load. LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); - if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) { - LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst - << " to: " << *InVal.DefInst << '\n'); - if (!DebugCounter::shouldExecute(CSECounter)) { - LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); + if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) { + LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst + << " to: " << *InVal.DefInst << '\n'); + if (!DebugCounter::shouldExecute(CSECounter)) { + LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); continue; } - if (!Inst.use_empty()) - Inst.replaceAllUsesWith(Op); - salvageKnowledge(&Inst, &AC); - removeMSSA(Inst); - Inst.eraseFromParent(); - Changed = true; - ++NumCSELoad; - continue; + if (!Inst.use_empty()) + Inst.replaceAllUsesWith(Op); + salvageKnowledge(&Inst, &AC); + removeMSSA(Inst); + Inst.eraseFromParent(); + Changed = true; + ++NumCSELoad; + continue; } // Otherwise, remember that we have this instruction. 
@@ -1470,7 +1470,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (MemInst.isValid() && MemInst.isStore()) { LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); if (InVal.DefInst && - InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) { + InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) { // It is okay to have a LastStore to a different pointer here if MemorySSA // tells us that the load and store are from the same memory generation. // In that case, LastStore should keep its present value since we're @@ -1506,7 +1506,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // We do a trivial form of DSE if there are two stores to the same // location with no intervening loads. Delete the earlier store. if (LastStore) { - if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) { + if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) { LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " << Inst << '\n'); if (!DebugCounter::shouldExecute(CSECounter)) { @@ -1667,7 +1667,7 @@ public: AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); if (UseMemorySSA) { - AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MemorySSAWrapperPass>(); AU.addPreserved<MemorySSAWrapperPass>(); } @@ -1709,7 +1709,7 @@ INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa", "Early CSE w/ MemorySSA", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp index e54a270fb2..ab88f253c6 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -12,7 +12,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp index c6b6d75aef..a0e7dec90f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp @@ -26,7 +26,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" @@ -36,8 +36,8 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -99,33 +99,33 @@ STATISTIC(NumGVNSimpl, "Number of instructions simplified"); 
STATISTIC(NumGVNEqProp, "Number of equalities propagated"); STATISTIC(NumPRELoad, "Number of loads PRE'd"); -STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax, - "Number of blocks speculated as available in " - "IsValueFullyAvailableInBlock(), max"); -STATISTIC(MaxBBSpeculationCutoffReachedTimes, - "Number of times we we reached gvn-max-block-speculations cut-off " - "preventing further exploration"); - +STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax, + "Number of blocks speculated as available in " + "IsValueFullyAvailableInBlock(), max"); +STATISTIC(MaxBBSpeculationCutoffReachedTimes, + "Number of times we we reached gvn-max-block-speculations cut-off " + "preventing further exploration"); + static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden); static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true)); static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre", cl::init(true)); -static cl::opt<bool> -GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", - cl::init(true)); +static cl::opt<bool> +GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", + cl::init(true)); static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true)); static cl::opt<uint32_t> MaxNumDeps( "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore, cl::desc("Max number of dependences to attempt Load PRE (default = 100)")); -// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat. -static cl::opt<uint32_t> MaxBBSpeculations( - "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore, - cl::desc("Max number of blocks we're willing to speculate on (and recurse " - "into) when deducing if a value is fully available or not in GVN " - "(default = 600)")); - +// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat. +static cl::opt<uint32_t> MaxBBSpeculations( + "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore, + cl::desc("Max number of blocks we're willing to speculate on (and recurse " + "into) when deducing if a value is fully available or not in GVN " + "(default = 600)")); + struct llvm::GVN::Expression { uint32_t opcode; bool commutative = false; @@ -295,9 +295,9 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { if (I->isCommutative()) { // Ensure that commutative instructions that only differ by a permutation // of their operands get the same value number by sorting the operand value - // numbers. Since commutative operands are the 1st two operands it is more + // numbers. Since commutative operands are the 1st two operands it is more // efficient to sort by hand rather than using, say, std::sort. - assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!"); + assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); e.commutative = true; @@ -366,7 +366,7 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { OI != OE; ++OI) e.varargs.push_back(lookupOrAdd(*OI)); - append_range(e.varargs, EI->indices()); + append_range(e.varargs, EI->indices()); return e; } @@ -410,12 +410,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { } if (local_dep.isDef()) { - // For masked load/store intrinsics, the local_dep may actully be - // a normal load or store instruction. 
- CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst()); + // For masked load/store intrinsics, the local_dep may actully be + // a normal load or store instruction. + CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst()); - if (!local_cdep || - local_cdep->getNumArgOperands() != C->getNumArgOperands()) { + if (!local_cdep || + local_cdep->getNumArgOperands() != C->getNumArgOperands()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } @@ -640,11 +640,11 @@ bool GVN::isLoadInLoopPREEnabled() const { return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE); } -bool GVN::isLoadPRESplitBackedgeEnabled() const { - return Options.AllowLoadPRESplitBackedge.getValueOr( - GVNEnableSplitBackedgeInLoadPRE); -} - +bool GVN::isLoadPRESplitBackedgeEnabled() const { + return Options.AllowLoadPRESplitBackedge.getValueOr( + GVNEnableSplitBackedgeInLoadPRE); +} + bool GVN::isMemDepEnabled() const { return Options.AllowMemDep.getValueOr(GVNEnableMemDep); } @@ -661,18 +661,18 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { auto *MemDep = isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr; auto *LI = AM.getCachedResult<LoopAnalysis>(F); - auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F); + auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); - bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, - MSSA ? &MSSA->getMSSA() : nullptr); + bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, + MSSA ? &MSSA->getMSSA() : nullptr); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve<DominatorTreeAnalysis>(); PA.preserve<GlobalsAA>(); PA.preserve<TargetLibraryAnalysis>(); - if (MSSA) - PA.preserve<MemorySSAAnalysis>(); + if (MSSA) + PA.preserve<MemorySSAAnalysis>(); if (LI) PA.preserve<LoopAnalysis>(); return PA; @@ -690,18 +690,18 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const { } #endif -enum class AvailabilityState : char { - /// We know the block *is not* fully available. This is a fixpoint. - Unavailable = 0, - /// We know the block *is* fully available. This is a fixpoint. - Available = 1, - /// We do not know whether the block is fully available or not, - /// but we are currently speculating that it will be. - /// If it would have turned out that the block was, in fact, not fully - /// available, this would have been cleaned up into an Unavailable. - SpeculativelyAvailable = 2, -}; - +enum class AvailabilityState : char { + /// We know the block *is not* fully available. This is a fixpoint. + Unavailable = 0, + /// We know the block *is* fully available. This is a fixpoint. + Available = 1, + /// We do not know whether the block is fully available or not, + /// but we are currently speculating that it will be. + /// If it would have turned out that the block was, in fact, not fully + /// available, this would have been cleaned up into an Unavailable. + SpeculativelyAvailable = 2, +}; + /// Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep /// track of which blocks we know are fully alive in FullyAvailableBlocks. This @@ -710,118 +710,118 @@ enum class AvailabilityState : char { /// 1) we know the block *is* fully available. /// 2) we do not know whether the block is fully available or not, but we are /// currently speculating that it will be. 
-static bool IsValueFullyAvailableInBlock( - BasicBlock *BB, - DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) { - SmallVector<BasicBlock *, 32> Worklist; - Optional<BasicBlock *> UnavailableBB; - - // The number of times we didn't find an entry for a block in a map and - // optimistically inserted an entry marking block as speculatively available. - unsigned NumNewNewSpeculativelyAvailableBBs = 0; - -#ifndef NDEBUG - SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs; - SmallVector<BasicBlock *, 32> AvailableBBs; -#endif - - Worklist.emplace_back(BB); - while (!Worklist.empty()) { - BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first! - // Optimistically assume that the block is Speculatively Available and check - // to see if we already know about this block in one lookup. - std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV = - FullyAvailableBlocks.try_emplace( - CurrBB, AvailabilityState::SpeculativelyAvailable); - AvailabilityState &State = IV.first->second; - - // Did the entry already exist for this block? - if (!IV.second) { - if (State == AvailabilityState::Unavailable) { - UnavailableBB = CurrBB; - break; // Backpropagate unavailability info. - } - -#ifndef NDEBUG - AvailableBBs.emplace_back(CurrBB); -#endif - continue; // Don't recurse further, but continue processing worklist. - } - - // No entry found for block. - ++NumNewNewSpeculativelyAvailableBBs; - bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations; - - // If we have exhausted our budget, mark this block as unavailable. - // Also, if this block has no predecessors, the value isn't live-in here. - if (OutOfBudget || pred_empty(CurrBB)) { - MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget; - State = AvailabilityState::Unavailable; - UnavailableBB = CurrBB; - break; // Backpropagate unavailability info. - } - - // Tentatively consider this block as speculatively available. -#ifndef NDEBUG - NewSpeculativelyAvailableBBs.insert(CurrBB); -#endif - // And further recurse into block's predecessors, in depth-first order! - Worklist.append(pred_begin(CurrBB), pred_end(CurrBB)); - } - -#if LLVM_ENABLE_STATS - IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax( - NumNewNewSpeculativelyAvailableBBs); -#endif - - // If the block isn't marked as fixpoint yet - // (the Unavailable and Available states are fixpoints) - auto MarkAsFixpointAndEnqueueSuccessors = - [&](BasicBlock *BB, AvailabilityState FixpointState) { - auto It = FullyAvailableBlocks.find(BB); - if (It == FullyAvailableBlocks.end()) - return; // Never queried this block, leave as-is. - switch (AvailabilityState &State = It->second) { - case AvailabilityState::Unavailable: - case AvailabilityState::Available: - return; // Don't backpropagate further, continue processing worklist. - case AvailabilityState::SpeculativelyAvailable: // Fix it! - State = FixpointState; -#ifndef NDEBUG - assert(NewSpeculativelyAvailableBBs.erase(BB) && - "Found a speculatively available successor leftover?"); -#endif - // Queue successors for further processing. - Worklist.append(succ_begin(BB), succ_end(BB)); - return; - } - }; - - if (UnavailableBB) { - // Okay, we have encountered an unavailable block. - // Mark speculatively available blocks reachable from UnavailableBB as - // unavailable as well. Paths are terminated when they reach blocks not in - // FullyAvailableBlocks or they are not marked as speculatively available. 
- Worklist.clear(); - Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB)); - while (!Worklist.empty()) - MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), - AvailabilityState::Unavailable); - } - -#ifndef NDEBUG - Worklist.clear(); - for (BasicBlock *AvailableBB : AvailableBBs) - Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB)); - while (!Worklist.empty()) - MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), - AvailabilityState::Available); - - assert(NewSpeculativelyAvailableBBs.empty() && - "Must have fixed all the new speculatively available blocks."); -#endif - - return !UnavailableBB; +static bool IsValueFullyAvailableInBlock( + BasicBlock *BB, + DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) { + SmallVector<BasicBlock *, 32> Worklist; + Optional<BasicBlock *> UnavailableBB; + + // The number of times we didn't find an entry for a block in a map and + // optimistically inserted an entry marking block as speculatively available. + unsigned NumNewNewSpeculativelyAvailableBBs = 0; + +#ifndef NDEBUG + SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs; + SmallVector<BasicBlock *, 32> AvailableBBs; +#endif + + Worklist.emplace_back(BB); + while (!Worklist.empty()) { + BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first! + // Optimistically assume that the block is Speculatively Available and check + // to see if we already know about this block in one lookup. + std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV = + FullyAvailableBlocks.try_emplace( + CurrBB, AvailabilityState::SpeculativelyAvailable); + AvailabilityState &State = IV.first->second; + + // Did the entry already exist for this block? + if (!IV.second) { + if (State == AvailabilityState::Unavailable) { + UnavailableBB = CurrBB; + break; // Backpropagate unavailability info. + } + +#ifndef NDEBUG + AvailableBBs.emplace_back(CurrBB); +#endif + continue; // Don't recurse further, but continue processing worklist. + } + + // No entry found for block. + ++NumNewNewSpeculativelyAvailableBBs; + bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations; + + // If we have exhausted our budget, mark this block as unavailable. + // Also, if this block has no predecessors, the value isn't live-in here. + if (OutOfBudget || pred_empty(CurrBB)) { + MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget; + State = AvailabilityState::Unavailable; + UnavailableBB = CurrBB; + break; // Backpropagate unavailability info. + } + + // Tentatively consider this block as speculatively available. +#ifndef NDEBUG + NewSpeculativelyAvailableBBs.insert(CurrBB); +#endif + // And further recurse into block's predecessors, in depth-first order! + Worklist.append(pred_begin(CurrBB), pred_end(CurrBB)); + } + +#if LLVM_ENABLE_STATS + IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax( + NumNewNewSpeculativelyAvailableBBs); +#endif + + // If the block isn't marked as fixpoint yet + // (the Unavailable and Available states are fixpoints) + auto MarkAsFixpointAndEnqueueSuccessors = + [&](BasicBlock *BB, AvailabilityState FixpointState) { + auto It = FullyAvailableBlocks.find(BB); + if (It == FullyAvailableBlocks.end()) + return; // Never queried this block, leave as-is. + switch (AvailabilityState &State = It->second) { + case AvailabilityState::Unavailable: + case AvailabilityState::Available: + return; // Don't backpropagate further, continue processing worklist. + case AvailabilityState::SpeculativelyAvailable: // Fix it! 
+ State = FixpointState; +#ifndef NDEBUG + assert(NewSpeculativelyAvailableBBs.erase(BB) && + "Found a speculatively available successor leftover?"); +#endif + // Queue successors for further processing. + Worklist.append(succ_begin(BB), succ_end(BB)); + return; + } + }; + + if (UnavailableBB) { + // Okay, we have encountered an unavailable block. + // Mark speculatively available blocks reachable from UnavailableBB as + // unavailable as well. Paths are terminated when they reach blocks not in + // FullyAvailableBlocks or they are not marked as speculatively available. + Worklist.clear(); + Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB)); + while (!Worklist.empty()) + MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), + AvailabilityState::Unavailable); + } + +#ifndef NDEBUG + Worklist.clear(); + for (BasicBlock *AvailableBB : AvailableBBs) + Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB)); + while (!Worklist.empty()) + MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), + AvailabilityState::Available); + + assert(NewSpeculativelyAvailableBBs.empty() && + "Must have fixed all the new speculatively available blocks."); +#endif + + return !UnavailableBB; } /// Given a set of loads specified by ValuesPerBlock, @@ -1040,7 +1040,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo, if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of - // different types if we have to. If the stored value is convertable to + // different types if we have to. If the stored value is convertable to // the loaded value, we can reuse it. if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(), DL)) @@ -1155,9 +1155,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // because if the index is out of bounds we should deoptimize rather than // access the array. // Check that there is no guard in this block above our instruction. - bool MustEnsureSafetyOfSpeculativeExecution = - ICF->isDominatedByICFIFromSameBlock(LI); - + bool MustEnsureSafetyOfSpeculativeExecution = + ICF->isDominatedByICFIFromSameBlock(LI); + while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1174,8 +1174,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; // Check that there is no implicit control flow in a block above. - MustEnsureSafetyOfSpeculativeExecution = - MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB); + MustEnsureSafetyOfSpeculativeExecution = + MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB); } assert(TmpBB); @@ -1184,11 +1184,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check to see how many predecessors have the loaded value fully // available. 
MapVector<BasicBlock *, Value *> PredLoads; - DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks; + DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks; for (const AvailableValueInBlock &AV : ValuesPerBlock) - FullyAvailableBlocks[AV.BB] = AvailabilityState::Available; + FullyAvailableBlocks[AV.BB] = AvailabilityState::Available; for (BasicBlock *UnavailableBB : UnavailableBlocks) - FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable; + FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable; SmallVector<BasicBlock *, 4> CriticalEdgePred; for (BasicBlock *Pred : predecessors(LoadBB)) { @@ -1201,7 +1201,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; } - if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { + if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { continue; } @@ -1228,16 +1228,16 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; } - // Do not split backedge as it will break the canonical loop form. - if (!isLoadPRESplitBackedgeEnabled()) - if (DT->dominates(LoadBB, Pred)) { - LLVM_DEBUG( - dbgs() - << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '" - << Pred->getName() << "': " << *LI << '\n'); - return false; - } - + // Do not split backedge as it will break the canonical loop form. + if (!isLoadPRESplitBackedgeEnabled()) + if (DT->dominates(LoadBB, Pred)) { + LLVM_DEBUG( + dbgs() + << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + CriticalEdgePred.push_back(Pred); } else { // Only add the predecessors that will not be split for now. @@ -1257,17 +1257,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (NumUnavailablePreds != 1) return false; - // Now we know where we will insert load. We must ensure that it is safe - // to speculatively execute the load at that points. - if (MustEnsureSafetyOfSpeculativeExecution) { - if (CriticalEdgePred.size()) - if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT)) - return false; - for (auto &PL : PredLoads) - if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT)) - return false; - } - + // Now we know where we will insert load. We must ensure that it is safe + // to speculatively execute the load at that points. + if (MustEnsureSafetyOfSpeculativeExecution) { + if (CriticalEdgePred.size()) + if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT)) + return false; + for (auto &PL : PredLoads) + if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT)) + return false; + } + // Split critical edges, and update the unavailable predecessors accordingly. for (BasicBlock *OrigPred : CriticalEdgePred) { BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); @@ -1349,7 +1349,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Instructions that have been inserted in predecessor(s) to materialize // the load address do not retain their original debug locations. Doing // so could lead to confusing (but correct) source attributions. - I->updateLocationAfterHoist(); + I->updateLocationAfterHoist(); // FIXME: We really _ought_ to insert these value numbers into their // parent's availability map. 
However, in doing so, we risk getting into @@ -1367,22 +1367,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, LI->getAlign(), LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewLoad->setDebugLoc(LI->getDebugLoc()); - if (MSSAU) { - auto *MSSA = MSSAU->getMemorySSA(); - // Get the defining access of the original load or use the load if it is a - // MemoryDef (e.g. because it is volatile). The inserted loads are - // guaranteed to load from the same definition. - auto *LIAcc = MSSA->getMemoryAccess(LI); - auto *DefiningAcc = - isa<MemoryDef>(LIAcc) ? LIAcc : LIAcc->getDefiningAccess(); - auto *NewAccess = MSSAU->createMemoryAccessInBB( - NewLoad, DefiningAcc, NewLoad->getParent(), - MemorySSA::BeforeTerminator); - if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess)) - MSSAU->insertDef(NewDef, /*RenameUses=*/true); - else - MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true); - } + if (MSSAU) { + auto *MSSA = MSSAU->getMemorySSA(); + // Get the defining access of the original load or use the load if it is a + // MemoryDef (e.g. because it is volatile). The inserted loads are + // guaranteed to load from the same definition. + auto *LIAcc = MSSA->getMemoryAccess(LI); + auto *DefiningAcc = + isa<MemoryDef>(LIAcc) ? LIAcc : LIAcc->getDefiningAccess(); + auto *NewAccess = MSSAU->createMemoryAccessInBB( + NewLoad, DefiningAcc, NewLoad->getParent(), + MemorySSA::BeforeTerminator); + if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess)) + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + else + MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true); + } // Transfer the old load's AA tags to the new load. AAMDNodes Tags; @@ -1470,14 +1470,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return false; } - bool Changed = false; + bool Changed = false; // If this load follows a GEP, see if we can PRE the indices before analyzing. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) { for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(), OE = GEP->idx_end(); OI != OE; ++OI) if (Instruction *I = dyn_cast<Instruction>(OI->get())) - Changed |= performScalarPRE(I); + Changed |= performScalarPRE(I); } // Step 2: Analyze the availability of the load @@ -1488,7 +1488,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // If we have no predecessors that produce a known value for this load, exit // early. if (ValuesPerBlock.empty()) - return Changed; + return Changed; // Step 3: Eliminate fully redundancy. // @@ -1520,12 +1520,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // Step 4: Eliminate partial redundancy. if (!isPREEnabled() || !isLoadPREEnabled()) - return Changed; + return Changed; if (!isLoadInLoopPREEnabled() && this->LI && this->LI->getLoopFor(LI->getParent())) - return Changed; + return Changed; - return Changed || PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); + return Changed || PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); } static bool impliesEquivalanceIfTrue(CmpInst* Cmp) { @@ -1600,40 +1600,40 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. FIXME: We could insert unreachable // instruction directly because we can modify the CFG. 
- auto *NewS = new StoreInst(UndefValue::get(Int8Ty), - Constant::getNullValue(Int8Ty->getPointerTo()), - IntrinsicI); - if (MSSAU) { - const MemoryUseOrDef *FirstNonDom = nullptr; - const auto *AL = - MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent()); - - // If there are accesses in the current basic block, find the first one - // that does not come before NewS. The new memory access is inserted - // after the found access or before the terminator if no such access is - // found. - if (AL) { - for (auto &Acc : *AL) { - if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc)) - if (!Current->getMemoryInst()->comesBefore(NewS)) { - FirstNonDom = Current; - break; - } - } - } - - // This added store is to null, so it will never executed and we can - // just use the LiveOnEntry def as defining access. - auto *NewDef = - FirstNonDom ? MSSAU->createMemoryAccessBefore( - NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(), - const_cast<MemoryUseOrDef *>(FirstNonDom)) - : MSSAU->createMemoryAccessInBB( - NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(), - NewS->getParent(), MemorySSA::BeforeTerminator); - - MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false); - } + auto *NewS = new StoreInst(UndefValue::get(Int8Ty), + Constant::getNullValue(Int8Ty->getPointerTo()), + IntrinsicI); + if (MSSAU) { + const MemoryUseOrDef *FirstNonDom = nullptr; + const auto *AL = + MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent()); + + // If there are accesses in the current basic block, find the first one + // that does not come before NewS. The new memory access is inserted + // after the found access or before the terminator if no such access is + // found. + if (AL) { + for (auto &Acc : *AL) { + if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc)) + if (!Current->getMemoryInst()->comesBefore(NewS)) { + FirstNonDom = Current; + break; + } + } + } + + // This added store is to null, so it will never executed and we can + // just use the LiveOnEntry def as defining access. + auto *NewDef = + FirstNonDom ? MSSAU->createMemoryAccessBefore( + NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(), + const_cast<MemoryUseOrDef *>(FirstNonDom)) + : MSSAU->createMemoryAccessInBB( + NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(), + NewS->getParent(), MemorySSA::BeforeTerminator); + + MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false); + } } if (isAssumeWithEmptyBundle(*IntrinsicI)) markInstructionForDeletion(IntrinsicI); @@ -1661,11 +1661,11 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true ReplaceOperandsWithMap[V] = True; - // Similarly, after assume(!NotV) we know that NotV == false. - Value *NotV; - if (match(V, m_Not(m_Value(NotV)))) - ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext()); - + // Similarly, after assume(!NotV) we know that NotV == false. + Value *NotV; + if (match(V, m_Not(m_Value(NotV)))) + ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext()); + // If we find an equality fact, canonicalize all dominated uses in this block // to one of the two values. We heuristically choice the "oldest" of the // two where age is determined by value number. (Note that propagateEquality @@ -1772,8 +1772,8 @@ bool GVN::processLoad(LoadInst *L) { // Replace the load! 
patchAndReplaceAllUsesWith(L, AvailableValue); markInstructionForDeletion(L); - if (MSSAU) - MSSAU->removeMemoryAccess(L); + if (MSSAU) + MSSAU->removeMemoryAccess(L); ++NumGVNLoad; reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more @@ -1895,7 +1895,7 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, } if (Exp.commutative) { - assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!"); + assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!"); if (Exp.varargs[0] > Exp.varargs[1]) { std::swap(Exp.varargs[0], Exp.varargs[1]); uint32_t Opcode = Exp.opcode >> 8; @@ -1918,8 +1918,8 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, /// again. void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num, const BasicBlock &CurrBlock) { - for (const BasicBlock *Pred : predecessors(&CurrBlock)) - PhiTranslateTable.erase({Num, Pred}); + for (const BasicBlock *Pred : predecessors(&CurrBlock)) + PhiTranslateTable.erase({Num, Pred}); } // In order to find a leader for a given value number at a @@ -2083,8 +2083,8 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, // If "A && B" is known true then both A and B are known true. If "A || B" // is known false then both A and B are known false. Value *A, *B; - if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) || - (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) { + if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) || + (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) { Worklist.push_back(std::make_pair(A, RHS)); Worklist.push_back(std::make_pair(B, RHS)); continue; @@ -2286,7 +2286,7 @@ bool GVN::processInstruction(Instruction *I) { bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { + OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); @@ -2299,8 +2299,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, VN.setMemDep(MD); ORE = RunORE; InvalidBlockRPONumbers = true; - MemorySSAUpdater Updater(MSSA); - MSSAU = MSSA ? &Updater : nullptr; + MemorySSAUpdater Updater(MSSA); + MSSAU = MSSA ? &Updater : nullptr; bool Changed = false; bool ShouldContinue = true; @@ -2311,7 +2311,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { BasicBlock *BB = &*FI++; - bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD); + bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD); if (removedBlock) ++NumGVNBlocks; @@ -2347,9 +2347,9 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // iteration. 
DeadBlocks.clear(); - if (MSSA && VerifyMemorySSA) - MSSA->verifyMemorySSA(); - + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + return Changed; } @@ -2390,8 +2390,8 @@ bool GVN::processBlock(BasicBlock *BB) { salvageKnowledge(I, AC); salvageDebugInfo(*I); if (MD) MD->removeInstruction(I); - if (MSSAU) - MSSAU->removeMemoryAccess(I); + if (MSSAU) + MSSAU->removeMemoryAccess(I); LLVM_DEBUG(verifyRemoved(I)); ICF->removeInstruction(I); I->eraseFromParent(); @@ -2479,14 +2479,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) { if (isa<GetElementPtrInst>(CurInst)) return false; - if (auto *CallB = dyn_cast<CallBase>(CurInst)) { - // We don't currently value number ANY inline asm calls. + if (auto *CallB = dyn_cast<CallBase>(CurInst)) { + // We don't currently value number ANY inline asm calls. if (CallB->isInlineAsm()) return false; - // Don't do PRE on convergent calls. - if (CallB->isConvergent()) - return false; - } + // Don't do PRE on convergent calls. + if (CallB->isConvergent()) + return false; + } uint32_t ValNo = VN.lookup(CurInst); @@ -2626,8 +2626,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); - if (MSSAU) - MSSAU->removeMemoryAccess(CurInst); + if (MSSAU) + MSSAU->removeMemoryAccess(CurInst); LLVM_DEBUG(verifyRemoved(CurInst)); // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes // some assertion failures. @@ -2672,12 +2672,12 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { // possible. BasicBlock *BB = SplitCriticalEdge( Pred, Succ, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify()); - if (BB) { - if (MD) - MD->invalidateCachedPredecessors(); - InvalidBlockRPONumbers = true; - } + CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify()); + if (BB) { + if (MD) + MD->invalidateCachedPredecessors(); + InvalidBlockRPONumbers = true; + } return BB; } @@ -2686,20 +2686,20 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { bool GVN::splitCriticalEdges() { if (toSplit.empty()) return false; - - bool Changed = false; + + bool Changed = false; do { std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val(); - Changed |= SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(DT, LI, MSSAU)) != - nullptr; + Changed |= SplitCriticalEdge(Edge.first, Edge.second, + CriticalEdgeSplittingOptions(DT, LI, MSSAU)) != + nullptr; } while (!toSplit.empty()); - if (Changed) { - if (MD) - MD->invalidateCachedPredecessors(); - InvalidBlockRPONumbers = true; - } - return Changed; + if (Changed) { + if (MD) + MD->invalidateCachedPredecessors(); + InvalidBlockRPONumbers = true; + } + return Changed; } /// Executes one iteration of GVN @@ -2803,12 +2803,12 @@ void GVN::addDeadBlock(BasicBlock *BB) { // First, split the critical edges. This might also create additional blocks // to preserve LoopSimplify form and adjust edges accordingly. 
- SmallVector<BasicBlock *, 4> Preds(predecessors(B)); + SmallVector<BasicBlock *, 4> Preds(predecessors(B)); for (BasicBlock *P : Preds) { if (!DeadBlocks.count(P)) continue; - if (llvm::is_contained(successors(P), B) && + if (llvm::is_contained(successors(P), B) && isCriticalEdge(P->getTerminator(), B)) { if (BasicBlock *S = splitCriticalEdges(P, B)) DeadBlocks.insert(P = S); @@ -2893,7 +2893,7 @@ public: auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); return Impl.runImpl( F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), getAnalysis<DominatorTreeWrapperPass>().getDomTree(), @@ -2903,8 +2903,8 @@ public: ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep() : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr, - &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), + MSSAWP ? &MSSAWP->getMSSA() : nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -2920,7 +2920,7 @@ public: AU.addPreserved<TargetLibraryInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); } private: diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp index 8d0bd56749..14f438c2c8 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp @@ -242,14 +242,14 @@ public: }; static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { - static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_invariant_group, - LLVMContext::MD_access_group}; + static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_range, + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group, + LLVMContext::MD_access_group}; combineMetadata(ReplInst, I, KnownIDs, true); } @@ -263,7 +263,7 @@ public: : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {} - bool run(Function &F); + bool run(Function &F); // Copied from NewGVN.cpp // This function provides global ranking of operations so that we can place @@ -271,7 +271,7 @@ public: // for a complete ordering, as constants all have the same rank. However, // generally, we will simplify an operation with all constants so that it // doesn't matter what order they appear in. - unsigned int rank(const Value *V) const; + unsigned int rank(const Value *V) const; private: GVN::ValueTable VN; @@ -291,7 +291,7 @@ private: enum InsKind { Unknown, Scalar, Load, Store }; // Return true when there are exception handling in BB. - bool hasEH(const BasicBlock *BB); + bool hasEH(const BasicBlock *BB); // Return true when I1 appears before I2 in the instructions of BB. bool firstInBB(const Instruction *I1, const Instruction *I2) { @@ -304,10 +304,10 @@ private: // Return true when there are memory uses of Def in BB. 
bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, - const BasicBlock *BB); + const BasicBlock *BB); bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB, - int &NBBsOnAllPaths); + int &NBBsOnAllPaths); // Return true when there are exception handling or loads of memory Def // between Def and NewPt. This function is only called for stores: Def is @@ -317,19 +317,19 @@ private: // return true when the counter NBBsOnAllPaths reaces 0, except when it is // initialized to -1 which is unlimited. bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, - int &NBBsOnAllPaths); + int &NBBsOnAllPaths); // Return true when there are exception handling between HoistPt and BB. // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and // return true when the counter NBBsOnAllPaths reaches 0, except when it is // initialized to -1 which is unlimited. bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, - int &NBBsOnAllPaths); + int &NBBsOnAllPaths); // Return true when it is safe to hoist a memory load or store U from OldPt // to NewPt. bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, - MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths); + MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths); // Return true when it is safe to hoist scalar instructions from all blocks in // WL to HoistBB. @@ -352,21 +352,21 @@ private: // Returns the edge via which an instruction in BB will get the values from. // Returns true when the values are flowing out to each edge. - bool valueAnticipable(CHIArgs C, Instruction *TI) const; + bool valueAnticipable(CHIArgs C, Instruction *TI) const; // Check if it is safe to hoist values tracked by CHI in the range // [Begin, End) and accumulate them in Safe. void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K, - SmallVectorImpl<CHIArg> &Safe); + SmallVectorImpl<CHIArg> &Safe); using RenameStackType = DenseMap<VNType, SmallVector<Instruction *, 2>>; // Push all the VNs corresponding to BB into RenameStack. void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs, - RenameStackType &RenameStack); + RenameStackType &RenameStack); void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs, - RenameStackType &RenameStack); + RenameStackType &RenameStack); // Walk the post-dominator tree top-down and use a stack for each value to // store the last value you see. When you hit a CHI from a given edge, the @@ -396,7 +396,7 @@ private: // they form a list of anticipable values. OutValues contains CHIs // corresponding to each basic block. void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K, - HoistingPointList &HPL); + HoistingPointList &HPL); // Compute insertion points for each values which can be fully anticipated at // a dominator. HPL contains all such values. @@ -454,14 +454,14 @@ private: } // Insert empty CHI node for this VN. This is used to factor out // basic blocks where the ANTIC can potentially change. - CHIArg EmptyChi = {VN, nullptr, nullptr}; - for (auto *IDFBB : IDFBlocks) { + CHIArg EmptyChi = {VN, nullptr, nullptr}; + for (auto *IDFBB : IDFBlocks) { for (unsigned i = 0; i < V.size(); ++i) { - // Ignore spurious PDFs. - if (DT->properlyDominates(IDFBB, V[i]->getParent())) { - OutValue[IDFBB].push_back(EmptyChi); - LLVM_DEBUG(dbgs() << "\nInserting a CHI for BB: " - << IDFBB->getName() << ", for Insn: " << *V[i]); + // Ignore spurious PDFs. 
+ if (DT->properlyDominates(IDFBB, V[i]->getParent())) { + OutValue[IDFBB].push_back(EmptyChi); + LLVM_DEBUG(dbgs() << "\nInserting a CHI for BB: " + << IDFBB->getName() << ", for Insn: " << *V[i]); } } } @@ -479,755 +479,755 @@ private: // a load without hoisting its access function. So before hoisting any // expression, make sure that all its operands are available at insert point. bool allOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const; + const BasicBlock *HoistPt) const; // Same as allOperandsAvailable with recursive check for GEP operands. bool allGepOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const; + const BasicBlock *HoistPt) const; // Make all operands of the GEP available. void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, const SmallVecInsn &InstructionsToHoist, - Instruction *Gep) const; - - void updateAlignment(Instruction *I, Instruction *Repl); - - // Remove all the instructions in Candidates and replace their usage with - // Repl. Returns the number of instructions removed. - unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl, - MemoryUseOrDef *NewMemAcc); - - // Replace all Memory PHI usage with NewMemAcc. - void raMPHIuw(MemoryUseOrDef *NewMemAcc); - - // Remove all other instructions and replace them with Repl. - unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl, - BasicBlock *DestBB, bool MoveAccess); - - // In the case Repl is a load or a store, we make all their GEPs - // available: GEPs are not hoisted by default to avoid the address - // computations to be hoisted without the associated load or store. - bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist) const; - - std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL); - - // Hoist all expressions. Returns Number of scalars hoisted - // and number of non-scalars hoisted. - std::pair<unsigned, unsigned> hoistExpressions(Function &F); -}; - -class GVNHoistLegacyPass : public FunctionPass { -public: - static char ID; - - GVNHoistLegacyPass() : FunctionPass(ID) { - initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); - auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); - auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); - auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); - - GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); - return G.run(F); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<PostDominatorTreeWrapperPass>(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addRequired<MemoryDependenceWrapperPass>(); - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } -}; - -bool GVNHoist::run(Function &F) { - NumFuncArgs = F.arg_size(); - VN.setDomTree(DT); - VN.setAliasAnalysis(AA); - VN.setMemDep(MD); - bool Res = false; - // Perform DFS Numbering of instructions. 
- unsigned BBI = 0; - for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { - DFSNumber[BB] = ++BBI; - unsigned I = 0; - for (auto &Inst : *BB) - DFSNumber[&Inst] = ++I; - } - - int ChainLength = 0; - - // FIXME: use lazy evaluation of VN to avoid the fix-point computation. - while (true) { - if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) - return Res; - - auto HoistStat = hoistExpressions(F); - if (HoistStat.first + HoistStat.second == 0) - return Res; - - if (HoistStat.second > 0) - // To address a limitation of the current GVN, we need to rerun the - // hoisting after we hoisted loads or stores in order to be able to - // hoist all scalars dependent on the hoisted ld/st. - VN.clear(); - - Res = true; - } - - return Res; -} - -unsigned int GVNHoist::rank(const Value *V) const { - // Prefer constants to undef to anything else - // Undef is a constant, have to check it first. - // Prefer smaller constants to constantexprs - if (isa<ConstantExpr>(V)) - return 2; - if (isa<UndefValue>(V)) - return 1; - if (isa<Constant>(V)) - return 0; - else if (auto *A = dyn_cast<Argument>(V)) - return 3 + A->getArgNo(); - - // Need to shift the instruction DFS by number of arguments + 3 to account - // for the constant and argument ranking above. - auto Result = DFSNumber.lookup(V); - if (Result > 0) - return 4 + NumFuncArgs + Result; - // Unreachable or something else, just return a really large number. - return ~0; -} - -bool GVNHoist::hasEH(const BasicBlock *BB) { - auto It = BBSideEffects.find(BB); - if (It != BBSideEffects.end()) - return It->second; - - if (BB->isEHPad() || BB->hasAddressTaken()) { - BBSideEffects[BB] = true; - return true; - } - - if (BB->getTerminator()->mayThrow()) { - BBSideEffects[BB] = true; - return true; - } - - BBSideEffects[BB] = false; - return false; -} - -bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, - const BasicBlock *BB) { - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); - if (!Acc) - return false; - - Instruction *OldPt = Def->getMemoryInst(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *NewBB = NewPt->getParent(); - bool ReachedNewPt = false; - - for (const MemoryAccess &MA : *Acc) - if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) { - Instruction *Insn = MU->getMemoryInst(); - - // Do not check whether MU aliases Def when MU occurs after OldPt. - if (BB == OldBB && firstInBB(OldPt, Insn)) - break; - - // Do not check whether MU aliases Def when MU occurs before NewPt. - if (BB == NewBB) { - if (!ReachedNewPt) { - if (firstInBB(Insn, NewPt)) - continue; - ReachedNewPt = true; - } + Instruction *Gep) const; + + void updateAlignment(Instruction *I, Instruction *Repl); + + // Remove all the instructions in Candidates and replace their usage with + // Repl. Returns the number of instructions removed. + unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl, + MemoryUseOrDef *NewMemAcc); + + // Replace all Memory PHI usage with NewMemAcc. + void raMPHIuw(MemoryUseOrDef *NewMemAcc); + + // Remove all other instructions and replace them with Repl. + unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl, + BasicBlock *DestBB, bool MoveAccess); + + // In the case Repl is a load or a store, we make all their GEPs + // available: GEPs are not hoisted by default to avoid the address + // computations to be hoisted without the associated load or store. 
+ bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const; + + std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL); + + // Hoist all expressions. Returns Number of scalars hoisted + // and number of non-scalars hoisted. + std::pair<unsigned, unsigned> hoistExpressions(Function &F); +}; + +class GVNHoistLegacyPass : public FunctionPass { +public: + static char ID; + + GVNHoistLegacyPass() : FunctionPass(ID) { + initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); + auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); + auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); + + GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTreeWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<MemoryDependenceWrapperPass>(); + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; + +bool GVNHoist::run(Function &F) { + NumFuncArgs = F.arg_size(); + VN.setDomTree(DT); + VN.setAliasAnalysis(AA); + VN.setMemDep(MD); + bool Res = false; + // Perform DFS Numbering of instructions. + unsigned BBI = 0; + for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { + DFSNumber[BB] = ++BBI; + unsigned I = 0; + for (auto &Inst : *BB) + DFSNumber[&Inst] = ++I; + } + + int ChainLength = 0; + + // FIXME: use lazy evaluation of VN to avoid the fix-point computation. + while (true) { + if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) + return Res; + + auto HoistStat = hoistExpressions(F); + if (HoistStat.first + HoistStat.second == 0) + return Res; + + if (HoistStat.second > 0) + // To address a limitation of the current GVN, we need to rerun the + // hoisting after we hoisted loads or stores in order to be able to + // hoist all scalars dependent on the hoisted ld/st. + VN.clear(); + + Res = true; + } + + return Res; +} + +unsigned int GVNHoist::rank(const Value *V) const { + // Prefer constants to undef to anything else + // Undef is a constant, have to check it first. + // Prefer smaller constants to constantexprs + if (isa<ConstantExpr>(V)) + return 2; + if (isa<UndefValue>(V)) + return 1; + if (isa<Constant>(V)) + return 0; + else if (auto *A = dyn_cast<Argument>(V)) + return 3 + A->getArgNo(); + + // Need to shift the instruction DFS by number of arguments + 3 to account + // for the constant and argument ranking above. + auto Result = DFSNumber.lookup(V); + if (Result > 0) + return 4 + NumFuncArgs + Result; + // Unreachable or something else, just return a really large number. 
+ return ~0; +} + +bool GVNHoist::hasEH(const BasicBlock *BB) { + auto It = BBSideEffects.find(BB); + if (It != BBSideEffects.end()) + return It->second; + + if (BB->isEHPad() || BB->hasAddressTaken()) { + BBSideEffects[BB] = true; + return true; + } + + if (BB->getTerminator()->mayThrow()) { + BBSideEffects[BB] = true; + return true; + } + + BBSideEffects[BB] = false; + return false; +} + +bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, + const BasicBlock *BB) { + const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); + if (!Acc) + return false; + + Instruction *OldPt = Def->getMemoryInst(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *NewBB = NewPt->getParent(); + bool ReachedNewPt = false; + + for (const MemoryAccess &MA : *Acc) + if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) { + Instruction *Insn = MU->getMemoryInst(); + + // Do not check whether MU aliases Def when MU occurs after OldPt. + if (BB == OldBB && firstInBB(OldPt, Insn)) + break; + + // Do not check whether MU aliases Def when MU occurs before NewPt. + if (BB == NewBB) { + if (!ReachedNewPt) { + if (firstInBB(Insn, NewPt)) + continue; + ReachedNewPt = true; + } } - if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) - return true; + if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) + return true; + } + + return false; +} + +bool GVNHoist::hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB, + int &NBBsOnAllPaths) { + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; + + // Impossible to hoist with exceptions on the path. + if (hasEH(BB)) + return true; + + // No such instruction after HoistBarrier in a basic block was + // selected for hoisting so instructions selected within basic block with + // a hoist barrier can be hoisted. + if ((BB != SrcBB) && HoistBarrier.count(BB)) + return true; + + return false; +} + +bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, + int &NBBsOnAllPaths) { + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = Def->getBlock(); + assert(DT->dominates(NewBB, OldBB) && "invalid path"); + assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && + "def does not dominate new hoisting point"); + + // Walk all basic blocks reachable in depth-first iteration on the inverse + // CFG from OldBB to NewBB. These blocks are all the blocks that may be + // executed between the execution of NewBB and OldBB. Hoisting an expression + // from OldBB into NewBB has to be safe on all execution paths. + for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == NewBB) { + // Stop traversal when reaching HoistPt. + I.skipChildren(); + continue; } - return false; -} - -bool GVNHoist::hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB, - int &NBBsOnAllPaths) { - // Stop walk once the limit is reached. - if (NBBsOnAllPaths == 0) - return true; - - // Impossible to hoist with exceptions on the path. - if (hasEH(BB)) - return true; - - // No such instruction after HoistBarrier in a basic block was - // selected for hoisting so instructions selected within basic block with - // a hoist barrier can be hoisted. 
- if ((BB != SrcBB) && HoistBarrier.count(BB)) - return true; - - return false; -} - -bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, - int &NBBsOnAllPaths) { - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = Def->getBlock(); - assert(DT->dominates(NewBB, OldBB) && "invalid path"); - assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && - "def does not dominate new hoisting point"); - - // Walk all basic blocks reachable in depth-first iteration on the inverse - // CFG from OldBB to NewBB. These blocks are all the blocks that may be - // executed between the execution of NewBB and OldBB. Hoisting an expression - // from OldBB into NewBB has to be safe on all execution paths. - for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { - const BasicBlock *BB = *I; - if (BB == NewBB) { - // Stop traversal when reaching HoistPt. - I.skipChildren(); - continue; - } - - if (hasEHhelper(BB, OldBB, NBBsOnAllPaths)) - return true; - - // Check that we do not move a store past loads. - if (hasMemoryUse(NewPt, Def, BB)) - return true; - - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; - - ++I; + if (hasEHhelper(BB, OldBB, NBBsOnAllPaths)) + return true; + + // Check that we do not move a store past loads. + if (hasMemoryUse(NewPt, Def, BB)) + return true; + + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; + + ++I; } - return false; -} - -bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, - int &NBBsOnAllPaths) { - assert(DT->dominates(HoistPt, SrcBB) && "Invalid path"); - - // Walk all basic blocks reachable in depth-first iteration on - // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the - // blocks that may be executed between the execution of NewHoistPt and - // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe - // on all execution paths. - for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) { - const BasicBlock *BB = *I; - if (BB == HoistPt) { - // Stop traversal when reaching NewHoistPt. - I.skipChildren(); - continue; + return false; +} + +bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, + int &NBBsOnAllPaths) { + assert(DT->dominates(HoistPt, SrcBB) && "Invalid path"); + + // Walk all basic blocks reachable in depth-first iteration on + // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the + // blocks that may be executed between the execution of NewHoistPt and + // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe + // on all execution paths. + for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == HoistPt) { + // Stop traversal when reaching NewHoistPt. + I.skipChildren(); + continue; } - - if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths)) - return true; - - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; - - ++I; - } - - return false; -} - -bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt, - const Instruction *OldPt, MemoryUseOrDef *U, - GVNHoist::InsKind K, int &NBBsOnAllPaths) { - // In place hoisting is safe. - if (NewPt == OldPt) - return true; - - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *UBB = U->getBlock(); - - // Check for dependences on the Memory SSA. 
- MemoryAccess *D = U->getDefiningAccess(); - BasicBlock *DBB = D->getBlock(); - if (DT->properlyDominates(NewBB, DBB)) - // Cannot move the load or store to NewBB above its definition in DBB. - return false; - - if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D)) - if (auto *UD = dyn_cast<MemoryUseOrDef>(D)) - if (!firstInBB(UD->getMemoryInst(), NewPt)) - // Cannot move the load or store to NewPt above its definition in D. - return false; - - // Check for unsafe hoistings due to side effects. - if (K == InsKind::Store) { - if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths)) - return false; - } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) - return false; - - if (UBB == NewBB) { - if (DT->properlyDominates(DBB, NewBB)) - return true; - assert(UBB == DBB); - assert(MSSA->locallyDominates(D, U)); + + if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths)) + return true; + + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; + + ++I; } - // No side effects: it is safe to hoist. - return true; -} - -bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const { - if (TI->getNumSuccessors() > (unsigned)size(C)) - return false; // Not enough args in this CHI. - - for (auto CHI : C) { - // Find if all the edges have values flowing out of BB. - if (!llvm::is_contained(successors(TI), CHI.Dest)) - return false; + return false; +} + +bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt, + const Instruction *OldPt, MemoryUseOrDef *U, + GVNHoist::InsKind K, int &NBBsOnAllPaths) { + // In place hoisting is safe. + if (NewPt == OldPt) + return true; + + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *UBB = U->getBlock(); + + // Check for dependences on the Memory SSA. + MemoryAccess *D = U->getDefiningAccess(); + BasicBlock *DBB = D->getBlock(); + if (DT->properlyDominates(NewBB, DBB)) + // Cannot move the load or store to NewBB above its definition in DBB. + return false; + + if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D)) + if (auto *UD = dyn_cast<MemoryUseOrDef>(D)) + if (!firstInBB(UD->getMemoryInst(), NewPt)) + // Cannot move the load or store to NewPt above its definition in D. + return false; + + // Check for unsafe hoistings due to side effects. + if (K == InsKind::Store) { + if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths)) + return false; + } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + return false; + + if (UBB == NewBB) { + if (DT->properlyDominates(DBB, NewBB)) + return true; + assert(UBB == DBB); + assert(MSSA->locallyDominates(D, U)); } - return true; -} -void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, GVNHoist::InsKind K, - SmallVectorImpl<CHIArg> &Safe) { - int NumBBsOnAllPaths = MaxNumberOfBBSInPath; - for (auto CHI : C) { - Instruction *Insn = CHI.I; - if (!Insn) // No instruction was inserted in this CHI. - continue; - if (K == InsKind::Scalar) { - if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths)) - Safe.push_back(CHI); - } else { - auto *T = BB->getTerminator(); - if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn)) - if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths)) - Safe.push_back(CHI); + // No side effects: it is safe to hoist. + return true; +} + +bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const { + if (TI->getNumSuccessors() > (unsigned)size(C)) + return false; // Not enough args in this CHI. + + for (auto CHI : C) { + // Find if all the edges have values flowing out of BB. 
+ if (!llvm::is_contained(successors(TI), CHI.Dest)) + return false; + } + return true; +} + +void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, GVNHoist::InsKind K, + SmallVectorImpl<CHIArg> &Safe) { + int NumBBsOnAllPaths = MaxNumberOfBBSInPath; + for (auto CHI : C) { + Instruction *Insn = CHI.I; + if (!Insn) // No instruction was inserted in this CHI. + continue; + if (K == InsKind::Scalar) { + if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths)) + Safe.push_back(CHI); + } else { + auto *T = BB->getTerminator(); + if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn)) + if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths)) + Safe.push_back(CHI); } } -} - -void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs, - GVNHoist::RenameStackType &RenameStack) { - auto it1 = ValueBBs.find(BB); - if (it1 != ValueBBs.end()) { - // Iterate in reverse order to keep lower ranked values on the top. - for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) { - // Get the value of instruction I - LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second); - RenameStack[VI.first].push_back(VI.second); +} + +void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs, + GVNHoist::RenameStackType &RenameStack) { + auto it1 = ValueBBs.find(BB); + if (it1 != ValueBBs.end()) { + // Iterate in reverse order to keep lower ranked values on the top. + for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) { + // Get the value of instruction I + LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second); + RenameStack[VI.first].push_back(VI.second); } + } +} + +void GVNHoist::fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs, + GVNHoist::RenameStackType &RenameStack) { + // For each *predecessor* (because Post-DOM) of BB check if it has a CHI + for (auto Pred : predecessors(BB)) { + auto P = CHIBBs.find(Pred); + if (P == CHIBBs.end()) { + continue; + } + LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName();); + // A CHI is found (BB -> Pred is an edge in the CFG) + // Pop the stack until Top(V) = Ve. + auto &VCHI = P->second; + for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) { + CHIArg &C = *It; + if (!C.Dest) { + auto si = RenameStack.find(C.VN); + // The Basic Block where CHI is must dominate the value we want to + // track in a CHI. In the PDom walk, there can be values in the + // stack which are not control dependent e.g., nested loop. + if (si != RenameStack.end() && si->second.size() && + DT->properlyDominates(Pred, si->second.back()->getParent())) { + C.Dest = BB; // Assign the edge + C.I = si->second.pop_back_val(); // Assign the argument + LLVM_DEBUG(dbgs() + << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I + << ", VN: " << C.VN.first << ", " << C.VN.second); + } + // Move to next CHI of a different value + It = std::find_if(It, VCHI.end(), [It](CHIArg &A) { return A != *It; }); + } else + ++It; + } + } +} + +void GVNHoist::findHoistableCandidates(OutValuesType &CHIBBs, + GVNHoist::InsKind K, + HoistingPointList &HPL) { + auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; }; + + // CHIArgs now have the outgoing values, so check for anticipability and + // accumulate hoistable candidates in HPL. + for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) { + BasicBlock *BB = A.first; + SmallVectorImpl<CHIArg> &CHIs = A.second; + // Vector of PHIs contains PHIs for different instructions. + // Sort the args according to their VNs, such that identical + // instructions are together. 
+ llvm::stable_sort(CHIs, cmpVN); + auto TI = BB->getTerminator(); + auto B = CHIs.begin(); + // [PreIt, PHIIt) form a range of CHIs which have identical VNs. + auto PHIIt = llvm::find_if(CHIs, [B](CHIArg &A) { return A != *B; }); + auto PrevIt = CHIs.begin(); + while (PrevIt != PHIIt) { + // Collect values which satisfy safety checks. + SmallVector<CHIArg, 2> Safe; + // We check for safety first because there might be multiple values in + // the same path, some of which are not safe to be hoisted, but overall + // each edge has at least one value which can be hoisted, making the + // value anticipable along that path. + checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe); + + // List of safe values should be anticipable at TI. + if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) { + HPL.push_back({BB, SmallVecInsn()}); + SmallVecInsn &V = HPL.back().second; + for (auto B : Safe) + V.push_back(B.I); + } + + // Check other VNs + PrevIt = PHIIt; + PHIIt = std::find_if(PrevIt, CHIs.end(), + [PrevIt](CHIArg &A) { return A != *PrevIt; }); + } } -} - -void GVNHoist::fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs, - GVNHoist::RenameStackType &RenameStack) { - // For each *predecessor* (because Post-DOM) of BB check if it has a CHI - for (auto Pred : predecessors(BB)) { - auto P = CHIBBs.find(Pred); - if (P == CHIBBs.end()) { - continue; - } - LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName();); - // A CHI is found (BB -> Pred is an edge in the CFG) - // Pop the stack until Top(V) = Ve. - auto &VCHI = P->second; - for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) { - CHIArg &C = *It; - if (!C.Dest) { - auto si = RenameStack.find(C.VN); - // The Basic Block where CHI is must dominate the value we want to - // track in a CHI. In the PDom walk, there can be values in the - // stack which are not control dependent e.g., nested loop. - if (si != RenameStack.end() && si->second.size() && - DT->properlyDominates(Pred, si->second.back()->getParent())) { - C.Dest = BB; // Assign the edge - C.I = si->second.pop_back_val(); // Assign the argument - LLVM_DEBUG(dbgs() - << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I - << ", VN: " << C.VN.first << ", " << C.VN.second); - } - // Move to next CHI of a different value - It = std::find_if(It, VCHI.end(), [It](CHIArg &A) { return A != *It; }); - } else - ++It; - } - } -} - -void GVNHoist::findHoistableCandidates(OutValuesType &CHIBBs, - GVNHoist::InsKind K, - HoistingPointList &HPL) { - auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; }; - - // CHIArgs now have the outgoing values, so check for anticipability and - // accumulate hoistable candidates in HPL. - for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) { - BasicBlock *BB = A.first; - SmallVectorImpl<CHIArg> &CHIs = A.second; - // Vector of PHIs contains PHIs for different instructions. - // Sort the args according to their VNs, such that identical - // instructions are together. - llvm::stable_sort(CHIs, cmpVN); - auto TI = BB->getTerminator(); - auto B = CHIs.begin(); - // [PreIt, PHIIt) form a range of CHIs which have identical VNs. - auto PHIIt = llvm::find_if(CHIs, [B](CHIArg &A) { return A != *B; }); - auto PrevIt = CHIs.begin(); - while (PrevIt != PHIIt) { - // Collect values which satisfy safety checks. 
- SmallVector<CHIArg, 2> Safe; - // We check for safety first because there might be multiple values in - // the same path, some of which are not safe to be hoisted, but overall - // each edge has at least one value which can be hoisted, making the - // value anticipable along that path. - checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe); - - // List of safe values should be anticipable at TI. - if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) { - HPL.push_back({BB, SmallVecInsn()}); - SmallVecInsn &V = HPL.back().second; - for (auto B : Safe) - V.push_back(B.I); - } - - // Check other VNs - PrevIt = PHIIt; - PHIIt = std::find_if(PrevIt, CHIs.end(), - [PrevIt](CHIArg &A) { return A != *PrevIt; }); - } - } -} - -bool GVNHoist::allOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast<Instruction>(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) - return false; - - return true; -} - -bool GVNHoist::allGepOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast<Instruction>(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) { - if (const GetElementPtrInst *GepOp = - dyn_cast<GetElementPtrInst>(Inst)) { - if (!allGepOperandsAvailable(GepOp, HoistPt)) +} + +bool GVNHoist::allOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast<Instruction>(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) + return false; + + return true; +} + +bool GVNHoist::allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast<Instruction>(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) { + if (const GetElementPtrInst *GepOp = + dyn_cast<GetElementPtrInst>(Inst)) { + if (!allGepOperandsAvailable(GepOp, HoistPt)) return false; - // Gep is available if all operands of GepOp are available. - } else { - // Gep is not available if it has operands other than GEPs that are - // defined in blocks not dominating HoistPt. + // Gep is available if all operands of GepOp are available. + } else { + // Gep is not available if it has operands other than GEPs that are + // defined in blocks not dominating HoistPt. return false; - } + } } - return true; -} - -void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist, - Instruction *Gep) const { - assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available"); - - Instruction *ClonedGep = Gep->clone(); - for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) - if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) { - // Check whether the operand is already available. - if (DT->dominates(Op->getParent(), HoistPt)) - continue; - - // As a GEP can refer to other GEPs, recursively make all the operands - // of this GEP available at HoistPt. 
- if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op)) - makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); + return true; +} + +void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const { + assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available"); + + Instruction *ClonedGep = Gep->clone(); + for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) { + // Check whether the operand is already available. + if (DT->dominates(Op->getParent(), HoistPt)) + continue; + + // As a GEP can refer to other GEPs, recursively make all the operands + // of this GEP available at HoistPt. + if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op)) + makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); } - // Copy Gep and replace its uses in Repl with ClonedGep. - ClonedGep->insertBefore(HoistPt->getTerminator()); - - // Conservatively discard any optimization hints, they may differ on the - // other paths. - ClonedGep->dropUnknownNonDebugMetadata(); - - // If we have optimization hints which agree with each other along different - // paths, preserve them. - for (const Instruction *OtherInst : InstructionsToHoist) { - const GetElementPtrInst *OtherGep; - if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst)) - OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand()); - else - OtherGep = cast<GetElementPtrInst>( - cast<StoreInst>(OtherInst)->getPointerOperand()); - ClonedGep->andIRFlags(OtherGep); - } - - // Replace uses of Gep with ClonedGep in Repl. - Repl->replaceUsesOfWith(Gep, ClonedGep); -} - -void GVNHoist::updateAlignment(Instruction *I, Instruction *Repl) { - if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) { - ReplacementLoad->setAlignment( - std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign())); - ++NumLoadsRemoved; - } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) { - ReplacementStore->setAlignment( - std::min(ReplacementStore->getAlign(), cast<StoreInst>(I)->getAlign())); - ++NumStoresRemoved; - } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) { - ReplacementAlloca->setAlignment(std::max(ReplacementAlloca->getAlign(), - cast<AllocaInst>(I)->getAlign())); - } else if (isa<CallInst>(Repl)) { - ++NumCallsRemoved; + // Copy Gep and replace its uses in Repl with ClonedGep. + ClonedGep->insertBefore(HoistPt->getTerminator()); + + // Conservatively discard any optimization hints, they may differ on the + // other paths. + ClonedGep->dropUnknownNonDebugMetadata(); + + // If we have optimization hints which agree with each other along different + // paths, preserve them. + for (const Instruction *OtherInst : InstructionsToHoist) { + const GetElementPtrInst *OtherGep; + if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst)) + OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand()); + else + OtherGep = cast<GetElementPtrInst>( + cast<StoreInst>(OtherInst)->getPointerOperand()); + ClonedGep->andIRFlags(OtherGep); + } + + // Replace uses of Gep with ClonedGep in Repl. 
+ Repl->replaceUsesOfWith(Gep, ClonedGep); +} + +void GVNHoist::updateAlignment(Instruction *I, Instruction *Repl) { + if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) { + ReplacementLoad->setAlignment( + std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign())); + ++NumLoadsRemoved; + } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) { + ReplacementStore->setAlignment( + std::min(ReplacementStore->getAlign(), cast<StoreInst>(I)->getAlign())); + ++NumStoresRemoved; + } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) { + ReplacementAlloca->setAlignment(std::max(ReplacementAlloca->getAlign(), + cast<AllocaInst>(I)->getAlign())); + } else if (isa<CallInst>(Repl)) { + ++NumCallsRemoved; } -} - -unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl, - MemoryUseOrDef *NewMemAcc) { - unsigned NR = 0; - for (Instruction *I : Candidates) { - if (I != Repl) { - ++NR; - updateAlignment(I, Repl); - if (NewMemAcc) { - // Update the uses of the old MSSA access with NewMemAcc. - MemoryAccess *OldMA = MSSA->getMemoryAccess(I); - OldMA->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(OldMA); - } - - Repl->andIRFlags(I); - combineKnownMetadata(Repl, I); - I->replaceAllUsesWith(Repl); - // Also invalidate the Alias Analysis cache. - MD->removeInstruction(I); - I->eraseFromParent(); +} + +unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl, + MemoryUseOrDef *NewMemAcc) { + unsigned NR = 0; + for (Instruction *I : Candidates) { + if (I != Repl) { + ++NR; + updateAlignment(I, Repl); + if (NewMemAcc) { + // Update the uses of the old MSSA access with NewMemAcc. + MemoryAccess *OldMA = MSSA->getMemoryAccess(I); + OldMA->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(OldMA); + } + + Repl->andIRFlags(I); + combineKnownMetadata(Repl, I); + I->replaceAllUsesWith(Repl); + // Also invalidate the Alias Analysis cache. + MD->removeInstruction(I); + I->eraseFromParent(); + } + } + return NR; +} + +void GVNHoist::raMPHIuw(MemoryUseOrDef *NewMemAcc) { + SmallPtrSet<MemoryPhi *, 4> UsePhis; + for (User *U : NewMemAcc->users()) + if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U)) + UsePhis.insert(Phi); + + for (MemoryPhi *Phi : UsePhis) { + auto In = Phi->incoming_values(); + if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) { + Phi->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(Phi); + } + } +} + +unsigned GVNHoist::removeAndReplace(const SmallVecInsn &Candidates, + Instruction *Repl, BasicBlock *DestBB, + bool MoveAccess) { + MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl); + if (MoveAccess && NewMemAcc) { + // The definition of this ld/st will not change: ld/st hoisting is + // legal when the ld/st is not moved past its current definition. + MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::BeforeTerminator); + } + + // Replace all other instructions with Repl with memory access NewMemAcc. + unsigned NR = rauw(Candidates, Repl, NewMemAcc); + + // Remove MemorySSA phi nodes with the same arguments. + if (NewMemAcc) + raMPHIuw(NewMemAcc); + return NR; +} + +bool GVNHoist::makeGepOperandsAvailable( + Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const { + // Check whether the GEP of a ld/st can be synthesized at HoistPt. 
+ GetElementPtrInst *Gep = nullptr; + Instruction *Val = nullptr; + if (auto *Ld = dyn_cast<LoadInst>(Repl)) { + Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand()); + } else if (auto *St = dyn_cast<StoreInst>(Repl)) { + Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand()); + Val = dyn_cast<Instruction>(St->getValueOperand()); + // Check that the stored value is available. + if (Val) { + if (isa<GetElementPtrInst>(Val)) { + // Check whether we can compute the GEP at HoistPt. + if (!allGepOperandsAvailable(Val, HoistPt)) + return false; + } else if (!DT->dominates(Val->getParent(), HoistPt)) + return false; } - } - return NR; -} - -void GVNHoist::raMPHIuw(MemoryUseOrDef *NewMemAcc) { - SmallPtrSet<MemoryPhi *, 4> UsePhis; - for (User *U : NewMemAcc->users()) - if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U)) - UsePhis.insert(Phi); - - for (MemoryPhi *Phi : UsePhis) { - auto In = Phi->incoming_values(); - if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) { - Phi->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(Phi); - } - } -} - -unsigned GVNHoist::removeAndReplace(const SmallVecInsn &Candidates, - Instruction *Repl, BasicBlock *DestBB, - bool MoveAccess) { - MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl); - if (MoveAccess && NewMemAcc) { - // The definition of this ld/st will not change: ld/st hoisting is - // legal when the ld/st is not moved past its current definition. - MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::BeforeTerminator); - } - - // Replace all other instructions with Repl with memory access NewMemAcc. - unsigned NR = rauw(Candidates, Repl, NewMemAcc); - - // Remove MemorySSA phi nodes with the same arguments. - if (NewMemAcc) - raMPHIuw(NewMemAcc); - return NR; -} - -bool GVNHoist::makeGepOperandsAvailable( - Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist) const { - // Check whether the GEP of a ld/st can be synthesized at HoistPt. - GetElementPtrInst *Gep = nullptr; - Instruction *Val = nullptr; - if (auto *Ld = dyn_cast<LoadInst>(Repl)) { - Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand()); - } else if (auto *St = dyn_cast<StoreInst>(Repl)) { - Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand()); - Val = dyn_cast<Instruction>(St->getValueOperand()); - // Check that the stored value is available. - if (Val) { - if (isa<GetElementPtrInst>(Val)) { - // Check whether we can compute the GEP at HoistPt. - if (!allGepOperandsAvailable(Val, HoistPt)) - return false; - } else if (!DT->dominates(Val->getParent(), HoistPt)) - return false; - } - } - - // Check whether we can compute the Gep at HoistPt. - if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) - return false; - - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); - - if (Val && isa<GetElementPtrInst>(Val)) - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); - - return true; -} - -std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) { - unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; - for (const HoistingPointInfo &HP : HPL) { - // Find out whether we already have one of the instructions in HoistPt, - // in which case we do not have to move it. 
- BasicBlock *DestBB = HP.first; - const SmallVecInsn &InstructionsToHoist = HP.second; - Instruction *Repl = nullptr; - for (Instruction *I : InstructionsToHoist) - if (I->getParent() == DestBB) - // If there are two instructions in HoistPt to be hoisted in place: - // update Repl to be the first one, such that we can rename the uses - // of the second based on the first. - if (!Repl || firstInBB(I, Repl)) - Repl = I; - - // Keep track of whether we moved the instruction so we know whether we - // should move the MemoryAccess. - bool MoveAccess = true; - if (Repl) { - // Repl is already in HoistPt: it remains in place. - assert(allOperandsAvailable(Repl, DestBB) && - "instruction depends on operands that are not available"); - MoveAccess = false; - } else { - // When we do not find Repl in HoistPt, select the first in the list - // and move it to HoistPt. - Repl = InstructionsToHoist.front(); - - // We can move Repl in HoistPt only when all operands are available. - // The order in which hoistings are done may influence the availability - // of operands. - if (!allOperandsAvailable(Repl, DestBB)) { - // When HoistingGeps there is nothing more we can do to make the - // operands available: just continue. - if (HoistingGeps) - continue; - - // When not HoistingGeps we need to copy the GEPs. - if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist)) - continue; + } + + // Check whether we can compute the Gep at HoistPt. + if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) + return false; + + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); + + if (Val && isa<GetElementPtrInst>(Val)) + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); + + return true; +} + +std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) { + unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; + for (const HoistingPointInfo &HP : HPL) { + // Find out whether we already have one of the instructions in HoistPt, + // in which case we do not have to move it. + BasicBlock *DestBB = HP.first; + const SmallVecInsn &InstructionsToHoist = HP.second; + Instruction *Repl = nullptr; + for (Instruction *I : InstructionsToHoist) + if (I->getParent() == DestBB) + // If there are two instructions in HoistPt to be hoisted in place: + // update Repl to be the first one, such that we can rename the uses + // of the second based on the first. + if (!Repl || firstInBB(I, Repl)) + Repl = I; + + // Keep track of whether we moved the instruction so we know whether we + // should move the MemoryAccess. + bool MoveAccess = true; + if (Repl) { + // Repl is already in HoistPt: it remains in place. + assert(allOperandsAvailable(Repl, DestBB) && + "instruction depends on operands that are not available"); + MoveAccess = false; + } else { + // When we do not find Repl in HoistPt, select the first in the list + // and move it to HoistPt. + Repl = InstructionsToHoist.front(); + + // We can move Repl in HoistPt only when all operands are available. + // The order in which hoistings are done may influence the availability + // of operands. + if (!allOperandsAvailable(Repl, DestBB)) { + // When HoistingGeps there is nothing more we can do to make the + // operands available: just continue. + if (HoistingGeps) + continue; + + // When not HoistingGeps we need to copy the GEPs. + if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist)) + continue; } - - // Move the instruction at the end of HoistPt. 
- Instruction *Last = DestBB->getTerminator(); - MD->removeInstruction(Repl); - Repl->moveBefore(Last); - - DFSNumber[Repl] = DFSNumber[Last]++; + + // Move the instruction at the end of HoistPt. + Instruction *Last = DestBB->getTerminator(); + MD->removeInstruction(Repl); + Repl->moveBefore(Last); + + DFSNumber[Repl] = DFSNumber[Last]++; } - NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess); - - if (isa<LoadInst>(Repl)) - ++NL; - else if (isa<StoreInst>(Repl)) - ++NS; - else if (isa<CallInst>(Repl)) - ++NC; - else // Scalar - ++NI; + NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess); + + if (isa<LoadInst>(Repl)) + ++NL; + else if (isa<StoreInst>(Repl)) + ++NS; + else if (isa<CallInst>(Repl)) + ++NC; + else // Scalar + ++NI; } - if (MSSA && VerifyMemorySSA) - MSSA->verifyMemorySSA(); - - NumHoisted += NL + NS + NC + NI; - NumRemoved += NR; - NumLoadsHoisted += NL; - NumStoresHoisted += NS; - NumCallsHoisted += NC; - return {NI, NL + NC + NS}; -} - -std::pair<unsigned, unsigned> GVNHoist::hoistExpressions(Function &F) { - InsnInfo II; - LoadInfo LI; - StoreInfo SI; - CallInfo CI; - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { - int InstructionNb = 0; - for (Instruction &I1 : *BB) { - // If I1 cannot guarantee progress, subsequent instructions - // in BB cannot be hoisted anyways. - if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) { - HoistBarrier.insert(BB); - break; - } - // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting - // deeper may increase the register pressure and compilation time. - if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) - break; - - // Do not value number terminator instructions. - if (I1.isTerminator()) - break; - - if (auto *Load = dyn_cast<LoadInst>(&I1)) - LI.insert(Load, VN); - else if (auto *Store = dyn_cast<StoreInst>(&I1)) - SI.insert(Store, VN); - else if (auto *Call = dyn_cast<CallInst>(&I1)) { - if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) { - if (isa<DbgInfoIntrinsic>(Intr) || - Intr->getIntrinsicID() == Intrinsic::assume || - Intr->getIntrinsicID() == Intrinsic::sideeffect) - continue; - } - if (Call->mayHaveSideEffects()) - break; - - if (Call->isConvergent()) - break; - - CI.insert(Call, VN); - } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1)) - // Do not hoist scalars past calls that may write to memory because - // that could result in spills later. geps are handled separately. - // TODO: We can relax this for targets like AArch64 as they have more - // registers than X86. - II.insert(&I1, VN); - } + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + + NumHoisted += NL + NS + NC + NI; + NumRemoved += NR; + NumLoadsHoisted += NL; + NumStoresHoisted += NS; + NumCallsHoisted += NC; + return {NI, NL + NC + NS}; +} + +std::pair<unsigned, unsigned> GVNHoist::hoistExpressions(Function &F) { + InsnInfo II; + LoadInfo LI; + StoreInfo SI; + CallInfo CI; + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { + int InstructionNb = 0; + for (Instruction &I1 : *BB) { + // If I1 cannot guarantee progress, subsequent instructions + // in BB cannot be hoisted anyways. + if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) { + HoistBarrier.insert(BB); + break; + } + // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting + // deeper may increase the register pressure and compilation time. + if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) + break; + + // Do not value number terminator instructions. 
+ if (I1.isTerminator()) + break; + + if (auto *Load = dyn_cast<LoadInst>(&I1)) + LI.insert(Load, VN); + else if (auto *Store = dyn_cast<StoreInst>(&I1)) + SI.insert(Store, VN); + else if (auto *Call = dyn_cast<CallInst>(&I1)) { + if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) { + if (isa<DbgInfoIntrinsic>(Intr) || + Intr->getIntrinsicID() == Intrinsic::assume || + Intr->getIntrinsicID() == Intrinsic::sideeffect) + continue; + } + if (Call->mayHaveSideEffects()) + break; + + if (Call->isConvergent()) + break; + + CI.insert(Call, VN); + } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1)) + // Do not hoist scalars past calls that may write to memory because + // that could result in spills later. geps are handled separately. + // TODO: We can relax this for targets like AArch64 as they have more + // registers than X86. + II.insert(&I1, VN); + } } - HoistingPointList HPL; - computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); - computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); - computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); - computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); - return hoist(HPL); -} - + HoistingPointList HPL; + computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); + computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); + computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); + computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); + computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); + computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); + return hoist(HPL); +} + } // end namespace llvm PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp index aef927ab65..35ad503e23 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp @@ -158,7 +158,7 @@ public: void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) { for (auto II = Insts.begin(); II != Insts.end();) { - if (!llvm::is_contained(Blocks, (*II)->getParent())) { + if (!llvm::is_contained(Blocks, (*II)->getParent())) { ActiveBlocks.remove((*II)->getParent()); II = Insts.erase(II); } else { @@ -276,7 +276,7 @@ public: auto VI = Values.begin(); while (BI != Blocks.end()) { assert(VI != Values.end()); - if (!llvm::is_contained(NewBlocks, *BI)) { + if (!llvm::is_contained(NewBlocks, *BI)) { BI = Blocks.erase(BI); VI = Values.erase(VI); } else { @@ -692,7 +692,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( ModelledPHI NewPHI(NewInsts, ActivePreds); // Does sinking this instruction render previous PHIs redundant? 
- if (NeededPHIs.erase(NewPHI)) + if (NeededPHIs.erase(NewPHI)) RecomputePHIContents = true; if (RecomputePHIContents) { @@ -754,7 +754,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( Cand.NumMemoryInsts = MemoryInstNum; Cand.NumBlocks = ActivePreds.size(); Cand.NumPHIs = NeededPHIs.size(); - append_range(Cand.Blocks, ActivePreds); + append_range(Cand.Blocks, ActivePreds); return Cand; } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp index 61eb4ce0ed..80e644fc4f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp @@ -347,8 +347,8 @@ bool GuardWideningImpl::eliminateInstrViaWidening( const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; auto I = GuardsInCurBB.begin(); - auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr) - : GuardsInCurBB.end(); + auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr) + : GuardsInCurBB.end(); #ifndef NDEBUG { @@ -665,12 +665,12 @@ bool GuardWideningImpl::combineRangeChecks( }; copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck); - erase_if(Checks, IsCurrentCheck); + erase_if(Checks, IsCurrentCheck); assert(CurrentChecks.size() != 0 && "We know we have at least one!"); if (CurrentChecks.size() < 3) { - llvm::append_range(RangeChecksOut, CurrentChecks); + llvm::append_range(RangeChecksOut, CurrentChecks); continue; } @@ -698,7 +698,7 @@ bool GuardWideningImpl::combineRangeChecks( return (HighOffset - RC.getOffsetValue()).ult(MaxDiff); }; - if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK)) + if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK)) return false; // We have a series of f+1 checks as: diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp index ae1fff0fa8..29c45e83b9 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -131,10 +131,10 @@ static cl::opt<bool> LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops")); -static cl::opt<bool> -AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), - cl::desc("Allow widening of indvars to eliminate s/zext")); - +static cl::opt<bool> +AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), + cl::desc("Allow widening of indvars to eliminate s/zext")); + namespace { struct RewritePhi; @@ -149,7 +149,7 @@ class IndVarSimplify { std::unique_ptr<MemorySSAUpdater> MSSAU; SmallVector<WeakTrackingVH, 16> DeadInsts; - bool WidenIndVars; + bool WidenIndVars; bool handleFloatingPointIV(Loop *L, PHINode *PH); bool rewriteNonIntegerIVs(Loop *L); @@ -172,9 +172,9 @@ class IndVarSimplify { public: IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, const DataLayout &DL, TargetLibraryInfo *TLI, - TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars) - : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), - WidenIndVars(WidenIndVars) { + TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars) + : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), + WidenIndVars(WidenIndVars) { if (MSSA) MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); } @@ -508,8 +508,8 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { /// Update 
information about the induction variable that is extended by this /// sign or zero extend operation. This is used to determine the final width of /// the IV before actually widening it. -static void visitIVCast(CastInst *Cast, WideIVInfo &WI, - ScalarEvolution *SE, +static void visitIVCast(CastInst *Cast, WideIVInfo &WI, + ScalarEvolution *SE, const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) @@ -631,18 +631,18 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L, } } while(!LoopPhis.empty()); - // Continue if we disallowed widening. - if (!WidenIndVars) - continue; - + // Continue if we disallowed widening. + if (!WidenIndVars) + continue; + for (; !WideIVs.empty(); WideIVs.pop_back()) { - unsigned ElimExt; - unsigned Widened; - if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter, - DT, DeadInsts, ElimExt, Widened, - HasGuards, UsePostIncrementRanges)) { - NumElimExt += ElimExt; - NumWidened += Widened; + unsigned ElimExt; + unsigned Widened; + if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter, + DT, DeadInsts, ElimExt, Widened, + HasGuards, UsePostIncrementRanges)) { + NumElimExt += ElimExt; + NumWidened += Widened; Changed = true; LoopPhis.push_back(WidePhi); } @@ -785,7 +785,7 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, // If we can't analyze propagation through this instruction, just skip it // and transitive users. Safe as false is a conservative result. - if (!propagatesPoison(cast<Operator>(I)) && I != Root) + if (!propagatesPoison(cast<Operator>(I)) && I != Root) continue; if (KnownPoison.insert(I).second) @@ -1290,116 +1290,116 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { return MadeAnyChanges; } -static void replaceExitCond(BranchInst *BI, Value *NewCond, - SmallVectorImpl<WeakTrackingVH> &DeadInsts) { - auto *OldCond = BI->getCondition(); - BI->setCondition(NewCond); - if (OldCond->use_empty()) - DeadInsts.emplace_back(OldCond); -} - -static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, - SmallVectorImpl<WeakTrackingVH> &DeadInsts) { - BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); - bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); - auto *OldCond = BI->getCondition(); - auto *NewCond = - ConstantInt::get(OldCond->getType(), IsTaken ? 
ExitIfTrue : !ExitIfTrue); - replaceExitCond(BI, NewCond, DeadInsts); -} - -static void replaceWithInvariantCond( - const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, - const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, - SmallVectorImpl<WeakTrackingVH> &DeadInsts) { - BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); - Rewriter.setInsertPoint(BI); - auto *LHSV = Rewriter.expandCodeFor(InvariantLHS); - auto *RHSV = Rewriter.expandCodeFor(InvariantRHS); - bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); - if (ExitIfTrue) - InvariantPred = ICmpInst::getInversePredicate(InvariantPred); - IRBuilder<> Builder(BI); - auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV, - BI->getCondition()->getName()); - replaceExitCond(BI, NewCond, DeadInsts); -} - -static bool optimizeLoopExitWithUnknownExitCount( - const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, - const SCEV *MaxIter, bool Inverted, bool SkipLastIter, - ScalarEvolution *SE, SCEVExpander &Rewriter, - SmallVectorImpl<WeakTrackingVH> &DeadInsts) { - ICmpInst::Predicate Pred; - Value *LHS, *RHS; - using namespace PatternMatch; - BasicBlock *TrueSucc, *FalseSucc; - if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), - m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) - return false; - - assert((L->contains(TrueSucc) != L->contains(FalseSucc)) && - "Not a loop exit!"); - - // 'LHS pred RHS' should now mean that we stay in loop. - if (L->contains(FalseSucc)) - Pred = CmpInst::getInversePredicate(Pred); - - // If we are proving loop exit, invert the predicate. - if (Inverted) - Pred = CmpInst::getInversePredicate(Pred); - - const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); - const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); - // Can we prove it to be trivially true? - if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) { - foldExit(L, ExitingBB, Inverted, DeadInsts); - return true; - } - // Further logic works for non-inverted condition only. - if (Inverted) - return false; - - auto *ARTy = LHSS->getType(); - auto *MaxIterTy = MaxIter->getType(); - // If possible, adjust types. - if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy)) - MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy); - else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) { - const SCEV *MinusOne = SE->getMinusOne(ARTy); - auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy); - if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI)) - MaxIter = SE->getTruncateExpr(MaxIter, ARTy); - } - - if (SkipLastIter) { - const SCEV *One = SE->getOne(MaxIter->getType()); - MaxIter = SE->getMinusSCEV(MaxIter, One); +static void replaceExitCond(BranchInst *BI, Value *NewCond, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { + auto *OldCond = BI->getCondition(); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.emplace_back(OldCond); +} + +static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { + BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); + bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); + auto *OldCond = BI->getCondition(); + auto *NewCond = + ConstantInt::get(OldCond->getType(), IsTaken ? 
ExitIfTrue : !ExitIfTrue); + replaceExitCond(BI, NewCond, DeadInsts); +} + +static void replaceWithInvariantCond( + const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, + const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { + BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); + Rewriter.setInsertPoint(BI); + auto *LHSV = Rewriter.expandCodeFor(InvariantLHS); + auto *RHSV = Rewriter.expandCodeFor(InvariantRHS); + bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); + if (ExitIfTrue) + InvariantPred = ICmpInst::getInversePredicate(InvariantPred); + IRBuilder<> Builder(BI); + auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV, + BI->getCondition()->getName()); + replaceExitCond(BI, NewCond, DeadInsts); +} + +static bool optimizeLoopExitWithUnknownExitCount( + const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, + const SCEV *MaxIter, bool Inverted, bool SkipLastIter, + ScalarEvolution *SE, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { + ICmpInst::Predicate Pred; + Value *LHS, *RHS; + using namespace PatternMatch; + BasicBlock *TrueSucc, *FalseSucc; + if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), + m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) + return false; + + assert((L->contains(TrueSucc) != L->contains(FalseSucc)) && + "Not a loop exit!"); + + // 'LHS pred RHS' should now mean that we stay in loop. + if (L->contains(FalseSucc)) + Pred = CmpInst::getInversePredicate(Pred); + + // If we are proving loop exit, invert the predicate. + if (Inverted) + Pred = CmpInst::getInversePredicate(Pred); + + const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); + const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); + // Can we prove it to be trivially true? + if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) { + foldExit(L, ExitingBB, Inverted, DeadInsts); + return true; } - - // Check if there is a loop-invariant predicate equivalent to our check. - auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS, - L, BI, MaxIter); - if (!LIP) - return false; - - // Can we prove it to be trivially true? - if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI)) - foldExit(L, ExitingBB, Inverted, DeadInsts); - else - replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS, - Rewriter, DeadInsts); - - return true; + // Further logic works for non-inverted condition only. + if (Inverted) + return false; + + auto *ARTy = LHSS->getType(); + auto *MaxIterTy = MaxIter->getType(); + // If possible, adjust types. + if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy)) + MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy); + else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) { + const SCEV *MinusOne = SE->getMinusOne(ARTy); + auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy); + if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI)) + MaxIter = SE->getTruncateExpr(MaxIter, ARTy); + } + + if (SkipLastIter) { + const SCEV *One = SE->getOne(MaxIter->getType()); + MaxIter = SE->getMinusSCEV(MaxIter, One); + } + + // Check if there is a loop-invariant predicate equivalent to our check. + auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS, + L, BI, MaxIter); + if (!LIP) + return false; + + // Can we prove it to be trivially true? 
+ if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI)) + foldExit(L, ExitingBB, Inverted, DeadInsts); + else + replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS, + Rewriter, DeadInsts); + + return true; } bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - // Remove all exits which aren't both rewriteable and execute on every - // iteration. - llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { + // Remove all exits which aren't both rewriteable and execute on every + // iteration. + llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { // If our exitting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost // loop runs before it exits. @@ -1415,10 +1415,10 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { if (isa<Constant>(BI->getCondition())) return true; - // Likewise, the loop latch must be dominated by the exiting BB. - if (!DT->dominates(ExitingBB, L->getLoopLatch())) + // Likewise, the loop latch must be dominated by the exiting BB. + if (!DT->dominates(ExitingBB, L->getLoopLatch())) return true; - + return false; }); @@ -1426,25 +1426,25 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { return false; // Get a symbolic upper bound on the loop backedge taken count. - const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L); + const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(MaxExitCount)) return false; - // Visit our exit blocks in order of dominance. We know from the fact that - // all exits must dominate the latch, so there is a total dominance order - // between them. - llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { + // Visit our exit blocks in order of dominance. We know from the fact that + // all exits must dominate the latch, so there is a total dominance order + // between them. + llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { // std::sort sorts in ascending order, so we want the inverse of // the normal dominance relation. if (A == B) return false; - if (DT->properlyDominates(A, B)) - return true; - else { - assert(DT->properlyDominates(B, A) && - "expected total dominance order!"); - return false; - } - }); + if (DT->properlyDominates(A, B)) + return true; + else { + assert(DT->properlyDominates(B, A) && + "expected total dominance order!"); + return false; + } + }); #ifdef ASSERT for (unsigned i = 1; i < ExitingBlocks.size(); i++) { assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); @@ -1452,56 +1452,56 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { #endif bool Changed = false; - bool SkipLastIter = false; + bool SkipLastIter = false; SmallSet<const SCEV*, 8> DominatingExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); - if (isa<SCEVCouldNotCompute>(ExitCount)) { - // Okay, we do not know the exit count here. Can we at least prove that it - // will remain the same within iteration space? 
- auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); - auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) { - return optimizeLoopExitWithUnknownExitCount( - L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE, - Rewriter, DeadInsts); - }; - - // TODO: We might have proved that we can skip the last iteration for - // this check. In this case, we only want to check the condition on the - // pre-last iteration (MaxExitCount - 1). However, there is a nasty - // corner case: - // - // for (i = len; i != 0; i--) { ... check (i ult X) ... } - // - // If we could not prove that len != 0, then we also could not prove that - // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then - // OptimizeCond will likely not prove anything for it, even if it could - // prove the same fact for len. - // - // As a temporary solution, we query both last and pre-last iterations in - // hope that we will be able to prove triviality for at least one of - // them. We can stop querying MaxExitCount for this case once SCEV - // understands that (MaxExitCount - 1) will not overflow here. - if (OptimizeCond(false, false) || OptimizeCond(true, false)) - Changed = true; - else if (SkipLastIter) - if (OptimizeCond(false, true) || OptimizeCond(true, true)) - Changed = true; - continue; - } - - if (MaxExitCount == ExitCount) - // If the loop has more than 1 iteration, all further checks will be - // executed 1 iteration less. - SkipLastIter = true; - + if (isa<SCEVCouldNotCompute>(ExitCount)) { + // Okay, we do not know the exit count here. Can we at least prove that it + // will remain the same within iteration space? + auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); + auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) { + return optimizeLoopExitWithUnknownExitCount( + L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE, + Rewriter, DeadInsts); + }; + + // TODO: We might have proved that we can skip the last iteration for + // this check. In this case, we only want to check the condition on the + // pre-last iteration (MaxExitCount - 1). However, there is a nasty + // corner case: + // + // for (i = len; i != 0; i--) { ... check (i ult X) ... } + // + // If we could not prove that len != 0, then we also could not prove that + // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then + // OptimizeCond will likely not prove anything for it, even if it could + // prove the same fact for len. + // + // As a temporary solution, we query both last and pre-last iterations in + // hope that we will be able to prove triviality for at least one of + // them. We can stop querying MaxExitCount for this case once SCEV + // understands that (MaxExitCount - 1) will not overflow here. + if (OptimizeCond(false, false) || OptimizeCond(true, false)) + Changed = true; + else if (SkipLastIter) + if (OptimizeCond(false, true) || OptimizeCond(true, true)) + Changed = true; + continue; + } + + if (MaxExitCount == ExitCount) + // If the loop has more than 1 iteration, all further checks will be + // executed 1 iteration less. + SkipLastIter = true; + // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. // TODO: Given we know the backedge can't be taken, we should go ahead // and break it. Or at least, kill all the header phis and simplify. 
if (ExitCount->isZero()) { - foldExit(L, ExitingBB, true, DeadInsts); + foldExit(L, ExitingBB, true, DeadInsts); Changed = true; continue; } @@ -1523,7 +1523,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // one? if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxExitCount, ExitCount)) { - foldExit(L, ExitingBB, false, DeadInsts); + foldExit(L, ExitingBB, false, DeadInsts); Changed = true; continue; } @@ -1533,7 +1533,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // exiting iteration, but (from the visit order) strictly follows another // which does the same and is thus dead. if (!DominatingExitCounts.insert(ExitCount).second) { - foldExit(L, ExitingBB, false, DeadInsts); + foldExit(L, ExitingBB, false, DeadInsts); Changed = true; continue; } @@ -1789,9 +1789,9 @@ bool IndVarSimplify::run(Loop *L) { if (optimizeLoopExits(L, Rewriter)) { Changed = true; // Given we've changed exit counts, notify SCEV - // Some nested loops may share same folded exit basic block, - // thus we need to notify top most loop. - SE->forgetTopmostLoop(L); + // Some nested loops may share same folded exit basic block, + // thus we need to notify top most loop. + SE->forgetTopmostLoop(L); } // Try to form loop invariant tests for loop exits by changing how many @@ -1868,15 +1868,15 @@ bool IndVarSimplify::run(Loop *L) { // Now that we're done iterating through lists, clean up any instructions // which are now dead. - while (!DeadInsts.empty()) { - Value *V = DeadInsts.pop_back_val(); - - if (PHINode *PHI = dyn_cast_or_null<PHINode>(V)) - Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get()); - else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) + while (!DeadInsts.empty()) { + Value *V = DeadInsts.pop_back_val(); + + if (PHINode *PHI = dyn_cast_or_null<PHINode>(V)) + Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get()); + else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); - } + } // The Rewriter may not be used from this point on. 
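The foldExit/replaceExitCond helpers restored in the IndVarSimplify hunks above rewrite a loop exit whose outcome is provable into a constant branch condition and queue the old condition for deletion once nothing else uses it. A minimal sketch of that idea follows, using simplified stand-in types rather than the real LLVM classes (the Value, ConstantBool, Branch and Pool names below are invented for illustration only):

#include <memory>
#include <string>
#include <vector>

// Simplified stand-ins for llvm::Value / llvm::BranchInst; only the pieces
// needed to show the exit-folding logic are modelled.
struct Value {
  std::string Name;
  int Uses = 0;
  virtual ~Value() = default;
};

struct ConstantBool : Value {
  bool Val;
  explicit ConstantBool(bool V) : Val(V) { Name = V ? "true" : "false"; }
};

struct Branch {
  Value *Cond = nullptr;   // current exit condition
  bool ExitIfTrue = false; // does the 'true' edge leave the loop?
};

// Mirrors replaceExitCond: install the new condition and remember the old
// one for later cleanup once the branch was its last user.
static void replaceExitCond(Branch &BI, Value *NewCond,
                            std::vector<Value *> &DeadInsts) {
  Value *OldCond = BI.Cond;
  BI.Cond = NewCond;
  ++NewCond->Uses;
  if (OldCond && --OldCond->Uses == 0)
    DeadInsts.push_back(OldCond);
}

// Mirrors foldExit: pick the constant so the exit is always taken (IsTaken)
// or never taken (!IsTaken), matching "IsTaken ? ExitIfTrue : !ExitIfTrue".
static void foldExit(Branch &BI, bool IsTaken,
                     std::vector<std::unique_ptr<Value>> &Pool,
                     std::vector<Value *> &DeadInsts) {
  Pool.push_back(
      std::make_unique<ConstantBool>(IsTaken ? BI.ExitIfTrue : !BI.ExitIfTrue));
  replaceExitCond(BI, Pool.back().get(), DeadInsts);
}

int main() {
  std::vector<std::unique_ptr<Value>> Pool;
  std::vector<Value *> DeadInsts;
  Pool.push_back(std::make_unique<Value>());
  Pool.back()->Name = "icmp.slt";
  Pool.back()->Uses = 1; // used only by the exit branch
  Branch ExitBr;
  ExitBr.Cond = Pool.back().get();
  ExitBr.ExitIfTrue = true;
  foldExit(ExitBr, /*IsTaken=*/false, Pool, DeadInsts);
  // ExitBr.Cond is now the constant "false": this exit is never taken, and
  // "icmp.slt" sits in DeadInsts awaiting cleanup.
  return 0;
}

The cleanup side matches the loop restored at the end of IndVarSimplify::run above: everything queued in DeadInsts is only deleted after the exits have been rewritten, via RecursivelyDeleteDeadPHINode and RecursivelyDeleteTriviallyDeadInstructions.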
@@ -1926,8 +1926,8 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, Function *F = L.getHeader()->getParent(); const DataLayout &DL = F->getParent()->getDataLayout(); - IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, - WidenIndVars && AllowIVWidening); + IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, + WidenIndVars && AllowIVWidening); if (!IVS.run(&L)) return PreservedAnalyses::all(); @@ -1964,7 +1964,7 @@ struct IndVarSimplifyLegacyPass : public LoopPass { if (MSSAAnalysis) MSSA = &MSSAAnalysis->getMSSA(); - IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening); + IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening); return IVS.run(L); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 6e09dec198..321f44932a 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -52,7 +52,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -113,9 +113,9 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden, static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks", cl::Hidden, cl::init(false)); -static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations", - cl::Hidden, cl::init(10)); - +static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations", + cl::Hidden, cl::init(10)); + static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch", cl::Hidden, cl::init(true)); @@ -228,27 +228,27 @@ public: SmallVectorImpl<InductiveRangeCheck> &Checks); }; -struct LoopStructure; - +struct LoopStructure; + class InductiveRangeCheckElimination { ScalarEvolution &SE; BranchProbabilityInfo *BPI; DominatorTree &DT; LoopInfo &LI; - using GetBFIFunc = - llvm::Optional<llvm::function_ref<llvm::BlockFrequencyInfo &()> >; - GetBFIFunc GetBFI; - - // Returns true if it is profitable to do a transform basing on estimation of - // number of iterations. - bool isProfitableToTransform(const Loop &L, LoopStructure &LS); - + using GetBFIFunc = + llvm::Optional<llvm::function_ref<llvm::BlockFrequencyInfo &()> >; + GetBFIFunc GetBFI; + + // Returns true if it is profitable to do a transform basing on estimation of + // number of iterations. + bool isProfitableToTransform(const Loop &L, LoopStructure &LS); + public: InductiveRangeCheckElimination(ScalarEvolution &SE, BranchProbabilityInfo *BPI, DominatorTree &DT, - LoopInfo &LI, GetBFIFunc GetBFI = None) - : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {} + LoopInfo &LI, GetBFIFunc GetBFI = None) + : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {} bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop); }; @@ -505,8 +505,8 @@ struct LoopStructure { return Result; } - static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &, - const char *&); + static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &, + const char *&); }; /// This class is used to constrain loops to run within a given iteration space. 
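The GetBFI / isProfitableToTransform members declared above estimate the runtime trip count as header frequency divided by preheader frequency and compare it against irce-min-runtime-iterations (default 10). A minimal standalone sketch of that ratio, with hypothetical frequency values not taken from the patch:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t MinRuntimeIterations = 10;  // default of -irce-min-runtime-iterations
  uint64_t preheaderFreq = 8;                // hypothetical BlockFrequencyInfo values
  uint64_t headerFreq = 640;
  // headerFreq / preheaderFreq approximates the average number of loop
  // iterations per entry; the transform is skipped when the estimate is
  // below the threshold.
  uint64_t estimate = preheaderFreq ? headerFreq / preheaderFreq : 0;
  std::printf("estimated iterations: %llu -> %s\n",
              (unsigned long long)estimate,
              estimate < MinRuntimeIterations ? "skip" : "transform");
  return 0;
}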
@@ -750,7 +750,7 @@ static bool isSafeIncreasingBound(const SCEV *Start, } Optional<LoopStructure> -LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L, +LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L, const char *&FailureReason) { if (!L.isLoopSimplifyForm()) { FailureReason = "loop not in LoopSimplify form"; @@ -1768,25 +1768,25 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) { auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F); LoopInfo &LI = AM.getResult<LoopAnalysis>(F); - // Get BFI analysis result on demand. Please note that modification of - // CFG invalidates this analysis and we should handle it. - auto getBFI = [&F, &AM ]()->BlockFrequencyInfo & { - return AM.getResult<BlockFrequencyAnalysis>(F); - }; - InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI }); + // Get BFI analysis result on demand. Please note that modification of + // CFG invalidates this analysis and we should handle it. + auto getBFI = [&F, &AM ]()->BlockFrequencyInfo & { + return AM.getResult<BlockFrequencyAnalysis>(F); + }; + InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI }); bool Changed = false; - { - bool CFGChanged = false; - for (const auto &L : LI) { - CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, - /*PreserveLCSSA=*/false); - Changed |= formLCSSARecursively(*L, DT, &LI, &SE); - } - Changed |= CFGChanged; - - if (CFGChanged && !SkipProfitabilityChecks) - AM.invalidate<BlockFrequencyAnalysis>(F); + { + bool CFGChanged = false; + for (const auto &L : LI) { + CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, + /*PreserveLCSSA=*/false); + Changed |= formLCSSARecursively(*L, DT, &LI, &SE); + } + Changed |= CFGChanged; + + if (CFGChanged && !SkipProfitabilityChecks) + AM.invalidate<BlockFrequencyAnalysis>(F); } SmallPriorityWorklist<Loop *, 4> Worklist; @@ -1798,11 +1798,11 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) { while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); - if (IRCE.run(L, LPMAddNewLoop)) { - Changed = true; - if (!SkipProfitabilityChecks) - AM.invalidate<BlockFrequencyAnalysis>(F); - } + if (IRCE.run(L, LPMAddNewLoop)) { + Changed = true; + if (!SkipProfitabilityChecks) + AM.invalidate<BlockFrequencyAnalysis>(F); + } } if (!Changed) @@ -1843,37 +1843,37 @@ bool IRCELegacyPass::runOnFunction(Function &F) { return Changed; } -bool -InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L, - LoopStructure &LS) { - if (SkipProfitabilityChecks) - return true; - if (GetBFI.hasValue()) { - BlockFrequencyInfo &BFI = (*GetBFI)(); - uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency(); - uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency(); - if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) { - LLVM_DEBUG(dbgs() << "irce: could not prove profitability: " - << "the estimated number of iterations basing on " - "frequency info is " << (hFreq / phFreq) << "\n";); - return false; - } - return true; - } - - if (!BPI) - return true; - BranchProbability ExitProbability = - BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx); - if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) { - LLVM_DEBUG(dbgs() << "irce: could not prove profitability: " - << "the exit probability is too big " << ExitProbability - << "\n";); - return false; - } - return true; -} - +bool +InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L, + LoopStructure &LS) { + if 
(SkipProfitabilityChecks) + return true; + if (GetBFI.hasValue()) { + BlockFrequencyInfo &BFI = (*GetBFI)(); + uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency(); + uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency(); + if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) { + LLVM_DEBUG(dbgs() << "irce: could not prove profitability: " + << "the estimated number of iterations basing on " + "frequency info is " << (hFreq / phFreq) << "\n";); + return false; + } + return true; + } + + if (!BPI) + return true; + BranchProbability ExitProbability = + BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx); + if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) { + LLVM_DEBUG(dbgs() << "irce: could not prove profitability: " + << "the exit probability is too big " << ExitProbability + << "\n";); + return false; + } + return true; +} + bool InductiveRangeCheckElimination::run( Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop) { if (L->getBlocks().size() >= LoopSizeCutoff) { @@ -1913,15 +1913,15 @@ bool InductiveRangeCheckElimination::run( const char *FailureReason = nullptr; Optional<LoopStructure> MaybeLoopStructure = - LoopStructure::parseLoopStructure(SE, *L, FailureReason); + LoopStructure::parseLoopStructure(SE, *L, FailureReason); if (!MaybeLoopStructure.hasValue()) { LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason << "\n";); return false; } LoopStructure LS = MaybeLoopStructure.getValue(); - if (!isProfitableToTransform(*L, LS)) - return false; + if (!isProfitableToTransform(*L, LS)) + return false; const SCEVAddRecExpr *IndVar = cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep))); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp index 332eb10ac1..9127f3c2e0 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -88,7 +88,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/InferAddressSpaces.h" +#include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -109,7 +109,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -164,16 +164,16 @@ public: } bool runOnFunction(Function &F) override; -}; - -class InferAddressSpacesImpl { - const TargetTransformInfo *TTI = nullptr; - const DataLayout *DL = nullptr; - - /// Target specific address space which uses of should be replaced if - /// possible. - unsigned FlatAddrSpace = 0; - +}; + +class InferAddressSpacesImpl { + const TargetTransformInfo *TTI = nullptr; + const DataLayout *DL = nullptr; + + /// Target specific address space which uses of should be replaced if + /// possible. + unsigned FlatAddrSpace = 0; + // Returns the new address space of V if updated; otherwise, returns None. 
Optional<unsigned> updateAddressSpace(const Value &V, @@ -215,11 +215,11 @@ class InferAddressSpacesImpl { const ValueToValueMapTy &ValueWithNewAddrSpace, SmallVectorImpl<const Use *> *UndefUsesToFix) const; unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const; - -public: - InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace) - : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} - bool run(Function &F); + +public: + InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace) + : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} + bool run(Function &F); }; } // end anonymous namespace @@ -295,8 +295,8 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL, case Instruction::IntToPtr: return isNoopPtrIntCastPair(Op, DL, TTI); default: - // That value is an address expression if it has an assumed address space. - return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace; + // That value is an address expression if it has an assumed address space. + return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace; } } @@ -335,9 +335,9 @@ getPointerOperands(const Value &V, const DataLayout &DL, } } -bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, - Value *OldV, - Value *NewV) const { +bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, + Value *OldV, + Value *NewV) const { Module *M = II->getParent()->getParent()->getParent(); switch (II->getIntrinsicID()) { @@ -364,7 +364,7 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, } } -void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( +void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( IntrinsicInst *II, PostorderStackTy &PostorderStack, DenseSet<Value *> &Visited) const { auto IID = II->getIntrinsicID(); @@ -389,7 +389,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( // Returns all flat address expressions in function F. The elements are // If V is an unvisited flat address expression, appends V to PostorderStack // and marks it as visited. -void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack( +void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack( Value *V, PostorderStackTy &PostorderStack, DenseSet<Value *> &Visited) const { assert(V->getType()->isPointerTy()); @@ -404,8 +404,8 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack( return; } - if (V->getType()->getPointerAddressSpace() == FlatAddrSpace && - isAddressExpression(*V, *DL, TTI)) { + if (V->getType()->getPointerAddressSpace() == FlatAddrSpace && + isAddressExpression(*V, *DL, TTI)) { if (Visited.insert(V).second) { PostorderStack.emplace_back(V, false); @@ -423,7 +423,7 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack( // Returns all flat address expressions in function F. The elements are ordered // ordered in postorder. std::vector<WeakTrackingVH> -InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { +InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { // This function implements a non-recursive postorder traversal of a partial // use-def graph of function F. PostorderStackTy PostorderStack; @@ -488,12 +488,12 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { } // Otherwise, adds its operands to the stack and explores them. PostorderStack.back().setInt(true); - // Skip values with an assumed address space. 
- if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) { - for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) { - appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, - Visited); - } + // Skip values with an assumed address space. + if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) { + for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) { + appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, + Visited); + } } } return Postorder; @@ -533,7 +533,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef( // // This may also return nullptr in the case the instruction could not be // rewritten. -Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( +Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( Instruction *I, unsigned NewAddrSpace, const ValueToValueMapTy &ValueWithNewAddrSpace, SmallVectorImpl<const Use *> *UndefUsesToFix) const { @@ -568,16 +568,16 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( return nullptr; } - unsigned AS = TTI->getAssumedAddrSpace(I); - if (AS != UninitializedAddressSpace) { - // For the assumed address space, insert an `addrspacecast` to make that - // explicit. - auto *NewPtrTy = I->getType()->getPointerElementType()->getPointerTo(AS); - auto *NewI = new AddrSpaceCastInst(I, NewPtrTy); - NewI->insertAfter(I); - return NewI; - } - + unsigned AS = TTI->getAssumedAddrSpace(I); + if (AS != UninitializedAddressSpace) { + // For the assumed address space, insert an `addrspacecast` to make that + // explicit. + auto *NewPtrTy = I->getType()->getPointerElementType()->getPointerTo(AS); + auto *NewI = new AddrSpaceCastInst(I, NewPtrTy); + NewI->insertAfter(I); + return NewI; + } + // Computes the converted pointer operands. SmallVector<Value *, 4> NewPointerOperands; for (const Use &OperandUse : I->operands()) { @@ -606,7 +606,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); GetElementPtrInst *NewGEP = GetElementPtrInst::Create( GEP->getSourceElementType(), NewPointerOperands[0], - SmallVector<Value *, 4>(GEP->indices())); + SmallVector<Value *, 4>(GEP->indices())); NewGEP->setIsInBounds(GEP->isInBounds()); return NewGEP; } @@ -718,13 +718,13 @@ static Value *cloneConstantExprWithNewAddressSpace( // expression whose address space needs to be modified, in postorder. // // See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix. -Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace( - Value *V, unsigned NewAddrSpace, - const ValueToValueMapTy &ValueWithNewAddrSpace, - SmallVectorImpl<const Use *> *UndefUsesToFix) const { +Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace( + Value *V, unsigned NewAddrSpace, + const ValueToValueMapTy &ValueWithNewAddrSpace, + SmallVectorImpl<const Use *> *UndefUsesToFix) const { // All values in Postorder are flat address expressions. - assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace && - isAddressExpression(*V, *DL, TTI)); + assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace && + isAddressExpression(*V, *DL, TTI)); if (Instruction *I = dyn_cast<Instruction>(V)) { Value *NewV = cloneInstructionWithNewAddressSpace( @@ -744,8 +744,8 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace( // Defines the join operation on the address space lattice (see the file header // comments). 
-unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1, - unsigned AS2) const { +unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1, + unsigned AS2) const { if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace) return FlatAddrSpace; @@ -758,7 +758,7 @@ unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1, return (AS1 == AS2) ? AS1 : FlatAddrSpace; } -bool InferAddressSpacesImpl::run(Function &F) { +bool InferAddressSpacesImpl::run(Function &F) { DL = &F.getParent()->getDataLayout(); if (AssumeDefaultIsFlatAddressSpace) @@ -785,7 +785,7 @@ bool InferAddressSpacesImpl::run(Function &F) { // Constants need to be tracked through RAUW to handle cases with nested // constant expressions, so wrap values in WeakTrackingVH. -void InferAddressSpacesImpl::inferAddressSpaces( +void InferAddressSpacesImpl::inferAddressSpaces( ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const { SetVector<Value *> Worklist(Postorder.begin(), Postorder.end()); @@ -829,7 +829,7 @@ void InferAddressSpacesImpl::inferAddressSpaces( } } -Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace( +Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace( const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const { assert(InferredAddrSpace.count(&V)); @@ -867,24 +867,24 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace( else NewAS = joinAddressSpaces(Src0AS, Src1AS); } else { - unsigned AS = TTI->getAssumedAddrSpace(&V); - if (AS != UninitializedAddressSpace) { - // Use the assumed address space directly. - NewAS = AS; - } else { - // Otherwise, infer the address space from its pointer operands. - for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) { - auto I = InferredAddrSpace.find(PtrOperand); - unsigned OperandAS = - I != InferredAddrSpace.end() - ? I->second - : PtrOperand->getType()->getPointerAddressSpace(); - - // join(flat, *) = flat. So we can break if NewAS is already flat. - NewAS = joinAddressSpaces(NewAS, OperandAS); - if (NewAS == FlatAddrSpace) - break; - } + unsigned AS = TTI->getAssumedAddrSpace(&V); + if (AS != UninitializedAddressSpace) { + // Use the assumed address space directly. + NewAS = AS; + } else { + // Otherwise, infer the address space from its pointer operands. + for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) { + auto I = InferredAddrSpace.find(PtrOperand); + unsigned OperandAS = + I != InferredAddrSpace.end() + ? I->second + : PtrOperand->getType()->getPointerAddressSpace(); + + // join(flat, *) = flat. So we can break if NewAS is already flat. + NewAS = joinAddressSpaces(NewAS, OperandAS); + if (NewAS == FlatAddrSpace) + break; + } } } @@ -975,8 +975,8 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV, // \p returns true if it is OK to change the address space of constant \p C with // a ConstantExpr addrspacecast. 
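For the address-space lattice referenced above, a simplified standalone model (hypothetical constants, not the pass's actual types) of the join that joinAddressSpaces computes: an uninitialized space acts as the identity, the flat space absorbs everything, and two distinct concrete spaces join to flat.

#include <cassert>
#include <cstdio>

static const unsigned Uninitialized = ~0u;  // models UninitializedAddressSpace
static const unsigned Flat = 0;             // models the target's flat space

static unsigned joinAS(unsigned A, unsigned B) {
  if (A == Flat || B == Flat)
    return Flat;               // join(flat, *) = flat
  if (A == Uninitialized)
    return B;                  // uninitialized is the identity element
  if (B == Uninitialized)
    return A;
  return A == B ? A : Flat;    // distinct concrete spaces conflict
}

int main() {
  assert(joinAS(3, 3) == 3);
  assert(joinAS(3, 5) == Flat);
  assert(joinAS(Uninitialized, 5) == 5);
  std::puts("join behaves as described in the comments above");
  return 0;
}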
-bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C, - unsigned NewAS) const { +bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C, + unsigned NewAS) const { assert(NewAS != UninitializedAddressSpace); unsigned SrcAS = C->getType()->getPointerAddressSpace(); @@ -1015,7 +1015,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, return I; } -bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( +bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { // For each address expression to be modified, creates a clone of it with its @@ -1026,12 +1026,12 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( SmallVector<const Use *, 32> UndefUsesToFix; for (Value* V : Postorder) { unsigned NewAddrSpace = InferredAddrSpace.lookup(V); - - // In some degenerate cases (e.g. invalid IR in unreachable code), we may - // not even infer the value to have its original address space. - if (NewAddrSpace == UninitializedAddressSpace) - continue; - + + // In some degenerate cases (e.g. invalid IR in unreachable code), we may + // not even infer the value to have its original address space. + if (NewAddrSpace == UninitializedAddressSpace) + continue; + if (V->getType()->getPointerAddressSpace() != NewAddrSpace) { Value *New = cloneValueWithNewAddressSpace( V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix); @@ -1097,9 +1097,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( } User *CurUser = U.getUser(); - // Skip if the current user is the new value itself. - if (CurUser == NewV) - continue; + // Skip if the current user is the new value itself. + if (CurUser == NewV) + continue; // Handle more complex cases like intrinsic that need to be remangled. 
if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) { if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV)) @@ -1186,34 +1186,34 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( return true; } -bool InferAddressSpaces::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - - return InferAddressSpacesImpl( - &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), - FlatAddrSpace) - .run(F); -} - +bool InferAddressSpaces::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + return InferAddressSpacesImpl( + &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), + FlatAddrSpace) + .run(F); +} + FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) { return new InferAddressSpaces(AddressSpace); } - -InferAddressSpacesPass::InferAddressSpacesPass() - : FlatAddrSpace(UninitializedAddressSpace) {} -InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace) - : FlatAddrSpace(AddressSpace) {} - -PreservedAnalyses InferAddressSpacesPass::run(Function &F, - FunctionAnalysisManager &AM) { - bool Changed = - InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace) - .run(F); - if (Changed) { - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; - } - return PreservedAnalyses::all(); -} + +InferAddressSpacesPass::InferAddressSpacesPass() + : FlatAddrSpace(UninitializedAddressSpace) {} +InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace) + : FlatAddrSpace(AddressSpace) {} + +PreservedAnalyses InferAddressSpacesPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = + InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace) + .run(F); + if (Changed) { + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; + } + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp index c11d2e4c1d..aeb83643b6 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -20,10 +20,10 @@ #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/Local.h" - + using namespace llvm; #define DEBUG_TYPE "instsimplify" diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp index 10b08b4e22..3e86ad4c14 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp @@ -32,7 +32,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -105,11 +105,11 @@ static cl::opt<bool> PrintLVIAfterJumpThreading( cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden); -static cl::opt<bool> JumpThreadingFreezeSelectCond( - "jump-threading-freeze-select-cond", - cl::desc("Freeze the condition when unfolding select"), cl::init(false), - cl::Hidden); - +static cl::opt<bool> JumpThreadingFreezeSelectCond( + "jump-threading-freeze-select-cond", + 
cl::desc("Freeze the condition when unfolding select"), cl::init(false), + cl::Hidden); + static cl::opt<bool> ThreadAcrossLoopHeaders( "jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), @@ -139,8 +139,8 @@ namespace { public: static char ID; // Pass identification - JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1) - : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) { + JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1) + : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) { initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); } @@ -154,7 +154,7 @@ namespace { AU.addPreserved<LazyValueInfoWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } void releaseMemory() override { Impl.releaseMemory(); } @@ -174,12 +174,12 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) // Public interface to the Jump Threading pass -FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) { - return new JumpThreading(InsertFr, Threshold); +FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) { + return new JumpThreading(InsertFr, Threshold); } -JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) { - InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr; +JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) { + InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr; DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T); } @@ -313,10 +313,10 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { bool JumpThreading::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - // Jump Threading has no sense for the targets with divergent CF - if (TTI->hasBranchDivergence()) - return false; + auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + // Jump Threading has no sense for the targets with divergent CF + if (TTI->hasBranchDivergence()) + return false; auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); @@ -341,10 +341,10 @@ bool JumpThreading::runOnFunction(Function &F) { PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &TTI = AM.getResult<TargetIRAnalysis>(F); - // Jump Threading has no sense for the targets with divergent CF - if (TTI.hasBranchDivergence()) - return PreservedAnalyses::all(); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + // Jump Threading has no sense for the targets with divergent CF + if (TTI.hasBranchDivergence()) + return PreservedAnalyses::all(); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &LVI = AM.getResult<LazyValueAnalysis>(F); @@ -362,11 +362,11 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(), std::move(BFI), std::move(BPI)); - if (PrintLVIAfterJumpThreading) { - dbgs() << "LVI for function '" << F.getName() << "':\n"; - LVI.printLVI(F, DTU.getDomTree(), dbgs()); - } - + if (PrintLVIAfterJumpThreading) { + 
dbgs() << "LVI for function '" << F.getName() << "':\n"; + LVI.printLVI(F, DTU.getDomTree(), dbgs()); + } + if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -419,7 +419,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, Unreachable.insert(&BB); if (!ThreadAcrossLoopHeaders) - findLoopHeaders(F); + findLoopHeaders(F); bool EverChanged = false; bool Changed; @@ -428,7 +428,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, for (auto &BB : F) { if (Unreachable.count(&BB)) continue; - while (processBlock(&BB)) // Thread all of the branches we can over BB. + while (processBlock(&BB)) // Thread all of the branches we can over BB. Changed = true; // Jump threading may have introduced redundant debug values into BB @@ -443,7 +443,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, continue; if (pred_empty(&BB)) { - // When processBlock makes BB unreachable it doesn't bother to fix up + // When processBlock makes BB unreachable it doesn't bother to fix up // the instructions in it. We must remove BB to prevent invalid IR. LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName() << "' with terminator: " << *BB.getTerminator() @@ -455,7 +455,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, continue; } - // processBlock doesn't thread BBs with unconditional TIs. However, if BB + // processBlock doesn't thread BBs with unconditional TIs. However, if BB // is "almost empty", we attempt to merge BB with its sole successor. auto *BI = dyn_cast<BranchInst>(BB.getTerminator()); if (BI && BI->isUnconditional()) { @@ -489,7 +489,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // at the end of block. RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. -static void replaceFoldableUses(Instruction *Cond, Value *ToVal) { +static void replaceFoldableUses(Instruction *Cond, Value *ToVal) { assert(Cond->getType() == ToVal->getType()); auto *BB = Cond->getParent(); // We can unconditionally replace all uses in non-local blocks (i.e. uses @@ -553,18 +553,18 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, // Debugger intrinsics don't incur code size. if (isa<DbgInfoIntrinsic>(I)) continue; - // Pseudo-probes don't incur code size. - if (isa<PseudoProbeInst>(I)) - continue; - + // Pseudo-probes don't incur code size. + if (isa<PseudoProbeInst>(I)) + continue; + // If this is a pointer->pointer bitcast, it is free. if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) continue; - // Freeze instruction is free, too. - if (isa<FreezeInst>(I)) - continue; - + // Freeze instruction is free, too. + if (isa<FreezeInst>(I)) + continue; + // Bail out if this instruction gives back a token type, it is not possible // to duplicate it if it is used outside this BB. if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB)) @@ -592,7 +592,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, return Size > Bonus ? Size - Bonus : 0; } -/// findLoopHeaders - We do not want jump threading to turn proper loop +/// findLoopHeaders - We do not want jump threading to turn proper loop /// structures into irreducible loops. Doing this breaks up the loop nesting /// hierarchy and pessimizes later transformations. To prevent this from /// happening, we first have to find the loop headers. 
Here we approximate this @@ -606,7 +606,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, /// within the loop (forming a nested loop). This simple analysis is not rich /// enough to track all of these properties and keep it up-to-date as the CFG /// mutates, so we don't allow any of these transformations. -void JumpThreadingPass::findLoopHeaders(Function &F) { +void JumpThreadingPass::findLoopHeaders(Function &F) { SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; FindFunctionBackedges(F, Edges); @@ -633,13 +633,13 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { return dyn_cast<ConstantInt>(Val); } -/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see +/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see /// if we can infer that the value is a known ConstantInt/BlockAddress or undef /// in any of our predecessors. If so, return the known list of value and pred /// BB in the result vector. /// /// This returns true if there were any known values. -bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( +bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( Value *V, BasicBlock *BB, PredValueInfo &Result, ConstantPreference Preference, DenseSet<Value *> &RecursionSet, Instruction *CxtI) { @@ -704,10 +704,10 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( return !Result.empty(); } - // Handle Cast instructions. + // Handle Cast instructions. if (CastInst *CI = dyn_cast<CastInst>(I)) { Value *Source = CI->getOperand(0); - computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, + computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, RecursionSet, CxtI); if (Result.empty()) return false; @@ -719,18 +719,18 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( return true; } - if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) { - Value *Source = FI->getOperand(0); - computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, - RecursionSet, CxtI); - - erase_if(Result, [](auto &Pair) { - return !isGuaranteedNotToBeUndefOrPoison(Pair.first); - }); - - return !Result.empty(); - } - + if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) { + Value *Source = FI->getOperand(0); + computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, + RecursionSet, CxtI); + + erase_if(Result, [](auto &Pair) { + return !isGuaranteedNotToBeUndefOrPoison(Pair.first); + }); + + return !Result.empty(); + } + // Handle some boolean conditions. 
if (I->getType()->getPrimitiveSizeInBits() == 1) { assert(Preference == WantInteger && "One-bit non-integer type?"); @@ -740,9 +740,9 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( I->getOpcode() == Instruction::And) { PredValueInfoTy LHSVals, RHSVals; - computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, + computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, WantInteger, RecursionSet, CxtI); - computeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals, + computeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals, WantInteger, RecursionSet, CxtI); if (LHSVals.empty() && RHSVals.empty()) @@ -778,7 +778,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( if (I->getOpcode() == Instruction::Xor && isa<ConstantInt>(I->getOperand(1)) && cast<ConstantInt>(I->getOperand(1))->isOne()) { - computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result, + computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result, WantInteger, RecursionSet, CxtI); if (Result.empty()) return false; @@ -796,7 +796,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( && "A binary operator creating a block address?"); if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { PredValueInfoTy LHSVals; - computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals, + computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals, WantInteger, RecursionSet, CxtI); // Try to use constant folding to simplify the binary operator. @@ -930,7 +930,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( // Try to find a constant value for the LHS of a comparison, // and evaluate it statically if we can. PredValueInfoTy LHSVals; - computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, + computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, WantInteger, RecursionSet, CxtI); for (const auto &LHSVal : LHSVals) { @@ -951,7 +951,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference); PredValueInfoTy Conds; if ((TrueVal || FalseVal) && - computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds, + computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds, WantInteger, RecursionSet, CxtI)) { for (auto &C : Conds) { Constant *Cond = C.first; @@ -979,8 +979,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( } // If all else fails, see if LVI can figure out a constant value for us. - assert(CxtI->getParent() == BB && "CxtI should be in BB"); - Constant *CI = LVI->getConstant(V, CxtI); + assert(CxtI->getParent() == BB && "CxtI should be in BB"); + Constant *CI = LVI->getConstant(V, CxtI); if (Constant *KC = getKnownConstant(CI, Preference)) { for (BasicBlock *Pred : predecessors(BB)) Result.emplace_back(KC, Pred); @@ -994,7 +994,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( /// /// Since we can pick an arbitrary destination, we pick the successor with the /// fewest predecessors. This should reduce the in-degree of the others. 
-static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) { +static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) { Instruction *BBTerm = BB->getTerminator(); unsigned MinSucc = 0; BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); @@ -1022,9 +1022,9 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) { return !BA->use_empty(); } -/// processBlock - If there are any predecessors whose control can be threaded +/// processBlock - If there are any predecessors whose control can be threaded /// through to a successor, transform them now. -bool JumpThreadingPass::processBlock(BasicBlock *BB) { +bool JumpThreadingPass::processBlock(BasicBlock *BB) { // If the block is trivially dead, just return and let the caller nuke it. // This simplifies other transformations. if (DTU->isBBPendingDeletion(BB) || @@ -1035,14 +1035,14 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // successor, merge the blocks. This encourages recursive jump threading // because now the condition in this block can be threaded through // predecessors of our predecessor block. - if (maybeMergeBasicBlockIntoOnlyPred(BB)) + if (maybeMergeBasicBlockIntoOnlyPred(BB)) return true; - if (tryToUnfoldSelectInCurrBB(BB)) + if (tryToUnfoldSelectInCurrBB(BB)) return true; // Look if we can propagate guards to predecessors. - if (HasGuards && processGuards(BB)) + if (HasGuards && processGuards(BB)) return true; // What kind of constant we're looking for. @@ -1067,9 +1067,9 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { return false; // Must be an invoke or callbr. } - // Keep track if we constant folded the condition in this invocation. - bool ConstantFolded = false; - + // Keep track if we constant folded the condition in this invocation. + bool ConstantFolded = false; + // Run constant folding to see if we can reduce the condition to a simple // constant. if (Instruction *I = dyn_cast<Instruction>(Condition)) { @@ -1080,16 +1080,16 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { if (isInstructionTriviallyDead(I, TLI)) I->eraseFromParent(); Condition = SimpleVal; - ConstantFolded = true; + ConstantFolded = true; } } - // If the terminator is branching on an undef or freeze undef, we can pick any - // of the successors to branch to. Let getBestDestForJumpOnUndef decide. - auto *FI = dyn_cast<FreezeInst>(Condition); - if (isa<UndefValue>(Condition) || - (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) { - unsigned BestSucc = getBestDestForJumpOnUndef(BB); + // If the terminator is branching on an undef or freeze undef, we can pick any + // of the successors to branch to. Let getBestDestForJumpOnUndef decide. + auto *FI = dyn_cast<FreezeInst>(Condition); + if (isa<UndefValue>(Condition) || + (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) { + unsigned BestSucc = getBestDestForJumpOnUndef(BB); std::vector<DominatorTree::UpdateType> Updates; // Fold the branch/switch. 
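getBestDestForJumpOnUndef above picks the successor with the fewest predecessors, so folding a branch on undef keeps the in-degree of the other successors low. A minimal standalone sketch of that selection, using made-up predecessor counts rather than the pass's data structures:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Predecessor counts of each successor of the block with the undef branch.
  std::vector<unsigned> predCounts = {4, 1, 3};  // hypothetical
  auto best = std::min_element(predCounts.begin(), predCounts.end());
  std::printf("jump to successor %zu (only %u predecessors)\n",
              static_cast<size_t>(best - predCounts.begin()), *best);
  return 0;
}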
@@ -1107,8 +1107,8 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); DTU->applyUpdatesPermissive(Updates); - if (FI) - FI->eraseFromParent(); + if (FI) + FI->eraseFromParent(); return true; } @@ -1121,8 +1121,8 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { << '\n'); ++NumFolds; ConstantFoldTerminator(BB, true, nullptr, DTU); - if (HasProfileData) - BPI->eraseBlock(BB); + if (HasProfileData) + BPI->eraseBlock(BB); return true; } @@ -1131,9 +1131,9 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // All the rest of our checks depend on the condition being an instruction. if (!CondInst) { // FIXME: Unify this with code below. - if (processThreadableEdges(Condition, BB, Preference, Terminator)) + if (processThreadableEdges(Condition, BB, Preference, Terminator)) return true; - return ConstantFolded; + return ConstantFolded; } if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { @@ -1174,24 +1174,24 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { auto *CI = Ret == LazyValueInfo::True ? ConstantInt::getTrue(CondCmp->getType()) : ConstantInt::getFalse(CondCmp->getType()); - replaceFoldableUses(CondCmp, CI); + replaceFoldableUses(CondCmp, CI); } DTU->applyUpdatesPermissive( {{DominatorTree::Delete, BB, ToRemoveSucc}}); - if (HasProfileData) - BPI->eraseBlock(BB); + if (HasProfileData) + BPI->eraseBlock(BB); return true; } // We did not manage to simplify this branch, try to see whether // CondCmp depends on a known phi-select pattern. - if (tryToUnfoldSelect(CondCmp, BB)) + if (tryToUnfoldSelect(CondCmp, BB)) return true; } } if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) - if (tryToUnfoldSelect(SI, BB)) + if (tryToUnfoldSelect(SI, BB)) return true; // Check for some cases that are worth simplifying. Right now we want to look @@ -1199,11 +1199,11 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // we see one, check to see if it's partially redundant. If so, insert a PHI // which can then be used to thread the values. Value *SimplifyValue = CondInst; - - if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue)) - // Look into freeze's operand - SimplifyValue = FI->getOperand(0); - + + if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue)) + // Look into freeze's operand + SimplifyValue = FI->getOperand(0); + if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) if (isa<Constant>(CondCmp->getOperand(1))) SimplifyValue = CondCmp->getOperand(0); @@ -1211,7 +1211,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // TODO: There are other places where load PRE would be profitable, such as // more complex comparisons. if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue)) - if (simplifyPartiallyRedundantLoad(LoadI)) + if (simplifyPartiallyRedundantLoad(LoadI)) return true; // Before threading, try to propagate profile data backwards: @@ -1222,32 +1222,32 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // Handle a variety of cases where we are branching on something derived from // a PHI node in the current block. If we can prove that any predecessors // compute a predictable value based on a PHI node, thread those predecessors. - if (processThreadableEdges(CondInst, BB, Preference, Terminator)) + if (processThreadableEdges(CondInst, BB, Preference, Terminator)) return true; - // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in - // the current block, see if we can simplify. 
- PHINode *PN = dyn_cast<PHINode>( - isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0) - : CondInst); - - if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator())) - return processBranchOnPHI(PN); + // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in + // the current block, see if we can simplify. + PHINode *PN = dyn_cast<PHINode>( + isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0) + : CondInst); + if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator())) + return processBranchOnPHI(PN); + // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify. if (CondInst->getOpcode() == Instruction::Xor && CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator())) - return processBranchOnXOR(cast<BinaryOperator>(CondInst)); + return processBranchOnXOR(cast<BinaryOperator>(CondInst)); // Search for a stronger dominating condition that can be used to simplify a // conditional branch leaving BB. - if (processImpliedCondition(BB)) + if (processImpliedCondition(BB)) return true; return false; } -bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { +bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -1277,8 +1277,8 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { UncondBI->setDebugLoc(BI->getDebugLoc()); BI->eraseFromParent(); DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}}); - if (HasProfileData) - BPI->eraseBlock(BB); + if (HasProfileData) + BPI->eraseBlock(BB); return true; } CurrentBB = CurrentPred; @@ -1296,11 +1296,11 @@ static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) { return false; } -/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially +/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially /// redundant load instruction, eliminate it by replacing it with a PHI node. /// This is an important optimization that encourages jump threading, and needs /// to be run interlaced with other jump threading tasks. -bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { +bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // Don't hack volatile and ordered loads. if (!LoadI->isUnordered()) return false; @@ -1470,7 +1470,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { } // Split them out to their own block. - UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split"); + UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split"); } // If the value isn't available in all predecessors, then there will be @@ -1534,11 +1534,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { return true; } -/// findMostPopularDest - The specified list contains multiple possible +/// findMostPopularDest - The specified list contains multiple possible /// threadable destinations. Pick the one that occurs the most frequently in /// the list. static BasicBlock * -findMostPopularDest(BasicBlock *BB, +findMostPopularDest(BasicBlock *BB, const SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &PredToDestList) { assert(!PredToDestList.empty()); @@ -1573,7 +1573,7 @@ findMostPopularDest(BasicBlock *BB, // Try to evaluate the value of V when the control flows from PredPredBB to // BB->getSinglePredecessor() and then on to BB. 
-Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB, +Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *V) { BasicBlock *PredBB = BB->getSinglePredecessor(); @@ -1600,9 +1600,9 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB, if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) { if (CondCmp->getParent() == BB) { Constant *Op0 = - evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0)); + evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0)); Constant *Op1 = - evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1)); + evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1)); if (Op0 && Op1) { return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1); } @@ -1613,7 +1613,7 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB, return nullptr; } -bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, +bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, ConstantPreference Preference, Instruction *CxtI) { // If threading this would thread across a loop header, don't even try to @@ -1622,15 +1622,15 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, return false; PredValueInfoTy PredValues; - if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference, + if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI)) { // We don't have known values in predecessors. See if we can thread through // BB and its sole predecessor. - return maybethreadThroughTwoBasicBlocks(BB, Cond); + return maybethreadThroughTwoBasicBlocks(BB, Cond); } assert(!PredValues.empty() && - "computeValueKnownInPredecessors returned true with no values"); + "computeValueKnownInPredecessors returned true with no values"); LLVM_DEBUG(dbgs() << "IN BB: " << *BB; for (const auto &PredValue : PredValues) { @@ -1722,8 +1722,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, BranchInst::Create(OnlyDest, Term); Term->eraseFromParent(); DTU->applyUpdatesPermissive(Updates); - if (HasProfileData) - BPI->eraseBlock(BB); + if (HasProfileData) + BPI->eraseBlock(BB); // If the condition is now dead due to the removal of the old terminator, // erase it. @@ -1739,7 +1739,7 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, // guard/assume. else if (OnlyVal && OnlyVal != MultipleVal && CondInst->getParent() == BB) - replaceFoldableUses(CondInst, OnlyVal); + replaceFoldableUses(CondInst, OnlyVal); } return true; } @@ -1752,18 +1752,18 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *MostPopularDest = OnlyDest; if (MostPopularDest == MultipleDestSentinel) { - // Remove any loop headers from the Dest list, threadEdge conservatively + // Remove any loop headers from the Dest list, threadEdge conservatively // won't process them, but we might have other destination that are eligible // and we still want to process. 
erase_if(PredToDestList, [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) { - return LoopHeaders.contains(PredToDest.second); + return LoopHeaders.contains(PredToDest.second); }); if (PredToDestList.empty()) return false; - MostPopularDest = findMostPopularDest(BB, PredToDestList); + MostPopularDest = findMostPopularDest(BB, PredToDestList); } // Now that we know what the most popular destination is, factor all @@ -1785,16 +1785,16 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, // the destination that these predecessors should get to. if (!MostPopularDest) MostPopularDest = BB->getTerminator()-> - getSuccessor(getBestDestForJumpOnUndef(BB)); + getSuccessor(getBestDestForJumpOnUndef(BB)); // Ok, try to thread it! - return tryThreadEdge(BB, PredsToFactor, MostPopularDest); + return tryThreadEdge(BB, PredsToFactor, MostPopularDest); } -/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on -/// a PHI node (or freeze PHI) in the current block. See if there are any -/// simplifications we can do based on inputs to the phi node. -bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) { +/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on +/// a PHI node (or freeze PHI) in the current block. See if there are any +/// simplifications we can do based on inputs to the phi node. +bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) { BasicBlock *BB = PN->getParent(); // TODO: We could make use of this to do it once for blocks with common PHI @@ -1806,16 +1806,16 @@ bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) { // *duplicate* the conditional branch into that block in order to further // encourage jump threading and to eliminate cases where we have branch on a // phi of an icmp (branch on icmp is much better). - // This is still beneficial when a frozen phi is used as the branch condition - // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp)) - // to br(icmp(freeze ...)). + // This is still beneficial when a frozen phi is used as the branch condition + // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp)) + // to br(icmp(freeze ...)). for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = PN->getIncomingBlock(i); if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator())) if (PredBr->isUnconditional()) { PredBBs[0] = PredBB; // Try to duplicate BB into PredBB. - if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs)) + if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs)) return true; } } @@ -1823,10 +1823,10 @@ bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) { return false; } -/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on +/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on /// a xor instruction in the current block. See if there are any /// simplifications we can do based on inputs to the xor. 
-bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) { +bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) { BasicBlock *BB = BO->getParent(); // If either the LHS or RHS of the xor is a constant, don't do this @@ -1864,17 +1864,17 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) { PredValueInfoTy XorOpValues; bool isLHS = true; - if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues, + if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues, WantInteger, BO)) { assert(XorOpValues.empty()); - if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues, + if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues, WantInteger, BO)) return false; isLHS = false; } assert(!XorOpValues.empty() && - "computeValueKnownInPredecessors returned true with no values"); + "computeValueKnownInPredecessors returned true with no values"); // Scan the information to see which is most popular: true or false. The // predecessors can be of the set true, false, or undef. @@ -1935,13 +1935,13 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) { return false; // Try to duplicate BB into PredBB. - return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); + return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); } -/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new +/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// predecessor to the PHIBB block. If it has PHI nodes, add entries for /// NewPred using the entries from OldPred (suitably mapped). -static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, +static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap<Instruction*, Value*> &ValueMap) { @@ -1962,7 +1962,7 @@ static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, } /// Merge basic block BB into its sole predecessor if possible. -bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) { +bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) { BasicBlock *SinglePred = BB->getSinglePredecessor(); if (!SinglePred) return false; @@ -2013,7 +2013,7 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) { /// Update the SSA form. NewBB contains instructions that are copied from BB. /// ValueMapping maps old values in BB to new ones in NewBB. -void JumpThreadingPass::updateSSA( +void JumpThreadingPass::updateSSA( BasicBlock *BB, BasicBlock *NewBB, DenseMap<Instruction *, Value *> &ValueMapping) { // If there were values defined in BB that are used outside the block, then we @@ -2059,7 +2059,7 @@ void JumpThreadingPass::updateSSA( /// arguments that come from PredBB. Return the map from the variables in the /// source basic block to the variables in the newly created basic block. DenseMap<Instruction *, Value *> -JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, +JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB) { // We are going to have to map operands from the source basic block to the new @@ -2076,15 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, ValueMapping[PN] = NewPN; } - // Clone noalias scope declarations in the threaded block. When threading a - // loop exit, we would otherwise end up with two idential scope declarations - // visible at the same time. 
- SmallVector<MDNode *> NoAliasScopes; - DenseMap<MDNode *, MDNode *> ClonedScopes; - LLVMContext &Context = PredBB->getContext(); - identifyNoAliasScopesToClone(BI, BE, NoAliasScopes); - cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context); - + // Clone noalias scope declarations in the threaded block. When threading a + // loop exit, we would otherwise end up with two idential scope declarations + // visible at the same time. + SmallVector<MDNode *> NoAliasScopes; + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVMContext &Context = PredBB->getContext(); + identifyNoAliasScopesToClone(BI, BE, NoAliasScopes); + cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context); + // Clone the non-phi instructions of the source basic block into NewBB, // keeping track of the mapping and using it to remap operands in the cloned // instructions. @@ -2093,7 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, New->setName(BI->getName()); NewBB->getInstList().push_back(New); ValueMapping[&*BI] = New; - adaptNoAliasScopes(New, ClonedScopes, Context); + adaptNoAliasScopes(New, ClonedScopes, Context); // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) @@ -2108,7 +2108,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, } /// Attempt to thread through two successive basic blocks. -bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, +bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond) { // Consider: // @@ -2177,7 +2177,7 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, BasicBlock *OnePred = nullptr; for (BasicBlock *P : predecessors(PredBB)) { if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>( - evaluateOnPredecessorEdge(BB, P, Cond))) { + evaluateOnPredecessorEdge(BB, P, Cond))) { if (CI->isZero()) { ZeroCount++; ZeroPred = P; @@ -2208,7 +2208,7 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, } // If threading this would thread across a loop header, don't thread the edge. - // See the comments above findLoopHeaders for justifications and caveats. + // See the comments above findLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) { LLVM_DEBUG({ bool BBIsHeader = LoopHeaders.count(BB); @@ -2241,11 +2241,11 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, } // Now we are ready to duplicate PredBB. - threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB); + threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB); return true; } -void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, +void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB) { @@ -2271,12 +2271,12 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them // to account for entry from PredPredBB. DenseMap<Instruction *, Value *> ValueMapping = - cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB); - - // Copy the edge probabilities from PredBB to NewBB. - if (HasProfileData) - BPI->copyEdgeProbabilities(PredBB, NewBB); + cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB); + // Copy the edge probabilities from PredBB to NewBB. 
+ if (HasProfileData) + BPI->copyEdgeProbabilities(PredBB, NewBB); + // Update the terminator of PredPredBB to jump to NewBB instead of PredBB. // This eliminates predecessors from PredPredBB, which requires us to simplify // any PHI nodes in PredBB. @@ -2287,9 +2287,9 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, PredPredTerm->setSuccessor(i, NewBB); } - addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB, + addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB, ValueMapping); - addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB, + addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB, ValueMapping); DTU->applyUpdatesPermissive( @@ -2298,7 +2298,7 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, {DominatorTree::Insert, PredPredBB, NewBB}, {DominatorTree::Delete, PredPredBB, PredBB}}); - updateSSA(PredBB, NewBB, ValueMapping); + updateSSA(PredBB, NewBB, ValueMapping); // Clean up things like PHI nodes with single operands, dead instructions, // etc. @@ -2307,11 +2307,11 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, SmallVector<BasicBlock *, 1> PredsToFactor; PredsToFactor.push_back(NewBB); - threadEdge(BB, PredsToFactor, SuccBB); + threadEdge(BB, PredsToFactor, SuccBB); } -/// tryThreadEdge - Thread an edge if it's safe and profitable to do so. -bool JumpThreadingPass::tryThreadEdge( +/// tryThreadEdge - Thread an edge if it's safe and profitable to do so. +bool JumpThreadingPass::tryThreadEdge( BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs, BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. @@ -2322,7 +2322,7 @@ bool JumpThreadingPass::tryThreadEdge( } // If threading this would thread across a loop header, don't thread the edge. - // See the comments above findLoopHeaders for justifications and caveats. + // See the comments above findLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) { LLVM_DEBUG({ bool BBIsHeader = LoopHeaders.count(BB); @@ -2343,14 +2343,14 @@ bool JumpThreadingPass::tryThreadEdge( return false; } - threadEdge(BB, PredBBs, SuccBB); + threadEdge(BB, PredBBs, SuccBB); return true; } -/// threadEdge - We have decided that it is safe and profitable to factor the +/// threadEdge - We have decided that it is safe and profitable to factor the /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB /// across BB. Transform the IR to reflect this change. -void JumpThreadingPass::threadEdge(BasicBlock *BB, +void JumpThreadingPass::threadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs, BasicBlock *SuccBB) { assert(SuccBB != BB && "Don't create an infinite loop"); @@ -2365,7 +2365,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB, else { LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm"); + PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm"); } // And finally, do it! @@ -2389,7 +2389,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB, // Copy all the instructions from BB to NewBB except the terminator. 
DenseMap<Instruction *, Value *> ValueMapping = - cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB); + cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB); // We didn't copy the terminator from BB over to NewBB, because there is now // an unconditional jump to SuccBB. Insert the unconditional jump. @@ -2398,7 +2398,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB, // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the // PHI nodes for NewBB now. - addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); + addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); // Update the terminator of PredBB to jump to NewBB instead of BB. This // eliminates predecessors from BB, which requires us to simplify any PHI @@ -2415,7 +2415,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB, {DominatorTree::Insert, PredBB, NewBB}, {DominatorTree::Delete, PredBB, BB}}); - updateSSA(BB, NewBB, ValueMapping); + updateSSA(BB, NewBB, ValueMapping); // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This @@ -2423,7 +2423,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB, SimplifyInstructionsInBlock(NewBB, TLI); // Update the edge weight from BB to SuccBB, which should be less than before. - updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB); + updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB); // Threaded an edge! ++NumThreads; @@ -2432,7 +2432,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB, /// Create a new basic block that will be the predecessor of BB and successor of /// all blocks in Preds. When profile data is available, update the frequency of /// this new block. -BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB, +BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix) { SmallVector<BasicBlock *, 2> NewBBs; @@ -2493,7 +2493,7 @@ bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { /// Update the block frequency of BB and branch weight and the metadata on the /// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 - /// Freq(PredBB->BB) / Freq(BB->SuccBB). -void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, +void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB, BasicBlock *NewBB, BasicBlock *SuccBB) { @@ -2585,18 +2585,18 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, } } -/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch +/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch /// to BB which contains an i1 PHI node and a conditional branch on that PHI. /// If we can duplicate the contents of BB up into PredBB do so now, this /// improves the odds that the branch will be on an analyzable instruction like /// a compare. -bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( +bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) { assert(!PredBBs.empty() && "Can't handle an empty set"); // If BB is a loop header, then duplicating this block outside the loop would // cause us to transform this into an irreducible loop, don't do this. - // See the comments above findLoopHeaders for justifications and caveats. + // See the comments above findLoopHeaders for justifications and caveats. 
if (LoopHeaders.count(BB)) { LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName() << "' into predecessor block '" << PredBBs[0]->getName() @@ -2620,7 +2620,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( else { LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm"); + PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm"); } Updates.push_back({DominatorTree::Delete, PredBB, BB}); @@ -2692,12 +2692,12 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( // Check to see if the targets of the branch had PHI nodes. If so, we need to // add entries to the PHI nodes for branch from PredBB now. BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator()); - addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB, + addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB, ValueMapping); - addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, + addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, ValueMapping); - updateSSA(BB, PredBB, ValueMapping); + updateSSA(BB, PredBB, ValueMapping); // PredBB no longer jumps to BB, remove entries in the PHI node for the edge // that we nuked. @@ -2705,8 +2705,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); - if (HasProfileData) - BPI->copyEdgeProbabilities(BB, PredBB); + if (HasProfileData) + BPI->copyEdgeProbabilities(BB, PredBB); DTU->applyUpdatesPermissive(Updates); ++NumDupes; @@ -2718,7 +2718,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( // a PHI node in BB. SI has no other use. // A new basic block, NewBB, is created and SI is converted to compare and // conditional branch. SI is erased from parent. -void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, +void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx) { // Expand the select. @@ -2753,7 +2753,7 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB); } -bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { +bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition()); if (!CondPHI || CondPHI->getParent() != BB) @@ -2765,7 +2765,7 @@ bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { // The second and third condition can be potentially relaxed. 
Currently // the conditions help to simplify the code and allow us to reuse existing - // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *) + // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *) if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse()) continue; @@ -2773,13 +2773,13 @@ bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { if (!PredTerm || !PredTerm->isUnconditional()) continue; - unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I); + unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I); return true; } return false; } -/// tryToUnfoldSelect - Look for blocks of the form +/// tryToUnfoldSelect - Look for blocks of the form /// bb1: /// %a = select /// br bb2 @@ -2791,7 +2791,7 @@ bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { /// /// And expand the select into a branch structure if one of its arms allows %c /// to be folded. This later enables threading from bb1 over bb2. -bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { +bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0)); Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1)); @@ -2825,14 +2825,14 @@ bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { if ((LHSFolds != LazyValueInfo::Unknown || RHSFolds != LazyValueInfo::Unknown) && LHSFolds != RHSFolds) { - unfoldSelectInstr(Pred, BB, SI, CondLHS, I); + unfoldSelectInstr(Pred, BB, SI, CondLHS, I); return true; } } return false; } -/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the +/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the /// same BB in the form /// bb: /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... @@ -2852,14 +2852,14 @@ bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { /// select if the associated PHI has at least one constant. If the unfolded /// select is not jump-threaded, it will be folded again in the later /// optimizations. -bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { - // This transform would reduce the quality of msan diagnostics. +bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { + // This transform would reduce the quality of msan diagnostics. // Disable this transform under MemorySanitizer. if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory)) return false; // If threading this would thread across a loop header, don't thread the edge. - // See the comments above findLoopHeaders for justifications and caveats. + // See the comments above findLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) return false; @@ -2902,12 +2902,12 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { if (!SI) continue; // Expand the select. 
- Value *Cond = SI->getCondition(); - if (InsertFreezeWhenUnfoldingSelect && - !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI, - &DTU->getDomTree())) - Cond = new FreezeInst(Cond, "cond.fr", SI); - Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false); + Value *Cond = SI->getCondition(); + if (InsertFreezeWhenUnfoldingSelect && + !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI, + &DTU->getDomTree())) + Cond = new FreezeInst(Cond, "cond.fr", SI); + Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false); BasicBlock *SplitBB = SI->getParent(); BasicBlock *NewBB = Term->getParent(); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); @@ -2951,7 +2951,7 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { /// And cond either implies condGuard or !condGuard. In this case all the /// instructions before the guard can be duplicated in both branches, and the /// guard is then threaded to one of them. -bool JumpThreadingPass::processGuards(BasicBlock *BB) { +bool JumpThreadingPass::processGuards(BasicBlock *BB) { using namespace PatternMatch; // We only want to deal with two predecessors. @@ -2976,7 +2976,7 @@ bool JumpThreadingPass::processGuards(BasicBlock *BB) { if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator())) for (auto &I : *BB) - if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI)) + if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI)) return true; return false; @@ -2985,7 +2985,7 @@ bool JumpThreadingPass::processGuards(BasicBlock *BB) { /// Try to propagate the guard from BB which is the lower block of a diamond /// to one of its branches, in case if diamond's condition implies guard's /// condition. -bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard, +bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI) { assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?"); assert(BI->isConditional() && "Unconditional branch has 2 successors?"); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp index d2b4ba296f..6db37000d4 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp @@ -12,13 +12,13 @@ // safe. This pass also promotes must-aliased memory locations in the loop to // live in registers, thus hoisting and sinking "invariant" loads and stores. // -// Hoisting operations out of loops is a canonicalization transform. It -// enables and simplifies subsequent optimizations in the middle-end. -// Rematerialization of hoisted instructions to reduce register pressure is the -// responsibility of the back-end, which has more accurate information about -// register pressure and also handles other optimizations than LICM that -// increase live-ranges. -// +// Hoisting operations out of loops is a canonicalization transform. It +// enables and simplifies subsequent optimizations in the middle-end. +// Rematerialization of hoisted instructions to reduce register pressure is the +// responsibility of the back-end, which has more accurate information about +// register pressure and also handles other optimizations than LICM that +// increase live-ranges. +// // This pass uses alias analysis for two purposes: // // 1. Moving loop invariant loads and calls out of loops. 
If we can determine @@ -42,12 +42,12 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/GuardUtils.h" -#include "llvm/Analysis/LazyBlockFrequencyInfo.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -107,11 +107,11 @@ static cl::opt<bool> ControlFlowHoisting( "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); -static cl::opt<unsigned> HoistSinkColdnessThreshold( - "licm-coldness-threshold", cl::Hidden, cl::init(4), - cl::desc("Relative coldness Threshold of hoisting/sinking destination " - "block for LICM to be considered beneficial")); - +static cl::opt<unsigned> HoistSinkColdnessThreshold( + "licm-coldness-threshold", cl::Hidden, cl::init(4), + cl::desc("Relative coldness Threshold of hoisting/sinking destination " + "block for LICM to be considered beneficial")); + static cl::opt<uint32_t> MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " @@ -157,9 +157,9 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, - BlockFrequencyInfo *BFI, const Loop *CurLoop, - ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, - OptimizationRemarkEmitter *ORE); + BlockFrequencyInfo *BFI, const Loop *CurLoop, + ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, + OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, @@ -170,10 +170,10 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA); static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, Instruction &I, + Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags); -static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, - MemoryUse &MU); +static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); @@ -188,8 +188,8 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest, namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, - BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, - TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, + BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, + TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE); LoopInvariantCodeMotion(unsigned LicmMssaOptCap, @@ -221,30 +221,30 @@ struct LegacyLICMPass : public LoopPass { if (skipLoop(L)) return false; - LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block " - << L->getHeader()->getNameOrAsOperand() << "\n"); - + LLVM_DEBUG(dbgs() << "Perform LICM on 
Loop with header at block " + << L->getHeader()->getNameOrAsOperand() << "\n"); + auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); MemorySSA *MSSA = EnableMSSALoopDependency ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) : nullptr; - bool hasProfileData = L->getHeader()->getParent()->hasProfileData(); - BlockFrequencyInfo *BFI = - hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() - : nullptr; + bool hasProfileData = L->getHeader()->getParent()->hasProfileData(); + BlockFrequencyInfo *BFI = + hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() + : nullptr; // For the old PM, we can't use OptimizationRemarkEmitter as an analysis - // pass. Function analyses need to be preserved across loop transformations + // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - return LICM.runOnLoop( - L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), - &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), - &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI, - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( - *L->getHeader()->getParent()), - &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( - *L->getHeader()->getParent()), - SE ? &SE->getSE() : nullptr, MSSA, &ORE); + return LICM.runOnLoop( + L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), + &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), + &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI, + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( + *L->getHeader()->getParent()), + &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( + *L->getHeader()->getParent()), + SE ? 
&SE->getSE() : nullptr, MSSA, &ORE); } /// This transformation requires natural loop information & requires that @@ -260,9 +260,9 @@ struct LegacyLICMPass : public LoopPass { } AU.addRequired<TargetTransformInfoWrapperPass>(); getLoopAnalysisUsage(AU); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); - AU.addPreserved<LazyBlockFrequencyInfoPass>(); - AU.addPreserved<LazyBranchProbabilityInfoPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); + AU.addPreserved<LazyBlockFrequencyInfoPass>(); + AU.addPreserved<LazyBranchProbabilityInfoPass>(); } private: @@ -278,8 +278,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); - if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, - &AR.SE, AR.MSSA, &ORE)) + if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, + &AR.SE, AR.MSSA, &ORE)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -299,7 +299,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LazyBFIPass) +INITIALIZE_PASS_DEPENDENCY(LazyBFIPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) @@ -309,42 +309,42 @@ Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); } -llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, - MemorySSA *MSSA) - : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap, - IsSink, L, MSSA) {} - -llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags( - unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, - Loop *L, MemorySSA *MSSA) - : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), - IsSink(IsSink) { - assert(((L != nullptr) == (MSSA != nullptr)) && - "Unexpected values for SinkAndHoistLICMFlags"); - if (!MSSA) - return; - - unsigned AccessCapCount = 0; - for (auto *BB : L->getBlocks()) - if (const auto *Accesses = MSSA->getBlockAccesses(BB)) - for (const auto &MA : *Accesses) { - (void)MA; - ++AccessCapCount; - if (AccessCapCount > LicmMssaNoAccForPromotionCap) { - NoOfMemAccTooLarge = true; - return; - } - } -} - +llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, + MemorySSA *MSSA) + : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap, + IsSink, L, MSSA) {} + +llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags( + unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, + Loop *L, MemorySSA *MSSA) + : LicmMssaOptCap(LicmMssaOptCap), + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + IsSink(IsSink) { + assert(((L != nullptr) == (MSSA != nullptr)) && + "Unexpected values for SinkAndHoistLICMFlags"); + if (!MSSA) + return; + + unsigned AccessCapCount = 0; + for (auto *BB : L->getBlocks()) + if (const auto *Accesses = MSSA->getBlockAccesses(BB)) + for (const auto &MA : *Accesses) { + (void)MA; + ++AccessCapCount; + if (AccessCapCount > LicmMssaNoAccForPromotionCap) { + NoOfMemAccTooLarge = true; + return; + } + } +} + /// Hoist expressions out of the specified loop. 
Note, alias info for inner /// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. bool LoopInvariantCodeMotion::runOnLoop( Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, - BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, - ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) { + BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -357,18 +357,18 @@ bool LoopInvariantCodeMotion::runOnLoop( std::unique_ptr<AliasSetTracker> CurAST; std::unique_ptr<MemorySSAUpdater> MSSAU; - std::unique_ptr<SinkAndHoistLICMFlags> Flags; + std::unique_ptr<SinkAndHoistLICMFlags> Flags; if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); CurAST = collectAliasInfoForLoop(L, LI, AA); - Flags = std::make_unique<SinkAndHoistLICMFlags>( - LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true); + Flags = std::make_unique<SinkAndHoistLICMFlags>( + LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true); } else { LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n"); MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); - Flags = std::make_unique<SinkAndHoistLICMFlags>( - LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA); + Flags = std::make_unique<SinkAndHoistLICMFlags>( + LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA); } // Get the preheader block to move instructions into... @@ -388,14 +388,14 @@ bool LoopInvariantCodeMotion::runOnLoop( // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. if (L->hasDedicatedExits()) - Changed |= - sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE); - Flags->setIsSink(false); + Changed |= + sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, + CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE); + Flags->setIsSink(false); if (Preheader) - Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, - CurAST.get(), MSSAU.get(), SE, &SafetyInfo, - *Flags.get(), ORE); + Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, + CurAST.get(), MSSAU.get(), SE, &SafetyInfo, + *Flags.get(), ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -405,7 +405,7 @@ bool LoopInvariantCodeMotion::runOnLoop( // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits() && - !Flags->tooManyMemoryAccesses()) { + !Flags->tooManyMemoryAccesses()) { // Figure out the loop exits and their insertion points SmallVector<BasicBlock *, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -474,7 +474,7 @@ bool LoopInvariantCodeMotion::runOnLoop( // specifically moving instructions across the loop boundary and so it is // especially in need of sanity checking here. 
assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); - assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && + assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); if (MSSAU.get() && VerifyMemorySSA) @@ -491,10 +491,10 @@ bool LoopInvariantCodeMotion::runOnLoop( /// definitions, allowing us to sink a loop body in one pass without iteration. /// bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, - DominatorTree *DT, BlockFrequencyInfo *BFI, - TargetLibraryInfo *TLI, TargetTransformInfo *TTI, - Loop *CurLoop, AliasSetTracker *CurAST, - MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, + DominatorTree *DT, BlockFrequencyInfo *BFI, + TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { @@ -543,7 +543,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, ORE)) { - if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { + if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { if (!FreeInLoop) { ++II; salvageDebugInfo(I); @@ -627,7 +627,7 @@ public: else if (!TrueDestSucc.empty()) { Function *F = TrueDest->getParent(); auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); }; - auto It = llvm::find_if(*F, IsSucc); + auto It = llvm::find_if(*F, IsSucc); assert(It != F->end() && "Could not find successor in function"); CommonSucc = &*It; } @@ -695,15 +695,15 @@ public: return BB != Pair.second && (Pair.first->getSuccessor(0) == BB || Pair.first->getSuccessor(1) == BB); }; - auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor); + auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor); // If not involved in a pending branch, hoist to preheader BasicBlock *InitialPreheader = CurLoop->getLoopPreheader(); if (It == HoistableBranches.end()) { - LLVM_DEBUG(dbgs() << "LICM using " - << InitialPreheader->getNameOrAsOperand() - << " as hoist destination for " - << BB->getNameOrAsOperand() << "\n"); + LLVM_DEBUG(dbgs() << "LICM using " + << InitialPreheader->getNameOrAsOperand() + << " as hoist destination for " + << BB->getNameOrAsOperand() << "\n"); HoistDestinationMap[BB] = InitialPreheader; return InitialPreheader; } @@ -788,43 +788,43 @@ public: }; } // namespace -// Hoisting/sinking instruction out of a loop isn't always beneficial. It's only -// only worthwhile if the destination block is actually colder than current -// block. -static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock, - OptimizationRemarkEmitter *ORE, - BlockFrequencyInfo *BFI) { - // Check block frequency only when runtime profile is available - // to avoid pathological cases. With static profile, lean towards - // hosting because it helps canonicalize the loop for vectorizer. 
- if (!DstBlock->getParent()->hasProfileData()) - return true; - - if (!HoistSinkColdnessThreshold || !BFI) - return true; - - BasicBlock *SrcBlock = I.getParent(); - if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold > - BFI->getBlockFreq(SrcBlock).getFrequency()) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I) - << "failed to sink or hoist instruction because containing block " - "has lower frequency than destination block"; - }); - return false; - } - - return true; -} - +// Hoisting/sinking instruction out of a loop isn't always beneficial. It's only +// only worthwhile if the destination block is actually colder than current +// block. +static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock, + OptimizationRemarkEmitter *ORE, + BlockFrequencyInfo *BFI) { + // Check block frequency only when runtime profile is available + // to avoid pathological cases. With static profile, lean towards + // hosting because it helps canonicalize the loop for vectorizer. + if (!DstBlock->getParent()->hasProfileData()) + return true; + + if (!HoistSinkColdnessThreshold || !BFI) + return true; + + BasicBlock *SrcBlock = I.getParent(); + if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold > + BFI->getBlockFreq(SrcBlock).getFrequency()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I) + << "failed to sink or hoist instruction because containing block " + "has lower frequency than destination block"; + }); + return false; + } + + return true; +} + /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. /// bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, - DominatorTree *DT, BlockFrequencyInfo *BFI, - TargetLibraryInfo *TLI, Loop *CurLoop, + DominatorTree *DT, BlockFrequencyInfo *BFI, + TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, @@ -875,15 +875,15 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // Try hoisting the instruction out to the preheader. We can only do // this if all of the operands of the instruction are loop invariant and - // if it is safe to hoist the instruction. We also check block frequency - // to make sure instruction only gets hoisted into colder blocks. + // if it is safe to hoist the instruction. We also check block frequency + // to make sure instruction only gets hoisted into colder blocks. // TODO: It may be safe to hoist if we are hoisting to a conditional block // and we have accurately duplicated the control flow from the loop header // to that block. 
if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, ORE) && - worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) && + worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { @@ -982,7 +982,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, HoistPoint = Dominator->getTerminator(); } LLVM_DEBUG(dbgs() << "LICM rehoisting to " - << HoistPoint->getParent()->getNameOrAsOperand() + << HoistPoint->getParent()->getNameOrAsOperand() << ": " << *I << "\n"); moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE); HoistPoint = I; @@ -1014,20 +1014,20 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop) { Value *Addr = LI->getOperand(0); const DataLayout &DL = LI->getModule()->getDataLayout(); - const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); - - // It is not currently possible for clang to generate an invariant.start - // intrinsic with scalable vector types because we don't support thread local - // sizeless types and we don't permit sizeless types in structs or classes. - // Furthermore, even if support is added for this in future the intrinsic - // itself is defined to have a size of -1 for variable sized objects. This - // makes it impossible to verify if the intrinsic envelops our region of - // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8> - // types would have a -1 parameter, but the former is clearly double the size - // of the latter. - if (LocSizeInBits.isScalable()) - return false; - + const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); + + // It is not currently possible for clang to generate an invariant.start + // intrinsic with scalable vector types because we don't support thread local + // sizeless types and we don't permit sizeless types in structs or classes. + // Furthermore, even if support is added for this in future the intrinsic + // itself is defined to have a size of -1 for variable sized objects. This + // makes it impossible to verify if the intrinsic envelops our region of + // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8> + // types would have a -1 parameter, but the former is clearly double the size + // of the latter. + if (LocSizeInBits.isScalable()) + return false; + // if the type is i8 addrspace(x)*, we know this is the type of // llvm.invariant.start operand auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), @@ -1056,17 +1056,17 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || !II->use_empty()) continue; - ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0)); - // The intrinsic supports having a -1 argument for variable sized objects - // so we should check for that here. - if (InvariantSize->isNegative()) - continue; - uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8; + ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0)); + // The intrinsic supports having a -1 argument for variable sized objects + // so we should check for that here. + if (InvariantSize->isNegative()) + continue; + uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8; // Confirm the invariant.start location size contains the load operand size // in bits. 
Also, the invariant.start should dominate the load, and we // should not hoist the load out of a loop that contains this dominating // invariant.start. - if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits && + if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits && DT->properlyDominates(II->getParent(), CurLoop->getHeader())) return true; } @@ -1131,9 +1131,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *Flags, OptimizationRemarkEmitter *ORE) { - assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && - "Either AliasSetTracker or MemorySSA should be initialized."); - + assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && + "Either AliasSetTracker or MemorySSA should be initialized."); + // If we don't understand the instruction, bail early. if (!isHoistableAndSinkableInst(I)) return false; @@ -1167,7 +1167,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags); + MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1188,13 +1188,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, if (CI->mayThrow()) return false; - // Convergent attribute has been used on operations that involve - // inter-thread communication which results are implicitly affected by the - // enclosing control flows. It is not safe to hoist or sink such operations - // across control flow. - if (CI->isConvergent()) - return false; - + // Convergent attribute has been used on operations that involve + // inter-thread communication which results are implicitly affected by the + // enclosing control flows. It is not safe to hoist or sink such operations + // across control flow. + if (CI->isConvergent()) + return false; + using namespace PatternMatch; if (match(CI, m_Intrinsic<Intrinsic::assume>())) // Assumes don't actually alias anything or throw @@ -1219,10 +1219,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, bool Invalidated; if (CurAST) Invalidated = pointerInvalidatedByLoop( - MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA); + MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I, + MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I, *Flags); if (Invalidated) return false; @@ -1282,9 +1282,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, } else { // MSSAU if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) return true; - // If there are more accesses than the Promotion cap or no "quota" to - // check clobber, then give up as we're not walking a list that long. - if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls()) + // If there are more accesses than the Promotion cap or no "quota" to + // check clobber, then give up as we're not walking a list that long. + if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls()) return false; // If there are interfering Uses (i.e. their defining access is in the // loop), or ordered loads (stored as Defs!), don't move this store. 
@@ -1304,7 +1304,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // Uses may point to an access outside the loop, as getClobbering // checks the previous iteration when walking the backedge. // FIXME: More precise: no Uses that alias SI. - if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU)) + if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU)) return false; } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) { if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) { @@ -1324,7 +1324,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, } } auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); - Flags->incrementClobberingCalls(); + Flags->incrementClobberingCalls(); // If there are no clobbering Defs in the loop, store is safe to hoist. return MSSA->isLiveOnEntryDef(Source) || !CurLoop->contains(Source->getBlock()); @@ -1624,9 +1624,9 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, /// position, and may either delete it or move it to outside of the loop. /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, - BlockFrequencyInfo *BFI, const Loop *CurLoop, - ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, - OptimizationRemarkEmitter *ORE) { + BlockFrequencyInfo *BFI, const Loop *CurLoop, + ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, + OptimizationRemarkEmitter *ORE) { LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) @@ -1702,10 +1702,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. - // First check if I is worth sinking for all uses. Sink only when it is worth - // across all uses. + // First check if I is worth sinking for all uses. Sink only when it is worth + // across all uses. SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); - SmallVector<PHINode *, 8> ExitPNs; + SmallVector<PHINode *, 8> ExitPNs; for (auto *UI : Users) { auto *User = cast<Instruction>(UI); @@ -1715,15 +1715,15 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, PHINode *PN = cast<PHINode>(User); assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); - if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) { - return Changed; - } - - ExitPNs.push_back(PN); - } - - for (auto *PN : ExitPNs) { - + if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) { + return Changed; + } + + ExitPNs.push_back(PN); + } + + for (auto *PN : ExitPNs) { + // The PHI must be trivially replaceable. 
Instruction *New = sinkThroughTriviallyReplaceablePHI( PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); @@ -1741,8 +1741,8 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) { - LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": " - << I << "\n"); + LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": " + << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " << ore::NV("Inst", &I); @@ -1766,7 +1766,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, // Move the new node to the destination block, before its terminator. moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE); - I.updateLocationAfterHoist(); + I.updateLocationAfterHoist(); if (isa<LoadInst>(I)) ++NumMovedLoads; @@ -1812,7 +1812,7 @@ class LoopPromoter : public LoadAndStorePromoter { SmallVectorImpl<Instruction *> &LoopInsertPts; SmallVectorImpl<MemoryAccess *> &MSSAInsertPts; PredIteratorCache &PredCache; - AliasSetTracker *AST; + AliasSetTracker *AST; MemorySSAUpdater *MSSAU; LoopInfo &LI; DebugLoc DL; @@ -1842,7 +1842,7 @@ public: SmallVectorImpl<BasicBlock *> &LEB, SmallVectorImpl<Instruction *> &LIP, SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC, - AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li, + AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, int alignment, bool UnorderedAtomic, const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), @@ -1899,13 +1899,13 @@ public: void replaceLoadWithValue(LoadInst *LI, Value *V) const override { // Update alias analysis. - if (AST) - AST->copyValue(LI, V); + if (AST) + AST->copyValue(LI, V); } void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); - if (AST) - AST->deleteValue(I); + if (AST) + AST->deleteValue(I); if (MSSAU) MSSAU->removeMemoryAccess(I); } @@ -1951,7 +1951,7 @@ bool llvm::promoteLoopAccessesToScalars( ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && - SafetyInfo != nullptr && + SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); Value *SomePtr = *PointerMustAliases.begin(); @@ -2016,7 +2016,7 @@ bool llvm::promoteLoopAccessesToScalars( // we have to prove that the store is dead along the unwind edge. We do // this by proving that the caller can't have a reference to the object // after return and thus can't possibly load from the object. 
- Value *Object = getUnderlyingObject(SomePtr); + Value *Object = getUnderlyingObject(SomePtr); if (!isKnownNonEscaping(Object, TLI)) return false; // Subtlety: Alloca's aren't visible to callers, but *are* potentially @@ -2148,7 +2148,7 @@ bool llvm::promoteLoopAccessesToScalars( if (IsKnownThreadLocalObject) SafeToInsertStore = true; else { - Value *Object = getUnderlyingObject(SomePtr); + Value *Object = getUnderlyingObject(SomePtr); SafeToInsertStore = (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && !PointerMayBeCaptured(Object, true, true); @@ -2179,7 +2179,7 @@ bool llvm::promoteLoopAccessesToScalars( SmallVector<PHINode *, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL, + InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL, Alignment.value(), SawUnorderedAtomic, AATags, *SafetyInfo); @@ -2294,18 +2294,18 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, return false; } -bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, Instruction &I, - SinkAndHoistLICMFlags &Flags) { +bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, + Loop *CurLoop, Instruction &I, + SinkAndHoistLICMFlags &Flags) { // For hoisting, use the walker to determine safety - if (!Flags.getIsSink()) { + if (!Flags.getIsSink()) { MemoryAccess *Source; // See declaration of SetLicmMssaOptCap for usage details. - if (Flags.tooManyClobberingCalls()) + if (Flags.tooManyClobberingCalls()) Source = MU->getDefiningAccess(); else { Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); - Flags.incrementClobberingCalls(); + Flags.incrementClobberingCalls(); } return !MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()); @@ -2328,28 +2328,28 @@ bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, // FIXME: Increase precision: Safe to sink if Use post dominates the Def; // needs PostDominatorTreeAnalysis. // FIXME: More precise: no Defs that alias this Use. - if (Flags.tooManyMemoryAccesses()) + if (Flags.tooManyMemoryAccesses()) return true; for (auto *BB : CurLoop->getBlocks()) - if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) - return true; - // When sinking, the source block may not be part of the loop so check it. - if (!CurLoop->contains(&I)) - return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); - - return false; -} - -bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, - MemoryUse &MU) { - if (const auto *Accesses = MSSA.getBlockDefs(&BB)) - for (const auto &MA : *Accesses) - if (const auto *MD = dyn_cast<MemoryDef>(&MA)) - if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) - return true; + if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) + return true; + // When sinking, the source block may not be part of the loop so check it. 
+ if (!CurLoop->contains(&I)) + return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); + return false; } +bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU) { + if (const auto *Accesses = MSSA.getBlockDefs(&BB)) + for (const auto &MA : *Accesses) + if (const auto *MD = dyn_cast<MemoryDef>(&MA)) + if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) + return true; + return false; +} + /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 45cdcb2f37..1b6d3484bf 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -271,7 +271,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { bool MadeChange = false; // Only prefetch in the inner-most loop - if (!L->isInnermost()) + if (!L->isInnermost()) return MadeChange; SmallPtrSet<const Value *, 32> EphValues; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp index 1266c93316..3f896ef191 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp @@ -26,7 +26,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" - + using namespace llvm; #define DEBUG_TYPE "loop-delete" @@ -39,14 +39,14 @@ enum class LoopDeletionResult { Deleted, }; -static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) { - if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted) - return LoopDeletionResult::Deleted; - if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified) - return LoopDeletionResult::Modified; - return LoopDeletionResult::Unmodified; -} - +static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) { + if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted) + return LoopDeletionResult::Deleted; + if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified) + return LoopDeletionResult::Modified; + return LoopDeletionResult::Unmodified; +} + /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -62,28 +62,28 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, // of the loop. bool AllEntriesInvariant = true; bool AllOutgoingValuesSame = true; - if (!L->hasNoExitBlocks()) { - for (PHINode &P : ExitBlock->phis()) { - Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]); - - // Make sure all exiting blocks produce the same incoming value for the - // block. If there are different incoming values for different exiting - // blocks, then it is impossible to statically determine which value - // should be used. 
- AllOutgoingValuesSame = - all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) { - return incoming == P.getIncomingValueForBlock(BB); - }); - - if (!AllOutgoingValuesSame) - break; - - if (Instruction *I = dyn_cast<Instruction>(incoming)) - if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) { - AllEntriesInvariant = false; - break; - } - } + if (!L->hasNoExitBlocks()) { + for (PHINode &P : ExitBlock->phis()) { + Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]); + + // Make sure all exiting blocks produce the same incoming value for the + // block. If there are different incoming values for different exiting + // blocks, then it is impossible to statically determine which value + // should be used. + AllOutgoingValuesSame = + all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) { + return incoming == P.getIncomingValueForBlock(BB); + }); + + if (!AllOutgoingValuesSame) + break; + + if (Instruction *I = dyn_cast<Instruction>(incoming)) + if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) { + AllEntriesInvariant = false; + break; + } + } } if (Changed) @@ -96,9 +96,9 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, // This includes instructions that could write to memory, and loads that are // marked volatile. for (auto &I : L->blocks()) - if (any_of(*I, [](Instruction &I) { - return I.mayHaveSideEffects() && !I.isDroppable(); - })) + if (any_of(*I, [](Instruction &I) { + return I.mayHaveSideEffects() && !I.isDroppable(); + })) return false; return true; } @@ -135,33 +135,33 @@ static bool isLoopNeverExecuted(Loop *L) { return true; } -/// If we can prove the backedge is untaken, remove it. This destroys the -/// loop, but leaves the (now trivially loop invariant) control flow and -/// side effects (if any) in place. -static LoopDeletionResult -breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI, MemorySSA *MSSA, - OptimizationRemarkEmitter &ORE) { - assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - - if (!L->getLoopLatch()) - return LoopDeletionResult::Unmodified; - - auto *BTC = SE.getBackedgeTakenCount(L); - if (!BTC->isZero()) - return LoopDeletionResult::Unmodified; - - breakLoopBackedge(L, DT, SE, LI, MSSA); - return LoopDeletionResult::Deleted; -} - +/// If we can prove the backedge is untaken, remove it. This destroys the +/// loop, but leaves the (now trivially loop invariant) control flow and +/// side effects (if any) in place. +static LoopDeletionResult +breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, MemorySSA *MSSA, + OptimizationRemarkEmitter &ORE) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + + if (!L->getLoopLatch()) + return LoopDeletionResult::Unmodified; + + auto *BTC = SE.getBackedgeTakenCount(L); + if (!BTC->isZero()) + return LoopDeletionResult::Unmodified; + + breakLoopBackedge(L, DT, SE, LI, MSSA); + return LoopDeletionResult::Deleted; +} + /// Remove a loop if it is dead. /// -/// A loop is considered dead either if it does not impact the observable -/// behavior of the program other than finite running time, or if it is -/// required to make progress by an attribute such as 'mustprogress' or -/// 'llvm.loop.mustprogress' and does not make any. This may remove -/// infinite loops that have been required to make progress. 
+/// A loop is considered dead either if it does not impact the observable +/// behavior of the program other than finite running time, or if it is +/// required to make progress by an attribute such as 'mustprogress' or +/// 'llvm.loop.mustprogress' and does not make any. This may remove +/// infinite loops that have been required to make progress. /// /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in /// order to make various safety checks work. @@ -190,10 +190,10 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, if (ExitBlock && isLoopNeverExecuted(L)) { LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); - // We need to forget the loop before setting the incoming values of the exit - // phis to undef, so we properly invalidate the SCEV expressions for those - // phis. - SE.forgetLoop(L); + // We need to forget the loop before setting the incoming values of the exit + // phis to undef, so we properly invalidate the SCEV expressions for those + // phis. + SE.forgetLoop(L); // Set incoming value to undef for phi nodes in the exit block. for (PHINode &P : ExitBlock->phis()) { std::fill(P.incoming_values().begin(), P.incoming_values().end(), @@ -214,12 +214,12 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - // We require that the loop has at most one exit block. Otherwise, we'd be in - // the situation of needing to be able to solve statically which exit block - // will be branched to, or trying to preserve the branching logic in a loop - // invariant manner. - if (!ExitBlock && !L->hasNoExitBlocks()) { - LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n"); + // We require that the loop has at most one exit block. Otherwise, we'd be in + // the situation of needing to be able to solve statically which exit block + // will be branched to, or trying to preserve the branching logic in a loop + // invariant manner. + if (!ExitBlock && !L->hasNoExitBlocks()) { + LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n"); return LoopDeletionResult::Unmodified; } // Finally, we have to check that the loop really is dead. @@ -230,13 +230,13 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, : LoopDeletionResult::Unmodified; } - // Don't remove loops for which we can't solve the trip count unless the loop - // was required to make progress but has been determined to be dead. + // Don't remove loops for which we can't solve the trip count unless the loop + // was required to make progress but has been determined to be dead. const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(S) && - !L->getHeader()->getParent()->mustProgress() && !hasMustProgress(L)) { - LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was " - "not required to make progress.\n"); + if (isa<SCEVCouldNotCompute>(S) && + !L->getHeader()->getParent()->mustProgress() && !hasMustProgress(L)) { + LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was " + "not required to make progress.\n"); return Changed ? LoopDeletionResult::Modified : LoopDeletionResult::Unmodified; } @@ -265,14 +265,14 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, // but ORE cannot be preserved (see comment before the pass definition). 
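The mustprogress handling described above lets the pass drop loops whose trip count SCEV cannot compute. A hedged sketch of the kind of loop this covers, assuming the frontend marks the function mustprogress, the loop is already in the simplified/LCSSA form the pass requires, and nothing reads n afterwards (hypothetical example):

    // No memory side effects, no SCEV-computable trip count, result unused:
    // under the forward-progress guarantee this loop becomes deletable even
    // though it is not provably finite.
    void collatzSpin(unsigned n) {
      while (n != 1)
        n = (n & 1) ? 3 * n + 1 : n / 2;
    }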
OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE); - - // If we can prove the backedge isn't taken, just break it and be done. This - // leaves the loop structure in place which means it can handle dispatching - // to the right exit based on whatever loop invariant structure remains. - if (Result != LoopDeletionResult::Deleted) - Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI, - AR.MSSA, ORE)); - + + // If we can prove the backedge isn't taken, just break it and be done. This + // leaves the loop structure in place which means it can handle dispatching + // to the right exit based on whatever loop invariant structure remains. + if (Result != LoopDeletionResult::Deleted) + Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI, + AR.MSSA, ORE)); + if (Result == LoopDeletionResult::Unmodified) return PreservedAnalyses::all(); @@ -332,12 +332,12 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE); - // If we can prove the backedge isn't taken, just break it and be done. This - // leaves the loop structure in place which means it can handle dispatching - // to the right exit based on whatever loop invariant structure remains. - if (Result != LoopDeletionResult::Deleted) - Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE)); - + // If we can prove the backedge isn't taken, just break it and be done. This + // leaves the loop structure in place which means it can handle dispatching + // to the right exit based on whatever loop invariant structure remains. + if (Result != LoopDeletionResult::Deleted) + Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE)); + if (Result == LoopDeletionResult::Deleted) LPM.markLoopAsDeleted(*L); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp index 1bd2529891..0d467540e3 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp @@ -663,20 +663,20 @@ public: /// Try to distribute an inner-most loop. bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) { - assert(L->isInnermost() && "Only process inner loops."); + assert(L->isInnermost() && "Only process inner loops."); LLVM_DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName() << "\" checking " << *L << "\n"); - // Having a single exit block implies there's also one exiting block. + // Having a single exit block implies there's also one exiting block. 
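For context on what processLoop is about to gate: loop distribution splits one loop into several so that the dependence-free part can be optimized on its own. A textbook-style sketch of the before/after shape (illustrative only; the names are not from this diff):

    // Before: one loop mixes a loop-carried recurrence with independent work.
    //   for (i) { A[i + 1] = A[i] * B[i];  D[i] = C[i] * E[i]; }
    // After distribution, the second loop carries no dependence and is a
    // candidate for vectorization by later passes.
    void distributed(int n, int *A, const int *B, const int *C, int *D,
                     const int *E) {
      for (int i = 0; i < n; ++i)
        A[i + 1] = A[i] * B[i];   // recurrence stays serial
      for (int i = 0; i < n; ++i)
        D[i] = C[i] * E[i];       // independent once separated
    }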
if (!L->getExitBlock()) return fail("MultipleExitBlocks", "multiple exit blocks"); if (!L->isLoopSimplifyForm()) return fail("NotLoopSimplifyForm", "loop is not in loop-simplify form"); - if (!L->isRotatedForm()) - return fail("NotBottomTested", "loop is not bottom tested"); + if (!L->isRotatedForm()) + return fail("NotBottomTested", "loop is not bottom tested"); BasicBlock *PH = L->getLoopPreheader(); @@ -815,7 +815,7 @@ public: LLVM_DEBUG(dbgs() << "\nPointers:\n"); LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks)); - LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE); + LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE); LVer.versionLoop(DefsUsedOutside); LVer.annotateLoopWithNoAlias(); @@ -981,7 +981,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, for (Loop *TopLevelLoop : *LI) for (Loop *L : depth_first(TopLevelLoop)) // We only handle inner-most loops. - if (L->isInnermost()) + if (L->isInnermost()) Worklist.push_back(L); // Now walk the identified inner loops. @@ -1057,8 +1057,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F, auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); std::function<const LoopAccessInfo &(Loop &)> GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, - TLI, TTI, nullptr, nullptr}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, nullptr, nullptr}; return LAM.getResult<LoopAccessAnalysis>(L, AR); }; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp index aaff68436c..f7639dd02e 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp @@ -1,728 +1,728 @@ -//===- LoopFlatten.cpp - Loop flattening pass------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass flattens pairs nested loops into a single loop. -// -// The intention is to optimise loop nests like this, which together access an -// array linearly: -// for (int i = 0; i < N; ++i) -// for (int j = 0; j < M; ++j) -// f(A[i*M+j]); -// into one loop: -// for (int i = 0; i < (N*M); ++i) -// f(A[i]); -// -// It can also flatten loops where the induction variables are not used in the -// loop. This is only worth doing if the induction variables are only used in an -// expression like i*M+j. If they had any other uses, we would have to insert a -// div/mod to reconstruct the original values, so this wouldn't be profitable. -// -// We also need to prove that N*M will not overflow. 
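To make the overflow requirement above concrete (numbers picked for illustration; the pass reasons about IR, not source like this): with 32-bit induction variables and N = M = 100000, the flattened trip count is 10^10, well past UINT32_MAX, so a 32-bit combined IV would wrap before covering the iteration space.

    #include <cstdint>

    // Shape of the flattened loop when the product is computed in a wide
    // enough type; proving or arranging this is what the overflow checks and
    // IV widening below are for.
    void flattenedShape(float *A, uint32_t N, uint32_t M) {
      uint64_t Total = static_cast<uint64_t>(N) * M;   // 64-bit product of two 32-bit counts cannot wrap
      for (uint64_t i = 0; i < Total; ++i)
        A[i] += 1.0f;
    }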
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar/LoopFlatten.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Verifier.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/SimplifyIndVar.h" - -#define DEBUG_TYPE "loop-flatten" - -using namespace llvm; -using namespace llvm::PatternMatch; - -static cl::opt<unsigned> RepeatedInstructionThreshold( - "loop-flatten-cost-threshold", cl::Hidden, cl::init(2), - cl::desc("Limit on the cost of instructions that can be repeated due to " - "loop flattening")); - -static cl::opt<bool> - AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, - cl::init(false), - cl::desc("Assume that the product of the two iteration " - "limits will never overflow")); - -static cl::opt<bool> - WidenIV("loop-flatten-widen-iv", cl::Hidden, - cl::init(true), - cl::desc("Widen the loop induction variables, if possible, so " - "overflow checks won't reject flattening")); - -struct FlattenInfo { - Loop *OuterLoop = nullptr; - Loop *InnerLoop = nullptr; - PHINode *InnerInductionPHI = nullptr; - PHINode *OuterInductionPHI = nullptr; - Value *InnerLimit = nullptr; - Value *OuterLimit = nullptr; - BinaryOperator *InnerIncrement = nullptr; - BinaryOperator *OuterIncrement = nullptr; - BranchInst *InnerBranch = nullptr; - BranchInst *OuterBranch = nullptr; - SmallPtrSet<Value *, 4> LinearIVUses; - SmallPtrSet<PHINode *, 4> InnerPHIsToTransform; - - // Whether this holds the flatten info before or after widening. - bool Widened = false; - - FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; -}; - -// Finds the induction variable, increment and limit for a simple loop that we -// can flatten. -static bool findLoopComponents( - Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions, - PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment, - BranchInst *&BackBranch, ScalarEvolution *SE) { - LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n"); - - if (!L->isLoopSimplifyForm()) { - LLVM_DEBUG(dbgs() << "Loop is not in normal form\n"); - return false; - } - - // There must be exactly one exiting block, and it must be the same at the - // latch. - BasicBlock *Latch = L->getLoopLatch(); - if (L->getExitingBlock() != Latch) { - LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n"); - return false; - } - // Latch block must end in a conditional branch. - BackBranch = dyn_cast<BranchInst>(Latch->getTerminator()); - if (!BackBranch || !BackBranch->isConditional()) { - LLVM_DEBUG(dbgs() << "Could not find back-branch\n"); - return false; - } - IterationInstructions.insert(BackBranch); - LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump()); - bool ContinueOnTrue = L->contains(BackBranch->getSuccessor(0)); - - // Find the induction PHI. 
If there is no induction PHI, we can't do the - // transformation. TODO: could other variables trigger this? Do we have to - // search for the best one? - InductionPHI = nullptr; - for (PHINode &PHI : L->getHeader()->phis()) { - InductionDescriptor ID; - if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) { - InductionPHI = &PHI; - LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump()); - break; - } - } - if (!InductionPHI) { - LLVM_DEBUG(dbgs() << "Could not find induction PHI\n"); - return false; - } - - auto IsValidPredicate = [&](ICmpInst::Predicate Pred) { - if (ContinueOnTrue) - return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT; - else - return Pred == CmpInst::ICMP_EQ; - }; - - // Find Compare and make sure it is valid - ICmpInst *Compare = dyn_cast<ICmpInst>(BackBranch->getCondition()); - if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) || - Compare->hasNUsesOrMore(2)) { - LLVM_DEBUG(dbgs() << "Could not find valid comparison\n"); - return false; - } - IterationInstructions.insert(Compare); - LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump()); - - // Find increment and limit from the compare - Increment = nullptr; - if (match(Compare->getOperand(0), - m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { - Increment = dyn_cast<BinaryOperator>(Compare->getOperand(0)); - Limit = Compare->getOperand(1); - } else if (Compare->getUnsignedPredicate() == CmpInst::ICMP_NE && - match(Compare->getOperand(1), - m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { - Increment = dyn_cast<BinaryOperator>(Compare->getOperand(1)); - Limit = Compare->getOperand(0); - } - if (!Increment || Increment->hasNUsesOrMore(3)) { - LLVM_DEBUG(dbgs() << "Cound not find valid increment\n"); - return false; - } - IterationInstructions.insert(Increment); - LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump()); - LLVM_DEBUG(dbgs() << "Found limit: "; Limit->dump()); - - assert(InductionPHI->getNumIncomingValues() == 2); - assert(InductionPHI->getIncomingValueForBlock(Latch) == Increment && - "PHI value is not increment inst"); - - auto *CI = dyn_cast<ConstantInt>( - InductionPHI->getIncomingValueForBlock(L->getLoopPreheader())); - if (!CI || !CI->isZero()) { - LLVM_DEBUG(dbgs() << "PHI value is not zero: "; CI->dump()); - return false; - } - - LLVM_DEBUG(dbgs() << "Successfully found all loop components\n"); - return true; -} - -static bool checkPHIs(struct FlattenInfo &FI, - const TargetTransformInfo *TTI) { - // All PHIs in the inner and outer headers must either be: - // - The induction PHI, which we are going to rewrite as one induction in - // the new loop. This is already checked by findLoopComponents. - // - An outer header PHI with all incoming values from outside the loop. - // LoopSimplify guarantees we have a pre-header, so we don't need to - // worry about that here. - // - Pairs of PHIs in the inner and outer headers, which implement a - // loop-carried dependency that will still be valid in the new loop. To - // be valid, this variable must be modified only in the inner loop. - - // The set of PHI nodes in the outer loop header that we know will still be - // valid after the transformation. These will not need to be modified (with - // the exception of the induction variable), but we do need to check that - // there are no unsafe PHI nodes. - SmallPtrSet<PHINode *, 4> SafeOuterPHIs; - SafeOuterPHIs.insert(FI.OuterInductionPHI); - - // Check that all PHI nodes in the inner loop header match one of the valid - // patterns. 
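A source-level sketch of the PHI pair the rules above allow (hypothetical example): a value carried across both loops shows up as an inner-header PHI plus an outer-header PHI that merely hand it back and forth.

    // 'sum' is only modified inside the inner loop, so its inner and outer
    // header PHIs form the safe pair described above and the flattened loop
    // can keep a single accumulator.
    long sumAll(const int *A, unsigned N, unsigned M) {
      long sum = 0;
      for (unsigned i = 0; i < N; ++i)
        for (unsigned j = 0; j < M; ++j)
          sum += A[i * M + j];
      return sum;
    }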
- for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) { - // The induction PHIs break these rules, and that's OK because we treat - // them specially when doing the transformation. - if (&InnerPHI == FI.InnerInductionPHI) - continue; - - // Each inner loop PHI node must have two incoming values/blocks - one - // from the pre-header, and one from the latch. - assert(InnerPHI.getNumIncomingValues() == 2); - Value *PreHeaderValue = - InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader()); - Value *LatchValue = - InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch()); - - // The incoming value from the outer loop must be the PHI node in the - // outer loop header, with no modifications made in the top of the outer - // loop. - PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue); - if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) { - LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n"); - return false; - } - - // The other incoming value must come from the inner loop, without any - // modifications in the tail end of the outer loop. We are in LCSSA form, - // so this will actually be a PHI in the inner loop's exit block, which - // only uses values from inside the inner loop. - PHINode *LCSSAPHI = dyn_cast<PHINode>( - OuterPHI->getIncomingValueForBlock(FI.OuterLoop->getLoopLatch())); - if (!LCSSAPHI) { - LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n"); - return false; - } - - // The value used by the LCSSA PHI must be the same one that the inner - // loop's PHI uses. - if (LCSSAPHI->hasConstantValue() != LatchValue) { - LLVM_DEBUG( - dbgs() << "LCSSA PHI incoming value does not match latch value\n"); - return false; - } - - LLVM_DEBUG(dbgs() << "PHI pair is safe:\n"); - LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump()); - LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump()); - SafeOuterPHIs.insert(OuterPHI); - FI.InnerPHIsToTransform.insert(&InnerPHI); - } - - for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) { - if (!SafeOuterPHIs.count(&OuterPHI)) { - LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); - return false; - } - } - - LLVM_DEBUG(dbgs() << "checkPHIs: OK\n"); - return true; -} - -static bool -checkOuterLoopInsts(struct FlattenInfo &FI, - SmallPtrSetImpl<Instruction *> &IterationInstructions, - const TargetTransformInfo *TTI) { - // Check for instructions in the outer but not inner loop. If any of these - // have side-effects then this transformation is not legal, and if there is - // a significant amount of code here which can't be optimised out that it's - // not profitable (as these instructions would get executed for each - // iteration of the inner loop). - unsigned RepeatedInstrCost = 0; - for (auto *B : FI.OuterLoop->getBlocks()) { - if (FI.InnerLoop->contains(B)) - continue; - - for (auto &I : *B) { - if (!isa<PHINode>(&I) && !I.isTerminator() && - !isSafeToSpeculativelyExecute(&I)) { - LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have " - "side effects: "; - I.dump()); - return false; - } - // The execution count of the outer loop's iteration instructions - // (increment, compare and branch) will be increased, but the - // equivalent instructions will be removed from the inner loop, so - // they make a net difference of zero. - if (IterationInstructions.count(&I)) - continue; - // The uncoditional branch to the inner loop's header will turn into - // a fall-through, so adds no cost. 
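An illustration of the cost this walk is adding up (hypothetical source): side-effect-free work that lives in the outer loop but not the inner loop is legal to repeat, but after flattening it executes N*M times instead of N, which is what RepeatedInstructionThreshold bounds.

    // The computation of 't' is outer-loop-only arithmetic; it stays legal to
    // flatten, but its execution count grows from N to N*M in the collapsed
    // loop unless later passes can hoist or fold it.
    void scaleRows(float *A, unsigned N, unsigned M) {
      for (unsigned i = 0; i < N; ++i) {
        float t = static_cast<float>(i) * 0.5f + 3.0f;
        for (unsigned j = 0; j < M; ++j)
          A[i * M + j] *= t;
      }
    }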
- BranchInst *Br = dyn_cast<BranchInst>(&I); - if (Br && Br->isUnconditional() && - Br->getSuccessor(0) == FI.InnerLoop->getHeader()) - continue; - // Multiplies of the outer iteration variable and inner iteration - // count will be optimised out. - if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI), - m_Specific(FI.InnerLimit)))) - continue; - int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); - LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump()); - RepeatedInstrCost += Cost; - } - } - - LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: " - << RepeatedInstrCost << "\n"); - // Bail out if flattening the loops would cause instructions in the outer - // loop but not in the inner loop to be executed extra times. - if (RepeatedInstrCost > RepeatedInstructionThreshold) { - LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n"); - return false; - } - - LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n"); - return true; -} - -static bool checkIVUsers(struct FlattenInfo &FI) { - // We require all uses of both induction variables to match this pattern: - // - // (OuterPHI * InnerLimit) + InnerPHI - // - // Any uses of the induction variables not matching that pattern would - // require a div/mod to reconstruct in the flattened loop, so the - // transformation wouldn't be profitable. - - Value *InnerLimit = FI.InnerLimit; - if (FI.Widened && - (isa<SExtInst>(InnerLimit) || isa<ZExtInst>(InnerLimit))) - InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0); - - // Check that all uses of the inner loop's induction variable match the - // expected pattern, recording the uses of the outer IV. - SmallPtrSet<Value *, 4> ValidOuterPHIUses; - for (User *U : FI.InnerInductionPHI->users()) { - if (U == FI.InnerIncrement) - continue; - - // After widening the IVs, a trunc instruction might have been introduced, so - // look through truncs. - if (isa<TruncInst>(U)) { - if (!U->hasOneUse()) - return false; - U = *U->user_begin(); - } - - LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump()); - - Value *MatchedMul; - Value *MatchedItCount; - bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI), - m_Value(MatchedMul))) && - match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI), - m_Value(MatchedItCount))); - - // Matches the same pattern as above, except it also looks for truncs - // on the phi, which can be the result of widening the induction variables. - bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)), - m_Value(MatchedMul))) && - match(MatchedMul, - m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)), - m_Value(MatchedItCount))); - - if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) { - LLVM_DEBUG(dbgs() << "Use is optimisable\n"); - ValidOuterPHIUses.insert(MatchedMul); - FI.LinearIVUses.insert(U); - } else { - LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n"); - return false; - } - } - - // Check that there are no uses of the outer IV other than the ones found - // as part of the pattern above. 
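The required use pattern, shown at source level (hypothetical): the two IVs may only appear combined as i*M + j. A use of i on its own would need a divide in the flattened loop to reconstruct it, which is exactly the unprofitable case being screened out.

    // Acceptable: every use of i and j is the linear form i * M + j.
    // Not acceptable: something like R[i] += A[i * M + j], because recovering
    // 'i' from the single flattened IV would require a div/mod.
    void fillOk(int *A, unsigned N, unsigned M) {
      for (unsigned i = 0; i < N; ++i)
        for (unsigned j = 0; j < M; ++j)
          A[i * M + j] = 0;
    }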
- for (User *U : FI.OuterInductionPHI->users()) { - if (U == FI.OuterIncrement) - continue; - - auto IsValidOuterPHIUses = [&] (User *U) -> bool { - LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump()); - if (!ValidOuterPHIUses.count(U)) { - LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n"); - return false; - } - LLVM_DEBUG(dbgs() << "Use is optimisable\n"); - return true; - }; - - if (auto *V = dyn_cast<TruncInst>(U)) { - for (auto *K : V->users()) { - if (!IsValidOuterPHIUses(K)) - return false; - } - continue; - } - - if (!IsValidOuterPHIUses(U)) - return false; - } - - LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n"; - dbgs() << "Found " << FI.LinearIVUses.size() - << " value(s) that can be replaced:\n"; - for (Value *V : FI.LinearIVUses) { - dbgs() << " "; - V->dump(); - }); - return true; -} - -// Return an OverflowResult dependant on if overflow of the multiplication of -// InnerLimit and OuterLimit can be assumed not to happen. -static OverflowResult checkOverflow(struct FlattenInfo &FI, - DominatorTree *DT, AssumptionCache *AC) { - Function *F = FI.OuterLoop->getHeader()->getParent(); - const DataLayout &DL = F->getParent()->getDataLayout(); - - // For debugging/testing. - if (AssumeNoOverflow) - return OverflowResult::NeverOverflows; - - // Check if the multiply could not overflow due to known ranges of the - // input values. - OverflowResult OR = computeOverflowForUnsignedMul( - FI.InnerLimit, FI.OuterLimit, DL, AC, - FI.OuterLoop->getLoopPreheader()->getTerminator(), DT); - if (OR != OverflowResult::MayOverflow) - return OR; - - for (Value *V : FI.LinearIVUses) { - for (Value *U : V->users()) { - if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { - // The IV is used as the operand of a GEP, and the IV is at least as - // wide as the address space of the GEP. In this case, the GEP would - // wrap around the address space before the IV increment wraps, which - // would be UB. - if (GEP->isInBounds() && - V->getType()->getIntegerBitWidth() >= - DL.getPointerTypeSizeInBits(GEP->getType())) { - LLVM_DEBUG( - dbgs() << "use of linear IV would be UB if overflow occurred: "; - GEP->dump()); - return OverflowResult::NeverOverflows; - } - } - } - } - - return OverflowResult::MayOverflow; -} - -static bool CanFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, - AssumptionCache *AC, const TargetTransformInfo *TTI) { - SmallPtrSet<Instruction *, 8> IterationInstructions; - if (!findLoopComponents(FI.InnerLoop, IterationInstructions, FI.InnerInductionPHI, - FI.InnerLimit, FI.InnerIncrement, FI.InnerBranch, SE)) - return false; - if (!findLoopComponents(FI.OuterLoop, IterationInstructions, FI.OuterInductionPHI, - FI.OuterLimit, FI.OuterIncrement, FI.OuterBranch, SE)) - return false; - - // Both of the loop limit values must be invariant in the outer loop - // (non-instructions are all inherently invariant). - if (!FI.OuterLoop->isLoopInvariant(FI.InnerLimit)) { - LLVM_DEBUG(dbgs() << "inner loop limit not invariant\n"); - return false; - } - if (!FI.OuterLoop->isLoopInvariant(FI.OuterLimit)) { - LLVM_DEBUG(dbgs() << "outer loop limit not invariant\n"); - return false; - } - - if (!checkPHIs(FI, TTI)) - return false; - - // FIXME: it should be possible to handle different types correctly. 
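A sketch of the invariance requirement just stated: in a triangular nest the inner bound depends on the outer IV, so there is no single InnerLimit to multiply by and the nest is rejected (illustrative example):

    // The inner limit 'i' changes every outer iteration; it is not invariant
    // in the outer loop, so this nest cannot be flattened by the scheme above.
    void lowerTriangle(float *A, unsigned N) {
      for (unsigned i = 0; i < N; ++i)
        for (unsigned j = 0; j < i; ++j)
          A[i * N + j] = 0.0f;
    }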
- if (FI.InnerInductionPHI->getType() != FI.OuterInductionPHI->getType()) - return false; - - if (!checkOuterLoopInsts(FI, IterationInstructions, TTI)) - return false; - - // Find the values in the loop that can be replaced with the linearized - // induction variable, and check that there are no other uses of the inner - // or outer induction variable. If there were, we could still do this - // transformation, but we'd have to insert a div/mod to calculate the - // original IVs, so it wouldn't be profitable. - if (!checkIVUsers(FI)) - return false; - - LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n"); - return true; -} - -static bool DoFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, - AssumptionCache *AC, - const TargetTransformInfo *TTI) { - Function *F = FI.OuterLoop->getHeader()->getParent(); - LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n"); - { - using namespace ore; - OptimizationRemark Remark(DEBUG_TYPE, "Flattened", FI.InnerLoop->getStartLoc(), - FI.InnerLoop->getHeader()); - OptimizationRemarkEmitter ORE(F); - Remark << "Flattened into outer loop"; - ORE.emit(Remark); - } - - Value *NewTripCount = - BinaryOperator::CreateMul(FI.InnerLimit, FI.OuterLimit, "flatten.tripcount", - FI.OuterLoop->getLoopPreheader()->getTerminator()); - LLVM_DEBUG(dbgs() << "Created new trip count in preheader: "; - NewTripCount->dump()); - - // Fix up PHI nodes that take values from the inner loop back-edge, which - // we are about to remove. - FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); - - // The old Phi will be optimised away later, but for now we can't leave - // leave it in an invalid state, so are updating them too. - for (PHINode *PHI : FI.InnerPHIsToTransform) - PHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); - - // Modify the trip count of the outer loop to be the product of the two - // trip counts. - cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount); - - // Replace the inner loop backedge with an unconditional branch to the exit. - BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock(); - BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock(); - InnerExitingBlock->getTerminator()->eraseFromParent(); - BranchInst::Create(InnerExitBlock, InnerExitingBlock); - DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader()); - - // Replace all uses of the polynomial calculated from the two induction - // variables with the one new one. - IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator()); - for (Value *V : FI.LinearIVUses) { - Value *OuterValue = FI.OuterInductionPHI; - if (FI.Widened) - OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(), - "flatten.trunciv"); - - LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); - dbgs() << "with: "; OuterValue->dump()); - V->replaceAllUsesWith(OuterValue); - } - - // Tell LoopInfo, SCEV and the pass manager that the inner loop has been - // deleted, and any information that have about the outer loop invalidated. 
- SE->forgetLoop(FI.OuterLoop); - SE->forgetLoop(FI.InnerLoop); - LI->erase(FI.InnerLoop); - return true; -} - -static bool CanWidenIV(struct FlattenInfo &FI, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, - AssumptionCache *AC, const TargetTransformInfo *TTI) { - if (!WidenIV) { - LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n"); - return false; - } - - LLVM_DEBUG(dbgs() << "Try widening the IVs\n"); - Module *M = FI.InnerLoop->getHeader()->getParent()->getParent(); - auto &DL = M->getDataLayout(); - auto *InnerType = FI.InnerInductionPHI->getType(); - auto *OuterType = FI.OuterInductionPHI->getType(); - unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits(); - auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext()); - - // If both induction types are less than the maximum legal integer width, - // promote both to the widest type available so we know calculating - // (OuterLimit * InnerLimit) as the new trip count is safe. - if (InnerType != OuterType || - InnerType->getScalarSizeInBits() >= MaxLegalSize || - MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) { - LLVM_DEBUG(dbgs() << "Can't widen the IV\n"); - return false; - } - - SCEVExpander Rewriter(*SE, DL, "loopflatten"); - SmallVector<WideIVInfo, 2> WideIVs; - SmallVector<WeakTrackingVH, 4> DeadInsts; - WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false }); - WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false }); - unsigned ElimExt; - unsigned Widened; - - for (unsigned i = 0; i < WideIVs.size(); i++) { - PHINode *WidePhi = createWideIV(WideIVs[i], LI, SE, Rewriter, DT, DeadInsts, - ElimExt, Widened, true /* HasGuards */, - true /* UsePostIncrementRanges */); - if (!WidePhi) - return false; - LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump()); - LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIVs[i].NarrowIV->dump()); - RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV); - } - // After widening, rediscover all the loop components. - assert(Widened && "Widenend IV expected"); - FI.Widened = true; - return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); -} - -static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, - AssumptionCache *AC, - const TargetTransformInfo *TTI) { - LLVM_DEBUG( - dbgs() << "Loop flattening running on outer loop " - << FI.OuterLoop->getHeader()->getName() << " and inner loop " - << FI.InnerLoop->getHeader()->getName() << " in " - << FI.OuterLoop->getHeader()->getParent()->getName() << "\n"); - - if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI)) - return false; - - // Check if we can widen the induction variables to avoid overflow checks. - if (CanWidenIV(FI, DT, LI, SE, AC, TTI)) - return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI); - - // Check if the new iteration variable might overflow. In this case, we - // need to version the loop, and select the original version at runtime if - // the iteration space is too large. - // TODO: We currently don't version the loop. 
- OverflowResult OR = checkOverflow(FI, DT, AC); - if (OR == OverflowResult::AlwaysOverflowsHigh || - OR == OverflowResult::AlwaysOverflowsLow) { - LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n"); - return false; - } else if (OR == OverflowResult::MayOverflow) { - LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n"); - return false; - } - - LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n"); - return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI); -} - -bool Flatten(DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, - AssumptionCache *AC, TargetTransformInfo *TTI) { - bool Changed = false; - for (auto *InnerLoop : LI->getLoopsInPreorder()) { - auto *OuterLoop = InnerLoop->getParentLoop(); - if (!OuterLoop) - continue; - struct FlattenInfo FI(OuterLoop, InnerLoop); - Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI); - } - return Changed; -} - -PreservedAnalyses LoopFlattenPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *LI = &AM.getResult<LoopAnalysis>(F); - auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); - auto *AC = &AM.getResult<AssumptionAnalysis>(F); - auto *TTI = &AM.getResult<TargetIRAnalysis>(F); - - if (!Flatten(DT, LI, SE, AC, TTI)) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; -} - -namespace { -class LoopFlattenLegacyPass : public FunctionPass { -public: - static char ID; // Pass ID, replacement for typeid - LoopFlattenLegacyPass() : FunctionPass(ID) { - initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - // Possibly flatten loop L into its child. - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - getLoopAnalysisUsage(AU); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addPreserved<TargetTransformInfoWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addPreserved<AssumptionCacheTracker>(); - } -}; -} // namespace - -char LoopFlattenLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", - false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", - false, false) - -FunctionPass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); } - -bool LoopFlattenLegacyPass::runOnFunction(Function &F) { - ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>(); - auto *TTI = &TTIP.getTTI(F); - auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - return Flatten(DT, LI, SE, AC, TTI); -} +//===- LoopFlatten.cpp - Loop flattening pass------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass flattens pairs nested loops into a single loop. 
+// +// The intention is to optimise loop nests like this, which together access an +// array linearly: +// for (int i = 0; i < N; ++i) +// for (int j = 0; j < M; ++j) +// f(A[i*M+j]); +// into one loop: +// for (int i = 0; i < (N*M); ++i) +// f(A[i]); +// +// It can also flatten loops where the induction variables are not used in the +// loop. This is only worth doing if the induction variables are only used in an +// expression like i*M+j. If they had any other uses, we would have to insert a +// div/mod to reconstruct the original values, so this wouldn't be profitable. +// +// We also need to prove that N*M will not overflow. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LoopFlatten.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" + +#define DEBUG_TYPE "loop-flatten" + +using namespace llvm; +using namespace llvm::PatternMatch; + +static cl::opt<unsigned> RepeatedInstructionThreshold( + "loop-flatten-cost-threshold", cl::Hidden, cl::init(2), + cl::desc("Limit on the cost of instructions that can be repeated due to " + "loop flattening")); + +static cl::opt<bool> + AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, + cl::init(false), + cl::desc("Assume that the product of the two iteration " + "limits will never overflow")); + +static cl::opt<bool> + WidenIV("loop-flatten-widen-iv", cl::Hidden, + cl::init(true), + cl::desc("Widen the loop induction variables, if possible, so " + "overflow checks won't reject flattening")); + +struct FlattenInfo { + Loop *OuterLoop = nullptr; + Loop *InnerLoop = nullptr; + PHINode *InnerInductionPHI = nullptr; + PHINode *OuterInductionPHI = nullptr; + Value *InnerLimit = nullptr; + Value *OuterLimit = nullptr; + BinaryOperator *InnerIncrement = nullptr; + BinaryOperator *OuterIncrement = nullptr; + BranchInst *InnerBranch = nullptr; + BranchInst *OuterBranch = nullptr; + SmallPtrSet<Value *, 4> LinearIVUses; + SmallPtrSet<PHINode *, 4> InnerPHIsToTransform; + + // Whether this holds the flatten info before or after widening. + bool Widened = false; + + FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; +}; + +// Finds the induction variable, increment and limit for a simple loop that we +// can flatten. 
+static bool findLoopComponents( + Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions, + PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment, + BranchInst *&BackBranch, ScalarEvolution *SE) { + LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n"); + + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG(dbgs() << "Loop is not in normal form\n"); + return false; + } + + // There must be exactly one exiting block, and it must be the same at the + // latch. + BasicBlock *Latch = L->getLoopLatch(); + if (L->getExitingBlock() != Latch) { + LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n"); + return false; + } + // Latch block must end in a conditional branch. + BackBranch = dyn_cast<BranchInst>(Latch->getTerminator()); + if (!BackBranch || !BackBranch->isConditional()) { + LLVM_DEBUG(dbgs() << "Could not find back-branch\n"); + return false; + } + IterationInstructions.insert(BackBranch); + LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump()); + bool ContinueOnTrue = L->contains(BackBranch->getSuccessor(0)); + + // Find the induction PHI. If there is no induction PHI, we can't do the + // transformation. TODO: could other variables trigger this? Do we have to + // search for the best one? + InductionPHI = nullptr; + for (PHINode &PHI : L->getHeader()->phis()) { + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) { + InductionPHI = &PHI; + LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump()); + break; + } + } + if (!InductionPHI) { + LLVM_DEBUG(dbgs() << "Could not find induction PHI\n"); + return false; + } + + auto IsValidPredicate = [&](ICmpInst::Predicate Pred) { + if (ContinueOnTrue) + return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT; + else + return Pred == CmpInst::ICMP_EQ; + }; + + // Find Compare and make sure it is valid + ICmpInst *Compare = dyn_cast<ICmpInst>(BackBranch->getCondition()); + if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) || + Compare->hasNUsesOrMore(2)) { + LLVM_DEBUG(dbgs() << "Could not find valid comparison\n"); + return false; + } + IterationInstructions.insert(Compare); + LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump()); + + // Find increment and limit from the compare + Increment = nullptr; + if (match(Compare->getOperand(0), + m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { + Increment = dyn_cast<BinaryOperator>(Compare->getOperand(0)); + Limit = Compare->getOperand(1); + } else if (Compare->getUnsignedPredicate() == CmpInst::ICMP_NE && + match(Compare->getOperand(1), + m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { + Increment = dyn_cast<BinaryOperator>(Compare->getOperand(1)); + Limit = Compare->getOperand(0); + } + if (!Increment || Increment->hasNUsesOrMore(3)) { + LLVM_DEBUG(dbgs() << "Cound not find valid increment\n"); + return false; + } + IterationInstructions.insert(Increment); + LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump()); + LLVM_DEBUG(dbgs() << "Found limit: "; Limit->dump()); + + assert(InductionPHI->getNumIncomingValues() == 2); + assert(InductionPHI->getIncomingValueForBlock(Latch) == Increment && + "PHI value is not increment inst"); + + auto *CI = dyn_cast<ConstantInt>( + InductionPHI->getIncomingValueForBlock(L->getLoopPreheader())); + if (!CI || !CI->isZero()) { + LLVM_DEBUG(dbgs() << "PHI value is not zero: "; CI->dump()); + return false; + } + + LLVM_DEBUG(dbgs() << "Successfully found all loop components\n"); + return true; +} + +static bool 
checkPHIs(struct FlattenInfo &FI, + const TargetTransformInfo *TTI) { + // All PHIs in the inner and outer headers must either be: + // - The induction PHI, which we are going to rewrite as one induction in + // the new loop. This is already checked by findLoopComponents. + // - An outer header PHI with all incoming values from outside the loop. + // LoopSimplify guarantees we have a pre-header, so we don't need to + // worry about that here. + // - Pairs of PHIs in the inner and outer headers, which implement a + // loop-carried dependency that will still be valid in the new loop. To + // be valid, this variable must be modified only in the inner loop. + + // The set of PHI nodes in the outer loop header that we know will still be + // valid after the transformation. These will not need to be modified (with + // the exception of the induction variable), but we do need to check that + // there are no unsafe PHI nodes. + SmallPtrSet<PHINode *, 4> SafeOuterPHIs; + SafeOuterPHIs.insert(FI.OuterInductionPHI); + + // Check that all PHI nodes in the inner loop header match one of the valid + // patterns. + for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) { + // The induction PHIs break these rules, and that's OK because we treat + // them specially when doing the transformation. + if (&InnerPHI == FI.InnerInductionPHI) + continue; + + // Each inner loop PHI node must have two incoming values/blocks - one + // from the pre-header, and one from the latch. + assert(InnerPHI.getNumIncomingValues() == 2); + Value *PreHeaderValue = + InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader()); + Value *LatchValue = + InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch()); + + // The incoming value from the outer loop must be the PHI node in the + // outer loop header, with no modifications made in the top of the outer + // loop. + PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue); + if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) { + LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n"); + return false; + } + + // The other incoming value must come from the inner loop, without any + // modifications in the tail end of the outer loop. We are in LCSSA form, + // so this will actually be a PHI in the inner loop's exit block, which + // only uses values from inside the inner loop. + PHINode *LCSSAPHI = dyn_cast<PHINode>( + OuterPHI->getIncomingValueForBlock(FI.OuterLoop->getLoopLatch())); + if (!LCSSAPHI) { + LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n"); + return false; + } + + // The value used by the LCSSA PHI must be the same one that the inner + // loop's PHI uses. + if (LCSSAPHI->hasConstantValue() != LatchValue) { + LLVM_DEBUG( + dbgs() << "LCSSA PHI incoming value does not match latch value\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "PHI pair is safe:\n"); + LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump()); + LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump()); + SafeOuterPHIs.insert(OuterPHI); + FI.InnerPHIsToTransform.insert(&InnerPHI); + } + + for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) { + if (!SafeOuterPHIs.count(&OuterPHI)) { + LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); + return false; + } + } + + LLVM_DEBUG(dbgs() << "checkPHIs: OK\n"); + return true; +} + +static bool +checkOuterLoopInsts(struct FlattenInfo &FI, + SmallPtrSetImpl<Instruction *> &IterationInstructions, + const TargetTransformInfo *TTI) { + // Check for instructions in the outer but not inner loop. 
If any of these + // have side-effects then this transformation is not legal, and if there is + // a significant amount of code here which can't be optimised out that it's + // not profitable (as these instructions would get executed for each + // iteration of the inner loop). + unsigned RepeatedInstrCost = 0; + for (auto *B : FI.OuterLoop->getBlocks()) { + if (FI.InnerLoop->contains(B)) + continue; + + for (auto &I : *B) { + if (!isa<PHINode>(&I) && !I.isTerminator() && + !isSafeToSpeculativelyExecute(&I)) { + LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have " + "side effects: "; + I.dump()); + return false; + } + // The execution count of the outer loop's iteration instructions + // (increment, compare and branch) will be increased, but the + // equivalent instructions will be removed from the inner loop, so + // they make a net difference of zero. + if (IterationInstructions.count(&I)) + continue; + // The uncoditional branch to the inner loop's header will turn into + // a fall-through, so adds no cost. + BranchInst *Br = dyn_cast<BranchInst>(&I); + if (Br && Br->isUnconditional() && + Br->getSuccessor(0) == FI.InnerLoop->getHeader()) + continue; + // Multiplies of the outer iteration variable and inner iteration + // count will be optimised out. + if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI), + m_Specific(FI.InnerLimit)))) + continue; + int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump()); + RepeatedInstrCost += Cost; + } + } + + LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: " + << RepeatedInstrCost << "\n"); + // Bail out if flattening the loops would cause instructions in the outer + // loop but not in the inner loop to be executed extra times. + if (RepeatedInstrCost > RepeatedInstructionThreshold) { + LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n"); + return true; +} + +static bool checkIVUsers(struct FlattenInfo &FI) { + // We require all uses of both induction variables to match this pattern: + // + // (OuterPHI * InnerLimit) + InnerPHI + // + // Any uses of the induction variables not matching that pattern would + // require a div/mod to reconstruct in the flattened loop, so the + // transformation wouldn't be profitable. + + Value *InnerLimit = FI.InnerLimit; + if (FI.Widened && + (isa<SExtInst>(InnerLimit) || isa<ZExtInst>(InnerLimit))) + InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0); + + // Check that all uses of the inner loop's induction variable match the + // expected pattern, recording the uses of the outer IV. + SmallPtrSet<Value *, 4> ValidOuterPHIUses; + for (User *U : FI.InnerInductionPHI->users()) { + if (U == FI.InnerIncrement) + continue; + + // After widening the IVs, a trunc instruction might have been introduced, so + // look through truncs. + if (isa<TruncInst>(U)) { + if (!U->hasOneUse()) + return false; + U = *U->user_begin(); + } + + LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump()); + + Value *MatchedMul; + Value *MatchedItCount; + bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI), + m_Value(MatchedMul))) && + match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI), + m_Value(MatchedItCount))); + + // Matches the same pattern as above, except it also looks for truncs + // on the phi, which can be the result of widening the induction variables. 
+ bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)), + m_Value(MatchedMul))) && + match(MatchedMul, + m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)), + m_Value(MatchedItCount))); + + if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) { + LLVM_DEBUG(dbgs() << "Use is optimisable\n"); + ValidOuterPHIUses.insert(MatchedMul); + FI.LinearIVUses.insert(U); + } else { + LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n"); + return false; + } + } + + // Check that there are no uses of the outer IV other than the ones found + // as part of the pattern above. + for (User *U : FI.OuterInductionPHI->users()) { + if (U == FI.OuterIncrement) + continue; + + auto IsValidOuterPHIUses = [&] (User *U) -> bool { + LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump()); + if (!ValidOuterPHIUses.count(U)) { + LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Use is optimisable\n"); + return true; + }; + + if (auto *V = dyn_cast<TruncInst>(U)) { + for (auto *K : V->users()) { + if (!IsValidOuterPHIUses(K)) + return false; + } + continue; + } + + if (!IsValidOuterPHIUses(U)) + return false; + } + + LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n"; + dbgs() << "Found " << FI.LinearIVUses.size() + << " value(s) that can be replaced:\n"; + for (Value *V : FI.LinearIVUses) { + dbgs() << " "; + V->dump(); + }); + return true; +} + +// Return an OverflowResult dependant on if overflow of the multiplication of +// InnerLimit and OuterLimit can be assumed not to happen. +static OverflowResult checkOverflow(struct FlattenInfo &FI, + DominatorTree *DT, AssumptionCache *AC) { + Function *F = FI.OuterLoop->getHeader()->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + + // For debugging/testing. + if (AssumeNoOverflow) + return OverflowResult::NeverOverflows; + + // Check if the multiply could not overflow due to known ranges of the + // input values. + OverflowResult OR = computeOverflowForUnsignedMul( + FI.InnerLimit, FI.OuterLimit, DL, AC, + FI.OuterLoop->getLoopPreheader()->getTerminator(), DT); + if (OR != OverflowResult::MayOverflow) + return OR; + + for (Value *V : FI.LinearIVUses) { + for (Value *U : V->users()) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { + // The IV is used as the operand of a GEP, and the IV is at least as + // wide as the address space of the GEP. In this case, the GEP would + // wrap around the address space before the IV increment wraps, which + // would be UB. + if (GEP->isInBounds() && + V->getType()->getIntegerBitWidth() >= + DL.getPointerTypeSizeInBits(GEP->getType())) { + LLVM_DEBUG( + dbgs() << "use of linear IV would be UB if overflow occurred: "; + GEP->dump()); + return OverflowResult::NeverOverflows; + } + } + } + } + + return OverflowResult::MayOverflow; +} + +static bool CanFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, + LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, const TargetTransformInfo *TTI) { + SmallPtrSet<Instruction *, 8> IterationInstructions; + if (!findLoopComponents(FI.InnerLoop, IterationInstructions, FI.InnerInductionPHI, + FI.InnerLimit, FI.InnerIncrement, FI.InnerBranch, SE)) + return false; + if (!findLoopComponents(FI.OuterLoop, IterationInstructions, FI.OuterInductionPHI, + FI.OuterLimit, FI.OuterIncrement, FI.OuterBranch, SE)) + return false; + + // Both of the loop limit values must be invariant in the outer loop + // (non-instructions are all inherently invariant). 
+ if (!FI.OuterLoop->isLoopInvariant(FI.InnerLimit)) { + LLVM_DEBUG(dbgs() << "inner loop limit not invariant\n"); + return false; + } + if (!FI.OuterLoop->isLoopInvariant(FI.OuterLimit)) { + LLVM_DEBUG(dbgs() << "outer loop limit not invariant\n"); + return false; + } + + if (!checkPHIs(FI, TTI)) + return false; + + // FIXME: it should be possible to handle different types correctly. + if (FI.InnerInductionPHI->getType() != FI.OuterInductionPHI->getType()) + return false; + + if (!checkOuterLoopInsts(FI, IterationInstructions, TTI)) + return false; + + // Find the values in the loop that can be replaced with the linearized + // induction variable, and check that there are no other uses of the inner + // or outer induction variable. If there were, we could still do this + // transformation, but we'd have to insert a div/mod to calculate the + // original IVs, so it wouldn't be profitable. + if (!checkIVUsers(FI)) + return false; + + LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n"); + return true; +} + +static bool DoFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, + LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, + const TargetTransformInfo *TTI) { + Function *F = FI.OuterLoop->getHeader()->getParent(); + LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n"); + { + using namespace ore; + OptimizationRemark Remark(DEBUG_TYPE, "Flattened", FI.InnerLoop->getStartLoc(), + FI.InnerLoop->getHeader()); + OptimizationRemarkEmitter ORE(F); + Remark << "Flattened into outer loop"; + ORE.emit(Remark); + } + + Value *NewTripCount = + BinaryOperator::CreateMul(FI.InnerLimit, FI.OuterLimit, "flatten.tripcount", + FI.OuterLoop->getLoopPreheader()->getTerminator()); + LLVM_DEBUG(dbgs() << "Created new trip count in preheader: "; + NewTripCount->dump()); + + // Fix up PHI nodes that take values from the inner loop back-edge, which + // we are about to remove. + FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); + + // The old Phi will be optimised away later, but for now we can't leave + // leave it in an invalid state, so are updating them too. + for (PHINode *PHI : FI.InnerPHIsToTransform) + PHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); + + // Modify the trip count of the outer loop to be the product of the two + // trip counts. + cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount); + + // Replace the inner loop backedge with an unconditional branch to the exit. + BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock(); + BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock(); + InnerExitingBlock->getTerminator()->eraseFromParent(); + BranchInst::Create(InnerExitBlock, InnerExitingBlock); + DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader()); + + // Replace all uses of the polynomial calculated from the two induction + // variables with the one new one. + IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator()); + for (Value *V : FI.LinearIVUses) { + Value *OuterValue = FI.OuterInductionPHI; + if (FI.Widened) + OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(), + "flatten.trunciv"); + + LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); + dbgs() << "with: "; OuterValue->dump()); + V->replaceAllUsesWith(OuterValue); + } + + // Tell LoopInfo, SCEV and the pass manager that the inner loop has been + // deleted, and any information that have about the outer loop invalidated. 
+ SE->forgetLoop(FI.OuterLoop); + SE->forgetLoop(FI.InnerLoop); + LI->erase(FI.InnerLoop); + return true; +} + +static bool CanWidenIV(struct FlattenInfo &FI, DominatorTree *DT, + LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, const TargetTransformInfo *TTI) { + if (!WidenIV) { + LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "Try widening the IVs\n"); + Module *M = FI.InnerLoop->getHeader()->getParent()->getParent(); + auto &DL = M->getDataLayout(); + auto *InnerType = FI.InnerInductionPHI->getType(); + auto *OuterType = FI.OuterInductionPHI->getType(); + unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits(); + auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext()); + + // If both induction types are less than the maximum legal integer width, + // promote both to the widest type available so we know calculating + // (OuterLimit * InnerLimit) as the new trip count is safe. + if (InnerType != OuterType || + InnerType->getScalarSizeInBits() >= MaxLegalSize || + MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) { + LLVM_DEBUG(dbgs() << "Can't widen the IV\n"); + return false; + } + + SCEVExpander Rewriter(*SE, DL, "loopflatten"); + SmallVector<WideIVInfo, 2> WideIVs; + SmallVector<WeakTrackingVH, 4> DeadInsts; + WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false }); + WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false }); + unsigned ElimExt; + unsigned Widened; + + for (unsigned i = 0; i < WideIVs.size(); i++) { + PHINode *WidePhi = createWideIV(WideIVs[i], LI, SE, Rewriter, DT, DeadInsts, + ElimExt, Widened, true /* HasGuards */, + true /* UsePostIncrementRanges */); + if (!WidePhi) + return false; + LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump()); + LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIVs[i].NarrowIV->dump()); + RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV); + } + // After widening, rediscover all the loop components. + assert(Widened && "Widenend IV expected"); + FI.Widened = true; + return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); +} + +static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, + LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, + const TargetTransformInfo *TTI) { + LLVM_DEBUG( + dbgs() << "Loop flattening running on outer loop " + << FI.OuterLoop->getHeader()->getName() << " and inner loop " + << FI.InnerLoop->getHeader()->getName() << " in " + << FI.OuterLoop->getHeader()->getParent()->getName() << "\n"); + + if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI)) + return false; + + // Check if we can widen the induction variables to avoid overflow checks. + if (CanWidenIV(FI, DT, LI, SE, AC, TTI)) + return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI); + + // Check if the new iteration variable might overflow. In this case, we + // need to version the loop, and select the original version at runtime if + // the iteration space is too large. + // TODO: We currently don't version the loop. 
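A worked note on why widening sidesteps the overflow check, under the assumptions CanWidenIV enforces above (both IVs share a type at most half the width of the largest legal integer): two 32-bit trip counts are each below 2^32, so their 64-bit product is below 2^64 and cannot wrap.

    #include <cstdint>

    // With i32 IVs widened to i64, the combined trip count always fits:
    // N, M <= UINT32_MAX implies N * M <= (2^32 - 1)^2 < 2^64.
    uint64_t widenedTripCount(uint32_t N, uint32_t M) {
      return static_cast<uint64_t>(N) * static_cast<uint64_t>(M);
    }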
+ OverflowResult OR = checkOverflow(FI, DT, AC); + if (OR == OverflowResult::AlwaysOverflowsHigh || + OR == OverflowResult::AlwaysOverflowsLow) { + LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n"); + return false; + } else if (OR == OverflowResult::MayOverflow) { + LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n"); + return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI); +} + +bool Flatten(DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, TargetTransformInfo *TTI) { + bool Changed = false; + for (auto *InnerLoop : LI->getLoopsInPreorder()) { + auto *OuterLoop = InnerLoop->getParentLoop(); + if (!OuterLoop) + continue; + struct FlattenInfo FI(OuterLoop, InnerLoop); + Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI); + } + return Changed; +} + +PreservedAnalyses LoopFlattenPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *LI = &AM.getResult<LoopAnalysis>(F); + auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); + auto *AC = &AM.getResult<AssumptionAnalysis>(F); + auto *TTI = &AM.getResult<TargetIRAnalysis>(F); + + if (!Flatten(DT, LI, SE, AC, TTI)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +namespace { +class LoopFlattenLegacyPass : public FunctionPass { +public: + static char ID; // Pass ID, replacement for typeid + LoopFlattenLegacyPass() : FunctionPass(ID) { + initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + // Possibly flatten loop L into its child. + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + getLoopAnalysisUsage(AU); + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addPreserved<TargetTransformInfoWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addPreserved<AssumptionCacheTracker>(); + } +}; +} // namespace + +char LoopFlattenLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", + false, false) + +FunctionPass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); } + +bool LoopFlattenLegacyPass::runOnFunction(Function &F) { + ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>(); + auto *TTI = &TTIP.getTTI(F); + auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + return Flatten(DT, LI, SE, AC, TTI); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp index b5f8dfa9aa..8131b7060a 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp @@ -46,7 +46,7 @@ #include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" @@ -54,7 +54,7 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -66,7 +66,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeMoverUtils.h" -#include "llvm/Transforms/Utils/LoopPeel.h" +#include "llvm/Transforms/Utils/LoopPeel.h" using namespace llvm; @@ -117,11 +117,11 @@ static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis( "Use all available analyses")), cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore); -static cl::opt<unsigned> FusionPeelMaxCount( - "loop-fusion-peel-max-count", cl::init(0), cl::Hidden, - cl::desc("Max number of iterations to be peeled from a loop, such that " - "fusion can take place")); - +static cl::opt<unsigned> FusionPeelMaxCount( + "loop-fusion-peel-max-count", cl::init(0), cl::Hidden, + cl::desc("Max number of iterations to be peeled from a loop, such that " + "fusion can take place")); + #ifndef NDEBUG static cl::opt<bool> VerboseFusionDebugging("loop-fusion-verbose-debug", @@ -165,12 +165,12 @@ struct FusionCandidate { bool Valid; /// Guard branch of the loop, if it exists BranchInst *GuardBranch; - /// Peeling Paramaters of the Loop. - TTI::PeelingPreferences PP; - /// Can you Peel this Loop? - bool AbleToPeel; - /// Has this loop been Peeled - bool Peeled; + /// Peeling Paramaters of the Loop. + TTI::PeelingPreferences PP; + /// Can you Peel this Loop? + bool AbleToPeel; + /// Has this loop been Peeled + bool Peeled; /// Dominator and PostDominator trees are needed for the /// FusionCandidateCompare function, required by FusionCandidateSet to @@ -182,13 +182,13 @@ struct FusionCandidate { OptimizationRemarkEmitter &ORE; FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE, - TTI::PeelingPreferences PP) + const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE, + TTI::PeelingPreferences PP) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), Latch(L->getLoopLatch()), L(L), Valid(true), - GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)), - Peeled(false), DT(DT), PDT(PDT), ORE(ORE) { + GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)), + Peeled(false), DT(DT), PDT(PDT), ORE(ORE) { // Walk over all blocks in the loop and check for conditions that may // prevent fusion. 
For each block, walk over all instructions and collect @@ -259,17 +259,17 @@ struct FusionCandidate { return Preheader; } - /// After Peeling the loop is modified quite a bit, hence all of the Blocks - /// need to be updated accordingly. - void updateAfterPeeling() { - Preheader = L->getLoopPreheader(); - Header = L->getHeader(); - ExitingBlock = L->getExitingBlock(); - ExitBlock = L->getExitBlock(); - Latch = L->getLoopLatch(); - verify(); - } - + /// After Peeling the loop is modified quite a bit, hence all of the Blocks + /// need to be updated accordingly. + void updateAfterPeeling() { + Preheader = L->getLoopPreheader(); + Header = L->getHeader(); + ExitingBlock = L->getExitingBlock(); + ExitBlock = L->getExitBlock(); + Latch = L->getLoopLatch(); + verify(); + } + /// Given a guarded loop, get the successor of the guard that is not in the /// loop. /// @@ -281,8 +281,8 @@ struct FusionCandidate { assert(GuardBranch && "Only valid on guarded loops."); assert(GuardBranch->isConditional() && "Expecting guard to be a conditional branch."); - if (Peeled) - return GuardBranch->getSuccessor(1); + if (Peeled) + return GuardBranch->getSuccessor(1); return (GuardBranch->getSuccessor(0) == Preheader) ? GuardBranch->getSuccessor(1) : GuardBranch->getSuccessor(0); @@ -544,17 +544,17 @@ private: ScalarEvolution &SE; PostDominatorTree &PDT; OptimizationRemarkEmitter &ORE; - AssumptionCache &AC; - - const TargetTransformInfo &TTI; + AssumptionCache &AC; + const TargetTransformInfo &TTI; + public: LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI, ScalarEvolution &SE, PostDominatorTree &PDT, - OptimizationRemarkEmitter &ORE, const DataLayout &DL, - AssumptionCache &AC, const TargetTransformInfo &TTI) + OptimizationRemarkEmitter &ORE, const DataLayout &DL, + AssumptionCache &AC, const TargetTransformInfo &TTI) : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI), - DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {} + DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {} /// This is the main entry point for loop fusion. It will traverse the /// specified function and collect candidate loops to fuse, starting at the @@ -639,9 +639,9 @@ private: /// Flow Equivalent sets, sorted by dominance. void collectFusionCandidates(const LoopVector &LV) { for (Loop *L : LV) { - TTI::PeelingPreferences PP = - gatherPeelingPreferences(L, SE, TTI, None, None); - FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP); + TTI::PeelingPreferences PP = + gatherPeelingPreferences(L, SE, TTI, None, None); + FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP); if (!CurrCand.isEligibleForFusion(SE)) continue; @@ -691,135 +691,135 @@ private: /// Determine if two fusion candidates have the same trip count (i.e., they /// execute the same number of iterations). /// - /// This function will return a pair of values. The first is a boolean, - /// stating whether or not the two candidates are known at compile time to - /// have the same TripCount. The second is the difference in the two - /// TripCounts. This information can be used later to determine whether or not - /// peeling can be performed on either one of the candiates. - std::pair<bool, Optional<unsigned>> - haveIdenticalTripCounts(const FusionCandidate &FC0, - const FusionCandidate &FC1) const { - + /// This function will return a pair of values. The first is a boolean, + /// stating whether or not the two candidates are known at compile time to + /// have the same TripCount. The second is the difference in the two + /// TripCounts. 
This information can be used later to determine whether or not + /// peeling can be performed on either one of the candiates. + std::pair<bool, Optional<unsigned>> + haveIdenticalTripCounts(const FusionCandidate &FC0, + const FusionCandidate &FC1) const { + const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L); if (isa<SCEVCouldNotCompute>(TripCount0)) { UncomputableTripCount++; LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!"); - return {false, None}; + return {false, None}; } const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L); if (isa<SCEVCouldNotCompute>(TripCount1)) { UncomputableTripCount++; LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!"); - return {false, None}; + return {false, None}; } - + LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & " << *TripCount1 << " are " << (TripCount0 == TripCount1 ? "identical" : "different") << "\n"); - if (TripCount0 == TripCount1) - return {true, 0}; - - LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, " - "determining the difference between trip counts\n"); - - // Currently only considering loops with a single exit point - // and a non-constant trip count. - const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L); - const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L); - - // If any of the tripcounts are zero that means that loop(s) do not have - // a single exit or a constant tripcount. - if (TC0 == 0 || TC1 == 0) { - LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not " - "have a constant number of iterations. Peeling " - "is not benefical\n"); - return {false, None}; - } - - Optional<unsigned> Difference = None; - int Diff = TC0 - TC1; - - if (Diff > 0) - Difference = Diff; - else { - LLVM_DEBUG( - dbgs() << "Difference is less than 0. FC1 (second loop) has more " - "iterations than the first one. Currently not supported\n"); - } - - LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference - << "\n"); - - return {false, Difference}; - } - - void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1, - unsigned PeelCount) { - assert(FC0.AbleToPeel && "Should be able to peel loop"); - - LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount - << " iterations of the first loop. \n"); - - FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true); - if (FC0.Peeled) { - LLVM_DEBUG(dbgs() << "Done Peeling\n"); - -#ifndef NDEBUG - auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1); - - assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 && - "Loops should have identical trip counts after peeling"); -#endif - - FC0.PP.PeelCount += PeelCount; - - // Peeling does not update the PDT - PDT.recalculate(*FC0.Preheader->getParent()); - - FC0.updateAfterPeeling(); - - // In this case the iterations of the loop are constant, so the first - // loop will execute completely (will not jump from one of - // the peeled blocks to the second loop). Here we are updating the - // branch conditions of each of the peeled blocks, such that it will - // branch to its successor which is not the preheader of the second loop - // in the case of unguarded loops, or the succesors of the exit block of - // the first loop otherwise. Doing this update will ensure that the entry - // block of the first loop dominates the entry block of the second loop. - BasicBlock *BB = - FC0.GuardBranch ? 
FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader; - if (BB) { - SmallVector<DominatorTree::UpdateType, 8> TreeUpdates; - SmallVector<Instruction *, 8> WorkList; - for (BasicBlock *Pred : predecessors(BB)) { - if (Pred != FC0.ExitBlock) { - WorkList.emplace_back(Pred->getTerminator()); - TreeUpdates.emplace_back( - DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB)); - } - } - // Cannot modify the predecessors inside the above loop as it will cause - // the iterators to be nullptrs, causing memory errors. - for (Instruction *CurrentBranch: WorkList) { - BasicBlock *Succ = CurrentBranch->getSuccessor(0); - if (Succ == BB) - Succ = CurrentBranch->getSuccessor(1); - ReplaceInstWithInst(CurrentBranch, BranchInst::Create(Succ)); - } - - DTU.applyUpdates(TreeUpdates); - DTU.flush(); - } - LLVM_DEBUG( - dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount - << " iterations from the first loop.\n" - "Both Loops have the same number of iterations now.\n"); - } + if (TripCount0 == TripCount1) + return {true, 0}; + + LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, " + "determining the difference between trip counts\n"); + + // Currently only considering loops with a single exit point + // and a non-constant trip count. + const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L); + const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L); + + // If any of the tripcounts are zero that means that loop(s) do not have + // a single exit or a constant tripcount. + if (TC0 == 0 || TC1 == 0) { + LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not " + "have a constant number of iterations. Peeling " + "is not benefical\n"); + return {false, None}; + } + + Optional<unsigned> Difference = None; + int Diff = TC0 - TC1; + + if (Diff > 0) + Difference = Diff; + else { + LLVM_DEBUG( + dbgs() << "Difference is less than 0. FC1 (second loop) has more " + "iterations than the first one. Currently not supported\n"); + } + + LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference + << "\n"); + + return {false, Difference}; } + void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1, + unsigned PeelCount) { + assert(FC0.AbleToPeel && "Should be able to peel loop"); + + LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount + << " iterations of the first loop. \n"); + + FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true); + if (FC0.Peeled) { + LLVM_DEBUG(dbgs() << "Done Peeling\n"); + +#ifndef NDEBUG + auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1); + + assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 && + "Loops should have identical trip counts after peeling"); +#endif + + FC0.PP.PeelCount += PeelCount; + + // Peeling does not update the PDT + PDT.recalculate(*FC0.Preheader->getParent()); + + FC0.updateAfterPeeling(); + + // In this case the iterations of the loop are constant, so the first + // loop will execute completely (will not jump from one of + // the peeled blocks to the second loop). Here we are updating the + // branch conditions of each of the peeled blocks, such that it will + // branch to its successor which is not the preheader of the second loop + // in the case of unguarded loops, or the succesors of the exit block of + // the first loop otherwise. Doing this update will ensure that the entry + // block of the first loop dominates the entry block of the second loop. + BasicBlock *BB = + FC0.GuardBranch ? 
FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader; + if (BB) { + SmallVector<DominatorTree::UpdateType, 8> TreeUpdates; + SmallVector<Instruction *, 8> WorkList; + for (BasicBlock *Pred : predecessors(BB)) { + if (Pred != FC0.ExitBlock) { + WorkList.emplace_back(Pred->getTerminator()); + TreeUpdates.emplace_back( + DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB)); + } + } + // Cannot modify the predecessors inside the above loop as it will cause + // the iterators to be nullptrs, causing memory errors. + for (Instruction *CurrentBranch: WorkList) { + BasicBlock *Succ = CurrentBranch->getSuccessor(0); + if (Succ == BB) + Succ = CurrentBranch->getSuccessor(1); + ReplaceInstWithInst(CurrentBranch, BranchInst::Create(Succ)); + } + + DTU.applyUpdates(TreeUpdates); + DTU.flush(); + } + LLVM_DEBUG( + dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount + << " iterations from the first loop.\n" + "Both Loops have the same number of iterations now.\n"); + } + } + /// Walk each set of control flow equivalent fusion candidates and attempt to /// fuse them. This does a single linear traversal of all candidates in the /// set. The conditions for legal fusion are checked at this point. If a pair @@ -851,32 +851,32 @@ private: FC0->verify(); FC1->verify(); - // Check if the candidates have identical tripcounts (first value of - // pair), and if not check the difference in the tripcounts between - // the loops (second value of pair). The difference is not equal to - // None iff the loops iterate a constant number of times, and have a - // single exit. - std::pair<bool, Optional<unsigned>> IdenticalTripCountRes = - haveIdenticalTripCounts(*FC0, *FC1); - bool SameTripCount = IdenticalTripCountRes.first; - Optional<unsigned> TCDifference = IdenticalTripCountRes.second; - - // Here we are checking that FC0 (the first loop) can be peeled, and - // both loops have different tripcounts. - if (FC0->AbleToPeel && !SameTripCount && TCDifference) { - if (*TCDifference > FusionPeelMaxCount) { - LLVM_DEBUG(dbgs() - << "Difference in loop trip counts: " << *TCDifference - << " is greater than maximum peel count specificed: " - << FusionPeelMaxCount << "\n"); - } else { - // Dependent on peeling being performed on the first loop, and - // assuming all other conditions for fusion return true. - SameTripCount = true; - } - } - - if (!SameTripCount) { + // Check if the candidates have identical tripcounts (first value of + // pair), and if not check the difference in the tripcounts between + // the loops (second value of pair). The difference is not equal to + // None iff the loops iterate a constant number of times, and have a + // single exit. + std::pair<bool, Optional<unsigned>> IdenticalTripCountRes = + haveIdenticalTripCounts(*FC0, *FC1); + bool SameTripCount = IdenticalTripCountRes.first; + Optional<unsigned> TCDifference = IdenticalTripCountRes.second; + + // Here we are checking that FC0 (the first loop) can be peeled, and + // both loops have different tripcounts. + if (FC0->AbleToPeel && !SameTripCount && TCDifference) { + if (*TCDifference > FusionPeelMaxCount) { + LLVM_DEBUG(dbgs() + << "Difference in loop trip counts: " << *TCDifference + << " is greater than maximum peel count specificed: " + << FusionPeelMaxCount << "\n"); + } else { + // Dependent on peeling being performed on the first loop, and + // assuming all other conditions for fusion return true. + SameTripCount = true; + } + } + + if (!SameTripCount) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip " "counts. 
Not fusing.\n"); reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, @@ -894,7 +894,7 @@ private: // Ensure that FC0 and FC1 have identical guards. // If one (or both) are not guarded, this check is not necessary. if (FC0->GuardBranch && FC1->GuardBranch && - !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) { + !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical " "guards. Not Fusing.\n"); reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, @@ -963,23 +963,23 @@ private: LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and " << *FC1 << "\n"); - FusionCandidate FC0Copy = *FC0; - // Peel the loop after determining that fusion is legal. The Loops - // will still be safe to fuse after the peeling is performed. - bool Peel = TCDifference && *TCDifference > 0; - if (Peel) - peelFusionCandidate(FC0Copy, *FC1, *TCDifference); - + FusionCandidate FC0Copy = *FC0; + // Peel the loop after determining that fusion is legal. The Loops + // will still be safe to fuse after the peeling is performed. + bool Peel = TCDifference && *TCDifference > 0; + if (Peel) + peelFusionCandidate(FC0Copy, *FC1, *TCDifference); + // Report fusion to the Optimization Remarks. // Note this needs to be done *before* performFusion because // performFusion will change the original loops, making it not // possible to identify them after fusion is complete. - reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1, - FuseCounter); + reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1, + FuseCounter); - FusionCandidate FusedCand( - performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE, - FC0Copy.PP); + FusionCandidate FusedCand( + performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE, + FC0Copy.PP); FusedCand.verify(); assert(FusedCand.isEligibleForFusion(SE) && "Fused candidate should be eligible for fusion!"); @@ -1256,17 +1256,17 @@ private: return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader); } - /// Modify the latch branch of FC to be unconditional since successors of the - /// branch are the same. + /// Modify the latch branch of FC to be unconditional since successors of the + /// branch are the same. void simplifyLatchBranch(const FusionCandidate &FC) const { BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator()); if (FCLatchBranch) { assert(FCLatchBranch->isConditional() && FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) && "Expecting the two successors of FCLatchBranch to be the same"); - BranchInst *NewBranch = - BranchInst::Create(FCLatchBranch->getSuccessor(0)); - ReplaceInstWithInst(FCLatchBranch, NewBranch); + BranchInst *NewBranch = + BranchInst::Create(FCLatchBranch->getSuccessor(0)); + ReplaceInstWithInst(FCLatchBranch, NewBranch); } } @@ -1326,8 +1326,8 @@ private: if (FC0.GuardBranch) return fuseGuardedLoops(FC0, FC1); - assert(FC1.Preheader == - (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock)); + assert(FC1.Preheader == + (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock)); assert(FC1.Preheader->size() == 1 && FC1.Preheader->getSingleSuccessor() == FC1.Header); @@ -1369,30 +1369,30 @@ private: // to FC1.Header? 
I think this is basically what the three sequences are // trying to accomplish; however, doing this directly in the CFG may mean // the DT/PDT becomes invalid - if (!FC0.Peeled) { - FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader, - FC1.Header); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader)); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); - } else { - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader)); - - // Remove the ExitBlock of the first Loop (also not needed) - FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock, - FC1.Header); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock)); - FC0.ExitBlock->getTerminator()->eraseFromParent(); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); - new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock); - } - + if (!FC0.Peeled) { + FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader, + FC1.Header); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); + } else { + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader)); + + // Remove the ExitBlock of the first Loop (also not needed) + FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock, + FC1.Header); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock)); + FC0.ExitBlock->getTerminator()->eraseFromParent(); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); + new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock); + } + // The pre-header of L1 is not necessary anymore. - assert(pred_empty(FC1.Preheader)); + assert(pred_empty(FC1.Preheader)); FC1.Preheader->getTerminator()->eraseFromParent(); new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader); TreeUpdates.emplace_back(DominatorTree::UpdateType( @@ -1433,7 +1433,7 @@ private: FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); - // Modify the latch branch of FC0 to be unconditional as both successors of + // Modify the latch branch of FC0 to be unconditional as both successors of // the branch are the same. simplifyLatchBranch(FC0); @@ -1455,11 +1455,11 @@ private: LI.removeBlock(FC1.Preheader); DTU.deleteBB(FC1.Preheader); - if (FC0.Peeled) { - LI.removeBlock(FC0.ExitBlock); - DTU.deleteBB(FC0.ExitBlock); - } - + if (FC0.Peeled) { + LI.removeBlock(FC0.ExitBlock); + DTU.deleteBB(FC0.ExitBlock); + } + DTU.flush(); // Is there a way to keep SE up-to-date so we don't need to forget the loops @@ -1474,7 +1474,7 @@ private: mergeLatch(FC0, FC1); // Merge the loops. 
- SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks()); + SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks()); for (BasicBlock *BB : Blocks) { FC0.L->addBlockEntry(BB); FC1.L->removeBlockFromLoop(BB); @@ -1482,7 +1482,7 @@ private: continue; LI.changeLoopFor(BB, FC0.L); } - while (!FC1.L->isInnermost()) { + while (!FC1.L->isInnermost()) { const auto &ChildLoopIt = FC1.L->begin(); Loop *ChildLoop = *ChildLoopIt; FC1.L->removeChildLoop(ChildLoopIt); @@ -1555,15 +1555,15 @@ private: BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent(); BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock(); BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock(); - BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor(); + BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor(); // Move instructions from the exit block of FC0 to the beginning of the exit - // block of FC1, in the case that the FC0 loop has not been peeled. In the - // case that FC0 loop is peeled, then move the instructions of the successor - // of the FC0 Exit block to the beginning of the exit block of FC1. - moveInstructionsToTheBeginning( - (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock, - DT, PDT, DI); + // block of FC1, in the case that the FC0 loop has not been peeled. In the + // case that FC0 loop is peeled, then move the instructions of the successor + // of the FC0 Exit block to the beginning of the exit block of FC1. + moveInstructionsToTheBeginning( + (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock, + DT, PDT, DI); // Move instructions from the guard block of FC1 to the end of the guard // block of FC0. @@ -1584,9 +1584,9 @@ private: FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock); FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock); - BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock; - BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header); - + BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock; + BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header); + // The guard of FC1 is not necessary anymore. 
FC1.GuardBranch->eraseFromParent(); new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock); @@ -1600,18 +1600,18 @@ private: TreeUpdates.emplace_back(DominatorTree::UpdateType( DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock)); - if (FC0.Peeled) { - // Remove the Block after the ExitBlock of FC0 - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock)); - FC0ExitBlockSuccessor->getTerminator()->eraseFromParent(); - new UnreachableInst(FC0ExitBlockSuccessor->getContext(), - FC0ExitBlockSuccessor); - } - - assert(pred_empty(FC1GuardBlock) && + if (FC0.Peeled) { + // Remove the Block after the ExitBlock of FC0 + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock)); + FC0ExitBlockSuccessor->getTerminator()->eraseFromParent(); + new UnreachableInst(FC0ExitBlockSuccessor->getContext(), + FC0ExitBlockSuccessor); + } + + assert(pred_empty(FC1GuardBlock) && "Expecting guard block to have no predecessors"); - assert(succ_empty(FC1GuardBlock) && + assert(succ_empty(FC1GuardBlock) && "Expecting guard block to have no successors"); // Remember the phi nodes originally in the header of FC0 in order to rewire @@ -1665,13 +1665,13 @@ private: // TODO: In the future, we can handle non-empty exit blocks my merging any // instructions from FC0 exit block into FC1 exit block prior to removing // the block. - assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty"); + assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty"); FC0.ExitBlock->getTerminator()->eraseFromParent(); new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock); // Remove FC1 Preheader // The pre-header of L1 is not necessary anymore. - assert(pred_empty(FC1.Preheader)); + assert(pred_empty(FC1.Preheader)); FC1.Preheader->getTerminator()->eraseFromParent(); new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader); TreeUpdates.emplace_back(DominatorTree::UpdateType( @@ -1714,7 +1714,7 @@ private: FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); - // Modify the latch branch of FC0 to be unconditional as both successors of + // Modify the latch branch of FC0 to be unconditional as both successors of // the branch are the same. simplifyLatchBranch(FC0); @@ -1734,8 +1734,8 @@ private: // All done // Apply the updates to the Dominator Tree and cleanup. - assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!"); - assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!"); + assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!"); + assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!"); // Update DT/PDT DTU.applyUpdates(TreeUpdates); @@ -1743,10 +1743,10 @@ private: LI.removeBlock(FC1GuardBlock); LI.removeBlock(FC1.Preheader); LI.removeBlock(FC0.ExitBlock); - if (FC0.Peeled) { - LI.removeBlock(FC0ExitBlockSuccessor); - DTU.deleteBB(FC0ExitBlockSuccessor); - } + if (FC0.Peeled) { + LI.removeBlock(FC0ExitBlockSuccessor); + DTU.deleteBB(FC0ExitBlockSuccessor); + } DTU.deleteBB(FC1GuardBlock); DTU.deleteBB(FC1.Preheader); DTU.deleteBB(FC0.ExitBlock); @@ -1764,7 +1764,7 @@ private: mergeLatch(FC0, FC1); // Merge the loops. 
- SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks()); + SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks()); for (BasicBlock *BB : Blocks) { FC0.L->addBlockEntry(BB); FC1.L->removeBlockFromLoop(BB); @@ -1772,7 +1772,7 @@ private: continue; LI.changeLoopFor(BB, FC0.L); } - while (!FC1.L->isInnermost()) { + while (!FC1.L->isInnermost()) { const auto &ChildLoopIt = FC1.L->begin(); Loop *ChildLoop = *ChildLoopIt; FC1.L->removeChildLoop(ChildLoopIt); @@ -1812,8 +1812,8 @@ struct LoopFuseLegacy : public FunctionPass { AU.addRequired<PostDominatorTreeWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addRequired<DependenceAnalysisWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); @@ -1830,12 +1830,12 @@ struct LoopFuseLegacy : public FunctionPass { auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - const DataLayout &DL = F.getParent()->getDataLayout(); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + const DataLayout &DL = F.getParent()->getDataLayout(); - LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI); + LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI); return LF.fuseLoops(F); } }; @@ -1848,11 +1848,11 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) { auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); - auto &AC = AM.getResult<AssumptionAnalysis>(F); - const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); - const DataLayout &DL = F.getParent()->getDataLayout(); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); + const DataLayout &DL = F.getParent()->getDataLayout(); - LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI); + LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI); bool Changed = LF.fuseLoops(F); if (!Changed) return PreservedAnalyses::all(); @@ -1875,8 +1875,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false) FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 8064c02e2b..e60c95b7be 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ 
b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -47,7 +47,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CmpInstAnalysis.h" +#include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -80,7 +80,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -108,33 +108,33 @@ using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); -STATISTIC( - NumShiftUntilBitTest, - "Number of uncountable loops recognized as 'shift until bitttest' idiom"); - -bool DisableLIRP::All; -static cl::opt<bool, true> - DisableLIRPAll("disable-" DEBUG_TYPE "-all", - cl::desc("Options to disable Loop Idiom Recognize Pass."), - cl::location(DisableLIRP::All), cl::init(false), - cl::ReallyHidden); - -bool DisableLIRP::Memset; -static cl::opt<bool, true> - DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", - cl::desc("Proceed with loop idiom recognize pass, but do " - "not convert loop(s) to memset."), - cl::location(DisableLIRP::Memset), cl::init(false), - cl::ReallyHidden); - -bool DisableLIRP::Memcpy; -static cl::opt<bool, true> - DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", - cl::desc("Proceed with loop idiom recognize pass, but do " - "not convert loop(s) to memcpy."), - cl::location(DisableLIRP::Memcpy), cl::init(false), - cl::ReallyHidden); - +STATISTIC( + NumShiftUntilBitTest, + "Number of uncountable loops recognized as 'shift until bitttest' idiom"); + +bool DisableLIRP::All; +static cl::opt<bool, true> + DisableLIRPAll("disable-" DEBUG_TYPE "-all", + cl::desc("Options to disable Loop Idiom Recognize Pass."), + cl::location(DisableLIRP::All), cl::init(false), + cl::ReallyHidden); + +bool DisableLIRP::Memset; +static cl::opt<bool, true> + DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", + cl::desc("Proceed with loop idiom recognize pass, but do " + "not convert loop(s) to memset."), + cl::location(DisableLIRP::Memset), cl::init(false), + cl::ReallyHidden); + +bool DisableLIRP::Memcpy; +static cl::opt<bool, true> + DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", + cl::desc("Proceed with loop idiom recognize pass, but do " + "not convert loop(s) to memcpy."), + cl::location(DisableLIRP::Memcpy), cl::init(false), + cl::ReallyHidden); + static cl::opt<bool> UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" @@ -232,8 +232,8 @@ private: const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); - bool recognizeShiftUntilBitTest(); - + bool recognizeShiftUntilBitTest(); + /// @} }; @@ -247,9 +247,9 @@ public: } bool runOnLoop(Loop *L, LPPassManager &LPM) override { - if (DisableLIRP::All) - return false; - + if (DisableLIRP::All) + return false; + if (skipLoop(L)) return false; @@ -295,9 +295,9 @@ char LoopIdiomRecognizeLegacyPass::ID = 0; PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { - if (DisableLIRP::All) - return PreservedAnalyses::all(); - + if (DisableLIRP::All) + return PreservedAnalyses::all(); + const auto *DL = 
&L.getHeader()->getModule()->getDataLayout(); // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis @@ -469,17 +469,17 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - // Don't convert stores of non-integral pointer types to memsets (which stores - // integers). - if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType())) - return LegalStoreKind::None; - + // Don't convert stores of non-integral pointer types to memsets (which stores + // integers). + if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + return LegalStoreKind::None; + // Reject stores that are so large that they overflow an unsigned. - // When storing out scalable vectors we bail out for now, since the code - // below currently only works for constant strides. - TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); - if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) || - (SizeInBits.getFixedSize() >> 32) != 0) + // When storing out scalable vectors we bail out for now, since the code + // below currently only works for constant strides. + TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); + if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) || + (SizeInBits.getFixedSize() >> 32) != 0) return LegalStoreKind::None; // See if the pointer expression is an AddRec like {base,+,1} on the current @@ -508,13 +508,13 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // If we're allowed to form a memset, and the stored value would be // acceptable for memset, use it. - if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset && + if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset && // Verify that the stored value is loop invariant. If not, we can't // promote the memset. CurLoop->isLoopInvariant(SplatValue)) { // It looks like we can use SplatValue. return LegalStoreKind::Memset; - } else if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset && + } else if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset && // Don't create memset_pattern16s with address spaces. StorePtr->getType()->getPointerAddressSpace() == 0 && (PatternValue = getMemSetPatternValue(StoredVal, DL))) { @@ -523,7 +523,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { } // Otherwise, see if the store can be turned into a memcpy. - if (HasMemcpy && !DisableLIRP::Memcpy) { + if (HasMemcpy && !DisableLIRP::Memcpy) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. APInt Stride = getStoreStride(StoreEv); @@ -578,12 +578,12 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) { break; case LegalStoreKind::Memset: { // Find the base pointer. - Value *Ptr = getUnderlyingObject(SI->getPointerOperand()); + Value *Ptr = getUnderlyingObject(SI->getPointerOperand()); StoreRefsForMemset[Ptr].push_back(SI); } break; case LegalStoreKind::MemsetPattern: { // Find the base pointer. - Value *Ptr = getUnderlyingObject(SI->getPointerOperand()); + Value *Ptr = getUnderlyingObject(SI->getPointerOperand()); StoreRefsForMemsetPattern[Ptr].push_back(SI); } break; case LegalStoreKind::Memcpy: @@ -851,7 +851,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, // Get the location that may be stored across the loop. 
Since the access is // strided positively through memory, we say that the modified location starts // at the pointer and has infinite size. - LocationSize AccessSize = LocationSize::afterPointer(); + LocationSize AccessSize = LocationSize::afterPointer(); // If the loop iterates a fixed number of times, we can refine the access size // to be exactly the size of the memset, which is (BECount+1)*StoreSize @@ -903,8 +903,8 @@ static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr, // If we're going to need to zero extend the BE count, check if we can add // one to it prior to zero extending without overflow. Provided this is safe, // it allows better simplification of the +1. - if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() < - DL->getTypeSizeInBits(IntPtr).getFixedSize() && + if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() < + DL->getTypeSizeInBits(IntPtr).getFixedSize() && SE->isLoopEntryGuardedByCond( CurLoop, ICmpInst::ICMP_NE, BECount, SE->getNegativeSCEV(SE->getOne(BECount->getType())))) { @@ -947,12 +947,12 @@ bool LoopIdiomRecognize::processLoopStridedStore( BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, *DL, "loop-idiom"); - SCEVExpanderCleaner ExpCleaner(Expander, *DT); + SCEVExpanderCleaner ExpCleaner(Expander, *DT); Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); Type *IntIdxTy = DL->getIndexType(DestPtr->getType()); - bool Changed = false; + bool Changed = false; const SCEV *Start = Ev->getStart(); // Handle negative strided loops. if (NegStride) @@ -961,7 +961,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. if (!isSafeToExpand(Start, *SE)) - return Changed; + return Changed; // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this @@ -970,22 +970,22 @@ bool LoopIdiomRecognize::processLoopStridedStore( // base pointer and checking the region. Value *BasePtr = Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator()); - - // From here on out, conservatively report to the pass manager that we've - // changed the IR, even if we later clean up these added instructions. There - // may be structural differences e.g. in the order of use lists not accounted - // for in just a textual dump of the IR. This is written as a variable, even - // though statically all the places this dominates could be replaced with - // 'true', with the hope that anyone trying to be clever / "more precise" with - // the return value will read this comment, and leave them alone. - Changed = true; - + + // From here on out, conservatively report to the pass manager that we've + // changed the IR, even if we later clean up these added instructions. There + // may be structural differences e.g. in the order of use lists not accounted + // for in just a textual dump of the IR. This is written as a variable, even + // though statically all the places this dominates could be replaced with + // 'true', with the hope that anyone trying to be clever / "more precise" with + // the return value will read this comment, and leave them alone. 
+ Changed = true; + if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount, - StoreSize, *AA, Stores)) - return Changed; + StoreSize, *AA, Stores)) + return Changed; if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset)) - return Changed; + return Changed; // Okay, everything looks good, insert the memset. @@ -995,7 +995,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. if (!isSafeToExpand(NumBytesS, *SE)) - return Changed; + return Changed; Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); @@ -1054,7 +1054,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); ++NumMemSet; - ExpCleaner.markResultUsed(); + ExpCleaner.markResultUsed(); return true; } @@ -1088,9 +1088,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, *DL, "loop-idiom"); - SCEVExpanderCleaner ExpCleaner(Expander, *DT); + SCEVExpanderCleaner ExpCleaner(Expander, *DT); - bool Changed = false; + bool Changed = false; const SCEV *StrStart = StoreEv->getStart(); unsigned StrAS = SI->getPointerAddressSpace(); Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS)); @@ -1108,20 +1108,20 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *StoreBasePtr = Expander.expandCodeFor( StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); - // From here on out, conservatively report to the pass manager that we've - // changed the IR, even if we later clean up these added instructions. There - // may be structural differences e.g. in the order of use lists not accounted - // for in just a textual dump of the IR. This is written as a variable, even - // though statically all the places this dominates could be replaced with - // 'true', with the hope that anyone trying to be clever / "more precise" with - // the return value will read this comment, and leave them alone. - Changed = true; - + // From here on out, conservatively report to the pass manager that we've + // changed the IR, even if we later clean up these added instructions. There + // may be structural differences e.g. in the order of use lists not accounted + // for in just a textual dump of the IR. This is written as a variable, even + // though statically all the places this dominates could be replaced with + // 'true', with the hope that anyone trying to be clever / "more precise" with + // the return value will read this comment, and leave them alone. + Changed = true; + SmallPtrSet<Instruction *, 1> Stores; Stores.insert(SI); if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, StoreSize, *AA, Stores)) - return Changed; + return Changed; const SCEV *LdStart = LoadEv->getStart(); unsigned LdAS = LI->getPointerAddressSpace(); @@ -1137,10 +1137,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, StoreSize, *AA, Stores)) - return Changed; + return Changed; if (avoidLIRForMultiBlockLoop()) - return Changed; + return Changed; // Okay, everything is safe, we can transform this! 
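For readers skimming this hunk: processLoopStoreOfLoopLoad is the memcpy-forming half of the pass, and the "return Changed" exits above bail out after the SCEV expander has already touched the IR. A minimal sketch of the kind of source loop this code targets follows; the names copy_bytes, dst, src and n are hypothetical and only illustrate the idiom, they do not appear in the diff.

// Hypothetical example of the store-of-load idiom that
// LoopIdiomRecognize::processLoopStoreOfLoopLoad rewrites into a single
// memcpy in the loop preheader, assuming the mayLoopAccessLocation checks
// above prove the two regions do not interfere across the loop.
void copy_bytes(unsigned char *dst, const unsigned char *src, unsigned n) {
  for (unsigned i = 0; i < n; ++i)
    dst[i] = src[i]; // unit-stride store fed directly by a unit-stride load
  // After the transform the loop body becomes dead and the copy is done by
  //   memcpy(dst, src, n);
  // (or the element-atomic memcpy intrinsic for unordered-atomic accesses).
}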
@@ -1163,14 +1163,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const Align StoreAlign = SI->getAlign(); const Align LoadAlign = LI->getAlign(); if (StoreAlign < StoreSize || LoadAlign < StoreSize) - return Changed; + return Changed; // If the element.atomic memcpy is not lowered into explicit // loads/stores later, then it will be lowered into an element-size // specific lib call. If the lib call doesn't exist for our store size, then // we shouldn't generate the memcpy. if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) - return Changed; + return Changed; // Create the call. // Note that unordered atomic loads/stores are *required* by the spec to @@ -1208,7 +1208,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); ++NumMemCpy; - ExpCleaner.markResultUsed(); + ExpCleaner.markResultUsed(); return true; } @@ -1218,7 +1218,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, bool IsLoopMemset) { if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) { - if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) { + if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) { LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName() << " : LIR " << (IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n"); @@ -1235,8 +1235,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS() || - recognizeShiftUntilBitTest(); + return recognizePopcount() || recognizeAndInsertFFS() || + recognizeShiftUntilBitTest(); } /// Check if the given conditional branch is based on the comparison between @@ -1483,7 +1483,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, return false; // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1 - // or cnt.next = cnt + -1. + // or cnt.next = cnt + -1. // TODO: We can skip the step. If loop trip count is known (CTLZ), // then all uses of "cnt.next" could be optimized to the trip count // plus "cnt0". Currently it is not optimized. 
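The hunks that follow touch detectShiftUntilZeroIdiom and transformLoopToCountable, which turn an uncountable "shift until zero" loop into a countable one by computing the trip count with a ctlz/cttz intrinsic up front. As a rough illustration only (count_shifts and x are made-up names, not code from the diff), the shape of loop being matched is:

// Hypothetical shift-until-zero loop: the trip count is not computable by
// SCEV as written, but it equals BitWidth - ctlz(x0), which is what
// transformLoopToCountable materialises in the preheader.
unsigned count_shifts(unsigned x) {
  unsigned cnt = 0;        // CntPhi / CntInst: cnt.next = cnt + 1
  while (x != 0) {         // loop exits once the shifted value reaches zero
    x >>= 1;               // DefX: the recurrence shifted each iteration
    ++cnt;
  }
  return cnt;              // becomes 32 - ctlz(x0) for a 32-bit x
}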
@@ -1497,7 +1497,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, continue; ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); - if (!Inc || (!Inc->isOne() && !Inc->isMinusOne())) + if (!Inc || (!Inc->isOne() && !Inc->isMinusOne())) continue; PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); @@ -1728,11 +1728,11 @@ void LoopIdiomRecognize::transformLoopToCountable( Builder.SetCurrentDebugLocation(DL); // Count = BitWidth - CTLZ(InitX); - // NewCount = Count; + // NewCount = Count; // If there are uses of CntPhi create: - // NewCount = BitWidth - CTLZ(InitX >> 1); - // Count = NewCount + 1; - Value *InitXNext; + // NewCount = BitWidth - CTLZ(InitX >> 1); + // Count = NewCount + 1; + Value *InitXNext; if (IsCntPhiUsedOutsideLoop) { if (DefX->getOpcode() == Instruction::AShr) InitXNext = @@ -1747,31 +1747,31 @@ void LoopIdiomRecognize::transformLoopToCountable( llvm_unreachable("Unexpected opcode!"); } else InitXNext = InitX; - Value *FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID); - Value *Count = Builder.CreateSub( - ConstantInt::get(FFS->getType(), FFS->getType()->getIntegerBitWidth()), + Value *FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID); + Value *Count = Builder.CreateSub( + ConstantInt::get(FFS->getType(), FFS->getType()->getIntegerBitWidth()), FFS); - Value *NewCount = Count; + Value *NewCount = Count; if (IsCntPhiUsedOutsideLoop) { - NewCount = Count; - Count = Builder.CreateAdd(Count, ConstantInt::get(Count->getType(), 1)); + NewCount = Count; + Count = Builder.CreateAdd(Count, ConstantInt::get(Count->getType(), 1)); } - NewCount = Builder.CreateZExtOrTrunc(NewCount, - cast<IntegerType>(CntInst->getType())); + NewCount = Builder.CreateZExtOrTrunc(NewCount, + cast<IntegerType>(CntInst->getType())); Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader); - if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) { - // If the counter was being incremented in the loop, add NewCount to the - // counter's initial value, but only if the initial value is not zero. - ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal); - if (!InitConst || !InitConst->isZero()) - NewCount = Builder.CreateAdd(NewCount, CntInitVal); - } else { - // If the count was being decremented in the loop, subtract NewCount from - // the counter's initial value. - NewCount = Builder.CreateSub(CntInitVal, NewCount); - } + if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) { + // If the counter was being incremented in the loop, add NewCount to the + // counter's initial value, but only if the initial value is not zero. + ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal); + if (!InitConst || !InitConst->isZero()) + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + } else { + // If the count was being decremented in the loop, subtract NewCount from + // the counter's initial value. + NewCount = Builder.CreateSub(CntInitVal, NewCount); + } // Step 2: Insert new IV and loop condition: // loop: @@ -1919,343 +1919,343 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, // loop. The loop would otherwise not be deleted even if it becomes empty. SE->forgetLoop(CurLoop); } - -/// Match loop-invariant value. 
-template <typename SubPattern_t> struct match_LoopInvariant { - SubPattern_t SubPattern; - const Loop *L; - - match_LoopInvariant(const SubPattern_t &SP, const Loop *L) - : SubPattern(SP), L(L) {} - - template <typename ITy> bool match(ITy *V) { - return L->isLoopInvariant(V) && SubPattern.match(V); - } -}; - -/// Matches if the value is loop-invariant. -template <typename Ty> -inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) { - return match_LoopInvariant<Ty>(M, L); -} - -/// Return true if the idiom is detected in the loop. -/// -/// The core idiom we are trying to detect is: -/// \code -/// entry: -/// <...> -/// %bitmask = shl i32 1, %bitpos -/// br label %loop -/// -/// loop: -/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ] -/// %x.curr.bitmasked = and i32 %x.curr, %bitmask -/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 -/// %x.next = shl i32 %x.curr, 1 -/// <...> -/// br i1 %x.curr.isbitunset, label %loop, label %end -/// -/// end: -/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...> -/// %x.next.res = phi i32 [ %x.next, %loop ] <...> -/// <...> -/// \endcode -static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, - Value *&BitMask, Value *&BitPos, - Value *&CurrX, Instruction *&NextX) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE - " Performing shift-until-bittest idiom detection.\n"); - - // Give up if the loop has multiple blocks or multiple backedges. - if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n"); - return false; - } - - BasicBlock *LoopHeaderBB = CurLoop->getHeader(); - BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader(); - assert(LoopPreheaderBB && "There is always a loop preheader."); - - using namespace PatternMatch; - - // Step 1: Check if the loop backedge is in desirable form. - - ICmpInst::Predicate Pred; - Value *CmpLHS, *CmpRHS; - BasicBlock *TrueBB, *FalseBB; - if (!match(LoopHeaderBB->getTerminator(), - m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)), - m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n"); - return false; - } - - // Step 2: Check if the backedge's condition is in desirable form. - - auto MatchVariableBitMask = [&]() { - return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && - match(CmpLHS, - m_c_And(m_Value(CurrX), - m_CombineAnd( - m_Value(BitMask), - m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)), - CurLoop)))); - }; - auto MatchConstantBitMask = [&]() { - return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && - match(CmpLHS, m_And(m_Value(CurrX), - m_CombineAnd(m_Value(BitMask), m_Power2()))) && - (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask))); - }; - auto MatchDecomposableConstantBitMask = [&]() { - APInt Mask; - return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) && - ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && - (BitMask = ConstantInt::get(CurrX->getType(), Mask)) && - (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2())); - }; - - if (!MatchVariableBitMask() && !MatchConstantBitMask() && - !MatchDecomposableConstantBitMask()) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n"); - return false; - } - - // Step 3: Check if the recurrence is in desirable form. 
- auto *CurrXPN = dyn_cast<PHINode>(CurrX); - if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n"); - return false; - } - - BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB); - NextX = - dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB)); - - if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) { - // FIXME: support right-shift? - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n"); - return false; - } - - // Step 4: Check if the backedge's destinations are in desirable form. - - assert(ICmpInst::isEquality(Pred) && - "Should only get equality predicates here."); - - // cmp-br is commutative, so canonicalize to a single variant. - if (Pred != ICmpInst::Predicate::ICMP_EQ) { - Pred = ICmpInst::getInversePredicate(Pred); - std::swap(TrueBB, FalseBB); - } - - // We expect to exit loop when comparison yields false, - // so when it yields true we should branch back to loop header. - if (TrueBB != LoopHeaderBB) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n"); - return false; - } - - // Okay, idiom checks out. - return true; -} - -/// Look for the following loop: -/// \code -/// entry: -/// <...> -/// %bitmask = shl i32 1, %bitpos -/// br label %loop -/// -/// loop: -/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ] -/// %x.curr.bitmasked = and i32 %x.curr, %bitmask -/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 -/// %x.next = shl i32 %x.curr, 1 -/// <...> -/// br i1 %x.curr.isbitunset, label %loop, label %end -/// -/// end: -/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...> -/// %x.next.res = phi i32 [ %x.next, %loop ] <...> -/// <...> -/// \endcode -/// -/// And transform it into: -/// \code -/// entry: -/// %bitmask = shl i32 1, %bitpos -/// %lowbitmask = add i32 %bitmask, -1 -/// %mask = or i32 %lowbitmask, %bitmask -/// %x.masked = and i32 %x, %mask -/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked, -/// i1 true) -/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros -/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1 -/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos -/// %tripcount = add i32 %backedgetakencount, 1 -/// %x.curr = shl i32 %x, %backedgetakencount -/// %x.next = shl i32 %x, %tripcount -/// br label %loop -/// -/// loop: -/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ] -/// %loop.iv.next = add nuw i32 %loop.iv, 1 -/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount -/// <...> -/// br i1 %loop.ivcheck, label %end, label %loop -/// -/// end: -/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...> -/// %x.next.res = phi i32 [ %x.next, %loop ] <...> -/// <...> -/// \endcode -bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { - bool MadeChange = false; - - Value *X, *BitMask, *BitPos, *XCurr; - Instruction *XNext; - if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, - XNext)) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE - " shift-until-bittest idiom detection failed.\n"); - return MadeChange; - } - LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n"); - - // Ok, it is the idiom we were looking for, we *could* transform this loop, - // but is it profitable to transform? 
- - BasicBlock *LoopHeaderBB = CurLoop->getHeader(); - BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader(); - assert(LoopPreheaderBB && "There is always a loop preheader."); - - BasicBlock *SuccessorBB = CurLoop->getExitBlock(); - assert(LoopPreheaderBB && "There is only a single successor."); - - IRBuilder<> Builder(LoopPreheaderBB->getTerminator()); - Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc()); - - Intrinsic::ID IntrID = Intrinsic::ctlz; - Type *Ty = X->getType(); - - TargetTransformInfo::TargetCostKind CostKind = - TargetTransformInfo::TCK_SizeAndLatency; - - // The rewrite is considered to be unprofitable iff and only iff the - // intrinsic/shift we'll use are not cheap. Note that we are okay with *just* - // making the loop countable, even if nothing else changes. - IntrinsicCostAttributes Attrs( - IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()}); - int Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); - if (Cost > TargetTransformInfo::TCC_Basic) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE - " Intrinsic is too costly, not beneficial\n"); - return MadeChange; - } - if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) > - TargetTransformInfo::TCC_Basic) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n"); - return MadeChange; - } - - // Ok, transform appears worthwhile. - MadeChange = true; - - // Step 1: Compute the loop trip count. - - Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty), - BitPos->getName() + ".lowbitmask"); - Value *Mask = - Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask"); - Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked"); - CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic( - IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()}, - /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros"); - Value *XMaskedNumActiveBits = Builder.CreateSub( - ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros, - XMasked->getName() + ".numactivebits"); - Value *XMaskedLeadingOnePos = - Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty), - XMasked->getName() + ".leadingonepos"); - - Value *LoopBackedgeTakenCount = Builder.CreateSub( - BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount"); - // We know loop's backedge-taken count, but what's loop's trip count? - // Note that while NUW is always safe, while NSW is only for bitwidths != 2. - Value *LoopTripCount = - Builder.CreateNUWAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1), - CurLoop->getName() + ".tripcount"); - - // Step 2: Compute the recurrence's final value without a loop. - - // NewX is always safe to compute, because `LoopBackedgeTakenCount` - // will always be smaller than `bitwidth(X)`, i.e. we never get poison. - Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount); - NewX->takeName(XCurr); - if (auto *I = dyn_cast<Instruction>(NewX)) - I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true); - - Value *NewXNext; - // Rewriting XNext is more complicated, however, because `X << LoopTripCount` - // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen - // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know - // that isn't the case, we'll need to emit an alternative, safe IR. 
- if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() || - PatternMatch::match( - BitPos, PatternMatch::m_SpecificInt_ICMP( - ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(), - Ty->getScalarSizeInBits() - 1)))) - NewXNext = Builder.CreateShl(X, LoopTripCount); - else { - // Otherwise, just additionally shift by one. It's the smallest solution, - // alternatively, we could check that NewX is INT_MIN (or BitPos is ) - // and select 0 instead. - NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1)); - } - - NewXNext->takeName(XNext); - if (auto *I = dyn_cast<Instruction>(NewXNext)) - I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true); - - // Step 3: Adjust the successor basic block to recieve the computed - // recurrence's final value instead of the recurrence itself. - - XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB); - XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB); - - // Step 4: Rewrite the loop into a countable form, with canonical IV. - - // The new canonical induction variable. - Builder.SetInsertPoint(&LoopHeaderBB->front()); - auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv"); - - // The induction itself. - // Note that while NUW is always safe, while NSW is only for bitwidths != 2. - Builder.SetInsertPoint(LoopHeaderBB->getTerminator()); - auto *IVNext = Builder.CreateNUWAdd(IV, ConstantInt::get(Ty, 1), - IV->getName() + ".next"); - - // The loop trip count check. - auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount, - CurLoop->getName() + ".ivcheck"); - Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB); - LoopHeaderBB->getTerminator()->eraseFromParent(); - - // Populate the IV PHI. - IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB); - IV->addIncoming(IVNext, LoopHeaderBB); - - // Step 5: Forget the "non-computable" trip-count SCEV associated with the - // loop. The loop would otherwise not be deleted even if it becomes empty. - - SE->forgetLoop(CurLoop); - - // Other passes will take care of actually deleting the loop if possible. - - LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n"); - - ++NumShiftUntilBitTest; - return MadeChange; -} + +/// Match loop-invariant value. +template <typename SubPattern_t> struct match_LoopInvariant { + SubPattern_t SubPattern; + const Loop *L; + + match_LoopInvariant(const SubPattern_t &SP, const Loop *L) + : SubPattern(SP), L(L) {} + + template <typename ITy> bool match(ITy *V) { + return L->isLoopInvariant(V) && SubPattern.match(V); + } +}; + +/// Matches if the value is loop-invariant. +template <typename Ty> +inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) { + return match_LoopInvariant<Ty>(M, L); +} + +/// Return true if the idiom is detected in the loop. 
+/// +/// The core idiom we are trying to detect is: +/// \code +/// entry: +/// <...> +/// %bitmask = shl i32 1, %bitpos +/// br label %loop +/// +/// loop: +/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ] +/// %x.curr.bitmasked = and i32 %x.curr, %bitmask +/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 +/// %x.next = shl i32 %x.curr, 1 +/// <...> +/// br i1 %x.curr.isbitunset, label %loop, label %end +/// +/// end: +/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...> +/// %x.next.res = phi i32 [ %x.next, %loop ] <...> +/// <...> +/// \endcode +static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, + Value *&BitMask, Value *&BitPos, + Value *&CurrX, Instruction *&NextX) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE + " Performing shift-until-bittest idiom detection.\n"); + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n"); + return false; + } + + BasicBlock *LoopHeaderBB = CurLoop->getHeader(); + BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader(); + assert(LoopPreheaderBB && "There is always a loop preheader."); + + using namespace PatternMatch; + + // Step 1: Check if the loop backedge is in desirable form. + + ICmpInst::Predicate Pred; + Value *CmpLHS, *CmpRHS; + BasicBlock *TrueBB, *FalseBB; + if (!match(LoopHeaderBB->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)), + m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n"); + return false; + } + + // Step 2: Check if the backedge's condition is in desirable form. + + auto MatchVariableBitMask = [&]() { + return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && + match(CmpLHS, + m_c_And(m_Value(CurrX), + m_CombineAnd( + m_Value(BitMask), + m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)), + CurLoop)))); + }; + auto MatchConstantBitMask = [&]() { + return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && + match(CmpLHS, m_And(m_Value(CurrX), + m_CombineAnd(m_Value(BitMask), m_Power2()))) && + (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask))); + }; + auto MatchDecomposableConstantBitMask = [&]() { + APInt Mask; + return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) && + ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && + (BitMask = ConstantInt::get(CurrX->getType(), Mask)) && + (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2())); + }; + + if (!MatchVariableBitMask() && !MatchConstantBitMask() && + !MatchDecomposableConstantBitMask()) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n"); + return false; + } + + // Step 3: Check if the recurrence is in desirable form. + auto *CurrXPN = dyn_cast<PHINode>(CurrX); + if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n"); + return false; + } + + BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB); + NextX = + dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB)); + + if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) { + // FIXME: support right-shift? + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n"); + return false; + } + + // Step 4: Check if the backedge's destinations are in desirable form. + + assert(ICmpInst::isEquality(Pred) && + "Should only get equality predicates here."); + + // cmp-br is commutative, so canonicalize to a single variant. 
+ if (Pred != ICmpInst::Predicate::ICMP_EQ) { + Pred = ICmpInst::getInversePredicate(Pred); + std::swap(TrueBB, FalseBB); + } + + // We expect to exit loop when comparison yields false, + // so when it yields true we should branch back to loop header. + if (TrueBB != LoopHeaderBB) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n"); + return false; + } + + // Okay, idiom checks out. + return true; +} + +/// Look for the following loop: +/// \code +/// entry: +/// <...> +/// %bitmask = shl i32 1, %bitpos +/// br label %loop +/// +/// loop: +/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ] +/// %x.curr.bitmasked = and i32 %x.curr, %bitmask +/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 +/// %x.next = shl i32 %x.curr, 1 +/// <...> +/// br i1 %x.curr.isbitunset, label %loop, label %end +/// +/// end: +/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...> +/// %x.next.res = phi i32 [ %x.next, %loop ] <...> +/// <...> +/// \endcode +/// +/// And transform it into: +/// \code +/// entry: +/// %bitmask = shl i32 1, %bitpos +/// %lowbitmask = add i32 %bitmask, -1 +/// %mask = or i32 %lowbitmask, %bitmask +/// %x.masked = and i32 %x, %mask +/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked, +/// i1 true) +/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros +/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1 +/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos +/// %tripcount = add i32 %backedgetakencount, 1 +/// %x.curr = shl i32 %x, %backedgetakencount +/// %x.next = shl i32 %x, %tripcount +/// br label %loop +/// +/// loop: +/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ] +/// %loop.iv.next = add nuw i32 %loop.iv, 1 +/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount +/// <...> +/// br i1 %loop.ivcheck, label %end, label %loop +/// +/// end: +/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...> +/// %x.next.res = phi i32 [ %x.next, %loop ] <...> +/// <...> +/// \endcode +bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { + bool MadeChange = false; + + Value *X, *BitMask, *BitPos, *XCurr; + Instruction *XNext; + if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, + XNext)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE + " shift-until-bittest idiom detection failed.\n"); + return MadeChange; + } + LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n"); + + // Ok, it is the idiom we were looking for, we *could* transform this loop, + // but is it profitable to transform? + + BasicBlock *LoopHeaderBB = CurLoop->getHeader(); + BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader(); + assert(LoopPreheaderBB && "There is always a loop preheader."); + + BasicBlock *SuccessorBB = CurLoop->getExitBlock(); + assert(LoopPreheaderBB && "There is only a single successor."); + + IRBuilder<> Builder(LoopPreheaderBB->getTerminator()); + Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc()); + + Intrinsic::ID IntrID = Intrinsic::ctlz; + Type *Ty = X->getType(); + + TargetTransformInfo::TargetCostKind CostKind = + TargetTransformInfo::TCK_SizeAndLatency; + + // The rewrite is considered to be unprofitable iff and only iff the + // intrinsic/shift we'll use are not cheap. Note that we are okay with *just* + // making the loop countable, even if nothing else changes. 
+ IntrinsicCostAttributes Attrs( + IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()}); + int Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); + if (Cost > TargetTransformInfo::TCC_Basic) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE + " Intrinsic is too costly, not beneficial\n"); + return MadeChange; + } + if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) > + TargetTransformInfo::TCC_Basic) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n"); + return MadeChange; + } + + // Ok, transform appears worthwhile. + MadeChange = true; + + // Step 1: Compute the loop trip count. + + Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty), + BitPos->getName() + ".lowbitmask"); + Value *Mask = + Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask"); + Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked"); + CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic( + IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()}, + /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros"); + Value *XMaskedNumActiveBits = Builder.CreateSub( + ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros, + XMasked->getName() + ".numactivebits"); + Value *XMaskedLeadingOnePos = + Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty), + XMasked->getName() + ".leadingonepos"); + + Value *LoopBackedgeTakenCount = Builder.CreateSub( + BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount"); + // We know loop's backedge-taken count, but what's loop's trip count? + // Note that while NUW is always safe, while NSW is only for bitwidths != 2. + Value *LoopTripCount = + Builder.CreateNUWAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1), + CurLoop->getName() + ".tripcount"); + + // Step 2: Compute the recurrence's final value without a loop. + + // NewX is always safe to compute, because `LoopBackedgeTakenCount` + // will always be smaller than `bitwidth(X)`, i.e. we never get poison. + Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount); + NewX->takeName(XCurr); + if (auto *I = dyn_cast<Instruction>(NewX)) + I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true); + + Value *NewXNext; + // Rewriting XNext is more complicated, however, because `X << LoopTripCount` + // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen + // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know + // that isn't the case, we'll need to emit an alternative, safe IR. + if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() || + PatternMatch::match( + BitPos, PatternMatch::m_SpecificInt_ICMP( + ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(), + Ty->getScalarSizeInBits() - 1)))) + NewXNext = Builder.CreateShl(X, LoopTripCount); + else { + // Otherwise, just additionally shift by one. It's the smallest solution, + // alternatively, we could check that NewX is INT_MIN (or BitPos is ) + // and select 0 instead. + NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1)); + } + + NewXNext->takeName(XNext); + if (auto *I = dyn_cast<Instruction>(NewXNext)) + I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true); + + // Step 3: Adjust the successor basic block to recieve the computed + // recurrence's final value instead of the recurrence itself. + + XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB); + XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB); + + // Step 4: Rewrite the loop into a countable form, with canonical IV. 
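// Illustration only, not part of this patch: a worked example of the
// closed-form trip count computed in Steps 1-2 above, checked in plain C++.
// The helper name and the 8-bit width are assumptions made for the sketch;
// __builtin_clz stands in for the ctlz intrinsic.

#include <cassert>
#include <cstdint>

// Mirrors Step 1: trip count of "shift X left until bit BitPos is set".
unsigned shiftUntilBitTestTripCount(uint8_t X, unsigned BitPos) {
  uint8_t BitMask = uint8_t(1u << BitPos);
  uint8_t Mask = uint8_t((BitMask - 1) | BitMask);   // low bits plus the tested bit
  uint8_t XMasked = uint8_t(X & Mask);               // assumed non-zero, mirroring is_zero_undef=true
  unsigned NumLeadingZeros = unsigned(__builtin_clz(XMasked)) - 24; // 8-bit clz via 32-bit clz
  unsigned NumActiveBits = 8 - NumLeadingZeros;
  unsigned BackedgeTakenCount = BitPos - (NumActiveBits - 1);
  return BackedgeTakenCount + 1;
}

int main() {
  // X = 0b00000011, BitPos = 6: the original loop sees x.curr = 3, 6, 12, 24,
  // 48, 96 and exits once bit 6 is set in 96, i.e. 5 backedges, trip count 6.
  assert(shiftUntilBitTestTripCount(0b00000011, 6) == 6);
  // Step 2's final values match the last iteration: x.curr = 3 << 5 = 96 and
  // x.next = 3 << 6 = 192.
  return 0;
}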
+ + // The new canonical induction variable. + Builder.SetInsertPoint(&LoopHeaderBB->front()); + auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv"); + + // The induction itself. + // Note that while NUW is always safe, while NSW is only for bitwidths != 2. + Builder.SetInsertPoint(LoopHeaderBB->getTerminator()); + auto *IVNext = Builder.CreateNUWAdd(IV, ConstantInt::get(Ty, 1), + IV->getName() + ".next"); + + // The loop trip count check. + auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount, + CurLoop->getName() + ".ivcheck"); + Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB); + LoopHeaderBB->getTerminator()->eraseFromParent(); + + // Populate the IV PHI. + IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB); + IV->addIncoming(IVNext, LoopHeaderBB); + + // Step 5: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. + + SE->forgetLoop(CurLoop); + + // Other passes will take care of actually deleting the loop if possible. + + LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n"); + + ++NumShiftUntilBitTest; + return MadeChange; +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp index d9dbc0deb4..4f8809275f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/LoopInterchange.h" +#include "llvm/Transforms/Scalar/LoopInterchange.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -28,7 +28,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -429,7 +429,7 @@ private: const LoopInterchangeLegality &LIL; }; -struct LoopInterchange { +struct LoopInterchange { ScalarEvolution *SE = nullptr; LoopInfo *LI = nullptr; DependenceInfo *DI = nullptr; @@ -438,12 +438,12 @@ struct LoopInterchange { /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, - DominatorTree *DT, OptimizationRemarkEmitter *ORE) - : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {} + LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, + DominatorTree *DT, OptimizationRemarkEmitter *ORE) + : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {} - bool run(Loop *L) { - if (L->getParentLoop()) + bool run(Loop *L) { + if (L->getParentLoop()) return false; return processLoopList(populateWorklist(*L)); @@ -452,7 +452,7 @@ struct LoopInterchange { bool isComputableLoopNest(LoopVector LoopList) { for (Loop *L : LoopList) { const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(ExitCountOuter)) { + if (isa<SCEVCouldNotCompute>(ExitCountOuter)) { LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n"); return false; } @@ -611,13 +611,13 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { containsUnsafeInstructions(OuterLoopLatch)) return false; - // Also make sure the inner loop preheader does not contain any unsafe - // instructions. 
Note that all instructions in the preheader will be moved to - // the outer loop header when interchanging. - if (InnerLoopPreHeader != OuterLoopHeader && - containsUnsafeInstructions(InnerLoopPreHeader)) - return false; - + // Also make sure the inner loop preheader does not contain any unsafe + // instructions. Note that all instructions in the preheader will be moved to + // the outer loop header when interchanging. + if (InnerLoopPreHeader != OuterLoopHeader && + containsUnsafeInstructions(InnerLoopPreHeader)) + return false; + LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n"); // We have a perfect loop nest. return true; @@ -661,10 +661,10 @@ static Value *followLCSSA(Value *SV) { // Check V's users to see if it is involved in a reduction in L. static PHINode *findInnerReductionPhi(Loop *L, Value *V) { - // Reduction variables cannot be constants. - if (isa<Constant>(V)) - return nullptr; - + // Reduction variables cannot be constants. + if (isa<Constant>(V)) + return nullptr; + for (Value *User : V->users()) { if (PHINode *PHI = dyn_cast<PHINode>(User)) { if (PHI->getNumIncomingValues() == 1) @@ -705,7 +705,7 @@ bool LoopInterchangeLegality::findInductionAndReductions( Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch())); PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V); if (!InnerRedPhi || - !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) { + !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) { LLVM_DEBUG( dbgs() << "Failed to recognize PHI as an induction or reduction.\n"); @@ -1042,10 +1042,10 @@ int LoopInterchangeProfitability::getInstrOrderCost() { bool FoundInnerInduction = false; bool FoundOuterInduction = false; for (unsigned i = 0; i < NumOp; ++i) { - // Skip operands that are not SCEV-able. - if (!SE->isSCEVable(GEP->getOperand(i)->getType())) - continue; - + // Skip operands that are not SCEV-able. + if (!SE->isSCEVable(GEP->getOperand(i)->getType())) + continue; + const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); if (!AR) @@ -1190,7 +1190,7 @@ void LoopInterchangeTransform::restructureLoops( removeChildLoop(NewInner, NewOuter); LI->changeTopLevelLoop(NewInner, NewOuter); } - while (!NewOuter->isInnermost()) + while (!NewOuter->isInnermost()) NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); NewOuter->addChildLoop(NewInner); @@ -1306,21 +1306,21 @@ bool LoopInterchangeTransform::transform() { LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n"); } - // Instructions in the original inner loop preheader may depend on values - // defined in the outer loop header. Move them there, because the original - // inner loop preheader will become the entry into the interchanged loop nest. - // Currently we move all instructions and rely on LICM to move invariant - // instructions outside the loop nest. - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); - BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); - if (InnerLoopPreHeader != OuterLoopHeader) { - SmallPtrSet<Instruction *, 4> NeedsMoving; - for (Instruction &I : - make_early_inc_range(make_range(InnerLoopPreHeader->begin(), - std::prev(InnerLoopPreHeader->end())))) - I.moveBefore(OuterLoopHeader->getTerminator()); - } - + // Instructions in the original inner loop preheader may depend on values + // defined in the outer loop header. Move them there, because the original + // inner loop preheader will become the entry into the interchanged loop nest. 
+ // Currently we move all instructions and rely on LICM to move invariant + // instructions outside the loop nest. + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + if (InnerLoopPreHeader != OuterLoopHeader) { + SmallPtrSet<Instruction *, 4> NeedsMoving; + for (Instruction &I : + make_early_inc_range(make_range(InnerLoopPreHeader->begin(), + std::prev(InnerLoopPreHeader->end())))) + I.moveBefore(OuterLoopHeader->getTerminator()); + } + Transformed |= adjustLoopLinks(); if (!Transformed) { LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n"); @@ -1537,7 +1537,7 @@ bool LoopInterchangeTransform::adjustLoopBranches() { InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); // The outer loop header might or might not branch to the outer latch. // We are guaranteed to branch to the inner loop preheader. - if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) + if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates, /*MustUpdateOnce=*/false); updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, @@ -1584,9 +1584,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() { // Now update the reduction PHIs in the inner and outer loop headers. SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; - for (PHINode &PHI : drop_begin(InnerLoopHeader->phis())) + for (PHINode &PHI : drop_begin(InnerLoopHeader->phis())) InnerLoopPHIs.push_back(cast<PHINode>(&PHI)); - for (PHINode &PHI : drop_begin(OuterLoopHeader->phis())) + for (PHINode &PHI : drop_begin(OuterLoopHeader->phis())) OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); auto &OuterInnerReductions = LIL.getOuterInnerReductions(); @@ -1610,17 +1610,17 @@ bool LoopInterchangeTransform::adjustLoopBranches() { InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader); InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); - // Values defined in the outer loop header could be used in the inner loop - // latch. In that case, we need to create LCSSA phis for them, because after - // interchanging they will be defined in the new inner loop and used in the - // new outer loop. - IRBuilder<> Builder(OuterLoopHeader->getContext()); - SmallVector<Instruction *, 4> MayNeedLCSSAPhis; - for (Instruction &I : - make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end()))) - MayNeedLCSSAPhis.push_back(&I); - formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder); - + // Values defined in the outer loop header could be used in the inner loop + // latch. In that case, we need to create LCSSA phis for them, because after + // interchanging they will be defined in the new inner loop and used in the + // new outer loop. + IRBuilder<> Builder(OuterLoopHeader->getContext()); + SmallVector<Instruction *, 4> MayNeedLCSSAPhis; + for (Instruction &I : + make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end()))) + MayNeedLCSSAPhis.push_back(&I); + formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder); + return true; } @@ -1638,58 +1638,58 @@ bool LoopInterchangeTransform::adjustLoopLinks() { return Changed; } -/// Main LoopInterchange Pass. 
-struct LoopInterchangeLegacyPass : public LoopPass { - static char ID; - - LoopInterchangeLegacyPass() : LoopPass(ID) { - initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DependenceAnalysisWrapperPass>(); - AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); - - getLoopAnalysisUsage(AU); - } - - bool runOnLoop(Loop *L, LPPassManager &LPM) override { - if (skipLoop(L)) - return false; - - auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - - return LoopInterchange(SE, LI, DI, DT, ORE).run(L); - } -}; - -char LoopInterchangeLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", +/// Main LoopInterchange Pass. +struct LoopInterchangeLegacyPass : public LoopPass { + static char ID; + + LoopInterchangeLegacyPass() : LoopPass(ID) { + initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DependenceAnalysisWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); + + getLoopAnalysisUsage(AU); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipLoop(L)) + return false; + + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + + return LoopInterchange(SE, LI, DI, DT, ORE).run(L); + } +}; + +char LoopInterchangeLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange", +INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) -Pass *llvm::createLoopInterchangePass() { - return new LoopInterchangeLegacyPass(); -} - -PreservedAnalyses LoopInterchangePass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - Function &F = *L.getHeader()->getParent(); - - DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); - OptimizationRemarkEmitter ORE(&F); - if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(&L)) - return PreservedAnalyses::all(); - return getLoopPassPreservedAnalyses(); -} +Pass *llvm::createLoopInterchangePass() { + return new LoopInterchangeLegacyPass(); +} + +PreservedAnalyses LoopInterchangePass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function &F = *L.getHeader()->getParent(); + + DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); + OptimizationRemarkEmitter ORE(&F); + if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(&L)) + return PreservedAnalyses::all(); + return getLoopPassPreservedAnalyses(); +} diff --git 
a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp index 058612149a..0d3f053e1e 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -55,7 +55,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -308,8 +308,8 @@ public: /// We need a check if one is a pointer for a candidate load and the other is /// a pointer for a possibly intervening store. bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2, - const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath, - const SmallPtrSetImpl<Value *> &CandLoadPtrs) { + const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath, + const SmallPtrSetImpl<Value *> &CandLoadPtrs) { Value *Ptr1 = LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue; Value *Ptr2 = @@ -384,9 +384,9 @@ public: findPointersWrittenOnForwardingPath(Candidates); // Collect the pointers of the candidate loads. - SmallPtrSet<Value *, 4> CandLoadPtrs; - for (const auto &Candidate : Candidates) - CandLoadPtrs.insert(Candidate.getLoadPtr()); + SmallPtrSet<Value *, 4> CandLoadPtrs; + for (const auto &Candidate : Candidates) + CandLoadPtrs.insert(Candidate.getLoadPtr()); const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks(); SmallVector<RuntimePointerCheck, 4> Checks; @@ -505,16 +505,16 @@ public: if (!Cand.isDependenceDistanceOfOne(PSE, L)) continue; - assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) && - "Loading from something other than indvar?"); - assert( - isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) && - "Storing to something other than indvar?"); - - Candidates.push_back(Cand); + assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) && + "Loading from something other than indvar?"); + assert( + isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) && + "Storing to something other than indvar?"); + + Candidates.push_back(Cand); LLVM_DEBUG( dbgs() - << Candidates.size() + << Candidates.size() << ". Valid store-to-load forwarding across the loop backedge\n"); } if (Candidates.empty()) @@ -563,19 +563,19 @@ public: // Point of no-return, start the transformation. First, version the loop // if necessary. - LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE()); + LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE()); LV.versionLoop(); - - // After versioning, some of the candidates' pointers could stop being - // SCEVAddRecs. We need to filter them out. - auto NoLongerGoodCandidate = [this]( - const StoreToLoadForwardingCandidate &Cand) { - return !isa<SCEVAddRecExpr>( - PSE.getSCEV(Cand.Load->getPointerOperand())) || - !isa<SCEVAddRecExpr>( - PSE.getSCEV(Cand.Store->getPointerOperand())); - }; - llvm::erase_if(Candidates, NoLongerGoodCandidate); + + // After versioning, some of the candidates' pointers could stop being + // SCEVAddRecs. We need to filter them out. 
+ auto NoLongerGoodCandidate = [this]( + const StoreToLoadForwardingCandidate &Cand) { + return !isa<SCEVAddRecExpr>( + PSE.getSCEV(Cand.Load->getPointerOperand())) || + !isa<SCEVAddRecExpr>( + PSE.getSCEV(Cand.Store->getPointerOperand())); + }; + llvm::erase_if(Candidates, NoLongerGoodCandidate); } // Next, propagate the value stored by the store to the users of the load. @@ -584,7 +584,7 @@ public: "storeforward"); for (const auto &Cand : Candidates) propagateStoredValueToLoadUsers(Cand, SEE); - NumLoopLoadEliminted += Candidates.size(); + NumLoopLoadEliminted += Candidates.size(); return true; } @@ -610,7 +610,7 @@ private: static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - ScalarEvolution *SE, AssumptionCache *AC, + ScalarEvolution *SE, AssumptionCache *AC, function_ref<const LoopAccessInfo &(Loop &)> GetLAI) { // Build up a worklist of inner-loops to transform to avoid iterator // invalidation. @@ -619,21 +619,21 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, // which merely optimizes the use of loads in a loop. SmallVector<Loop *, 8> Worklist; - bool Changed = false; - + bool Changed = false; + for (Loop *TopLevelLoop : LI) - for (Loop *L : depth_first(TopLevelLoop)) { - Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false); + for (Loop *L : depth_first(TopLevelLoop)) { + Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false); // We only handle inner-most loops. - if (L->isInnermost()) + if (L->isInnermost()) Worklist.push_back(L); - } + } // Now walk the identified inner loops. for (Loop *L : Worklist) { - // Match historical behavior - if (!L->isRotatedForm() || !L->getExitingBlock()) - continue; + // Match historical behavior + if (!L->isRotatedForm() || !L->getExitingBlock()) + continue; // The actual work is performed by LoadEliminationForLoop. LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI); Changed |= LEL.processLoop(); @@ -667,7 +667,7 @@ public: // Process each loop nest in the function. 
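// Illustration only, not part of this patch: the loop shape that
// store-to-load forwarding targets and the rewrite LoadEliminationForLoop
// performs, written as plain C++. The array names are invented; the runtime
// checks added by LoopVersioning above are what justify the no-aliasing
// assumption this rewrite relies on.

void beforeForwarding(int *A, const int *B, int *C, unsigned N) {
  for (unsigned I = 0; I < N; ++I) {
    A[I + 1] = B[I] + 1;   // the store in iteration I ...
    C[I] = A[I] * 2;       // ... is read back by the load in iteration I + 1
  }
}

void afterForwarding(int *A, const int *B, int *C, unsigned N) {
  int Forwarded = A[0];    // one initial load, hoisted to the preheader
  for (unsigned I = 0; I < N; ++I) {
    int Stored = B[I] + 1;
    A[I + 1] = Stored;
    C[I] = Forwarded * 2;  // load eliminated: reuse the forwarded value (a PHI in IR)
    Forwarded = Stored;
  }
}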
return eliminateLoadsAcrossLoops( - F, LI, DT, BFI, PSI, /*SE*/ nullptr, /*AC*/ nullptr, + F, LI, DT, BFI, PSI, /*SE*/ nullptr, /*AC*/ nullptr, [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); }); } @@ -724,9 +724,9 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F, auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); bool Changed = eliminateLoadsAcrossLoops( - F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, - TLI, TTI, nullptr, MSSA}; + F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & { + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, nullptr, MSSA}; return LAM.getResult<LoopAccessAnalysis>(L, AR); }); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp index 3fe8e72591..13330c1c80 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/LoopPassManager.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Support/TimeProfiler.h" using namespace llvm; @@ -30,133 +30,133 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, if (DebugLogging) dbgs() << "Starting Loop pass manager run.\n"; - // Runs loop-nest passes only when the current loop is a top-level one. - PreservedAnalyses PA = (L.isOutermost() && !LoopNestPasses.empty()) - ? runWithLoopNestPasses(L, AM, AR, U) - : runWithoutLoopNestPasses(L, AM, AR, U); - - // Invalidation for the current loop should be handled above, and other loop - // analysis results shouldn't be impacted by runs over this loop. Therefore, - // the remaining analysis results in the AnalysisManager are preserved. We - // mark this with a set so that we don't need to inspect each one - // individually. - // FIXME: This isn't correct! This loop and all nested loops' analyses should - // be preserved, but unrolling should invalidate the parent loop's analyses. - PA.preserveSet<AllAnalysesOn<Loop>>(); - - if (DebugLogging) - dbgs() << "Finished Loop pass manager run.\n"; - - return PA; -} - -// Run both loop passes and loop-nest passes on top-level loop \p L. -PreservedAnalyses -LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - assert(L.isOutermost() && - "Loop-nest passes should only run on top-level loops."); - PreservedAnalyses PA = PreservedAnalyses::all(); - + // Runs loop-nest passes only when the current loop is a top-level one. + PreservedAnalyses PA = (L.isOutermost() && !LoopNestPasses.empty()) + ? 
runWithLoopNestPasses(L, AM, AR, U) + : runWithoutLoopNestPasses(L, AM, AR, U); + + // Invalidation for the current loop should be handled above, and other loop + // analysis results shouldn't be impacted by runs over this loop. Therefore, + // the remaining analysis results in the AnalysisManager are preserved. We + // mark this with a set so that we don't need to inspect each one + // individually. + // FIXME: This isn't correct! This loop and all nested loops' analyses should + // be preserved, but unrolling should invalidate the parent loop's analyses. + PA.preserveSet<AllAnalysesOn<Loop>>(); + + if (DebugLogging) + dbgs() << "Finished Loop pass manager run.\n"; + + return PA; +} + +// Run both loop passes and loop-nest passes on top-level loop \p L. +PreservedAnalyses +LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + assert(L.isOutermost() && + "Loop-nest passes should only run on top-level loops."); + PreservedAnalyses PA = PreservedAnalyses::all(); + // Request PassInstrumentation from analysis manager, will use it to run // instrumenting callbacks for the passes later. PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR); - unsigned LoopPassIndex = 0, LoopNestPassIndex = 0; - - // `LoopNestPtr` points to the `LoopNest` object for the current top-level - // loop and `IsLoopNestPtrValid` indicates whether the pointer is still valid. - // The `LoopNest` object will have to be re-constructed if the pointer is - // invalid when encountering a loop-nest pass. - std::unique_ptr<LoopNest> LoopNestPtr; - bool IsLoopNestPtrValid = false; - - for (size_t I = 0, E = IsLoopNestPass.size(); I != E; ++I) { - Optional<PreservedAnalyses> PassPA; - if (!IsLoopNestPass[I]) { - // The `I`-th pass is a loop pass. - auto &Pass = LoopPasses[LoopPassIndex++]; - PassPA = runSinglePass(L, Pass, AM, AR, U, PI); - } else { - // The `I`-th pass is a loop-nest pass. - auto &Pass = LoopNestPasses[LoopNestPassIndex++]; - - // If the loop-nest object calculated before is no longer valid, - // re-calculate it here before running the loop-nest pass. - if (!IsLoopNestPtrValid) { - LoopNestPtr = LoopNest::getLoopNest(L, AR.SE); - IsLoopNestPtrValid = true; - } - PassPA = runSinglePass(*LoopNestPtr, Pass, AM, AR, U, PI); + unsigned LoopPassIndex = 0, LoopNestPassIndex = 0; + + // `LoopNestPtr` points to the `LoopNest` object for the current top-level + // loop and `IsLoopNestPtrValid` indicates whether the pointer is still valid. + // The `LoopNest` object will have to be re-constructed if the pointer is + // invalid when encountering a loop-nest pass. + std::unique_ptr<LoopNest> LoopNestPtr; + bool IsLoopNestPtrValid = false; + + for (size_t I = 0, E = IsLoopNestPass.size(); I != E; ++I) { + Optional<PreservedAnalyses> PassPA; + if (!IsLoopNestPass[I]) { + // The `I`-th pass is a loop pass. + auto &Pass = LoopPasses[LoopPassIndex++]; + PassPA = runSinglePass(L, Pass, AM, AR, U, PI); + } else { + // The `I`-th pass is a loop-nest pass. + auto &Pass = LoopNestPasses[LoopNestPassIndex++]; + + // If the loop-nest object calculated before is no longer valid, + // re-calculate it here before running the loop-nest pass. + if (!IsLoopNestPtrValid) { + LoopNestPtr = LoopNest::getLoopNest(L, AR.SE); + IsLoopNestPtrValid = true; + } + PassPA = runSinglePass(*LoopNestPtr, Pass, AM, AR, U, PI); } - // `PassPA` is `None` means that the before-pass callbacks in - // `PassInstrumentation` return false. 
The pass does not run in this case, - // so we can skip the following procedure. - if (!PassPA) - continue; - - // If the loop was deleted, abort the run and return to the outer walk. - if (U.skipCurrentLoop()) { - PA.intersect(std::move(*PassPA)); - break; - } - - // Update the analysis manager as each pass runs and potentially - // invalidates analyses. - AM.invalidate(L, *PassPA); - - // Finally, we intersect the final preserved analyses to compute the - // aggregate preserved set for this pass manager. - PA.intersect(std::move(*PassPA)); - - // Check if the current pass preserved the loop-nest object or not. - IsLoopNestPtrValid &= PassPA->getChecker<LoopNestAnalysis>().preserved(); - - // FIXME: Historically, the pass managers all called the LLVM context's - // yield function here. We don't have a generic way to acquire the - // context and it isn't yet clear what the right pattern is for yielding - // in the new pass manager so it is currently omitted. - // ...getContext().yield(); - } - return PA; -} - -// Run all loop passes on loop \p L. Loop-nest passes don't run either because -// \p L is not a top-level one or simply because there are no loop-nest passes -// in the pass manager at all. -PreservedAnalyses -LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - PreservedAnalyses PA = PreservedAnalyses::all(); - - // Request PassInstrumentation from analysis manager, will use it to run - // instrumenting callbacks for the passes later. - PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR); - for (auto &Pass : LoopPasses) { - Optional<PreservedAnalyses> PassPA = runSinglePass(L, Pass, AM, AR, U, PI); - - // `PassPA` is `None` means that the before-pass callbacks in - // `PassInstrumentation` return false. The pass does not run in this case, - // so we can skip the following procedure. - if (!PassPA) - continue; + // `PassPA` is `None` means that the before-pass callbacks in + // `PassInstrumentation` return false. The pass does not run in this case, + // so we can skip the following procedure. + if (!PassPA) + continue; // If the loop was deleted, abort the run and return to the outer walk. if (U.skipCurrentLoop()) { - PA.intersect(std::move(*PassPA)); + PA.intersect(std::move(*PassPA)); break; } + // Update the analysis manager as each pass runs and potentially + // invalidates analyses. + AM.invalidate(L, *PassPA); + + // Finally, we intersect the final preserved analyses to compute the + // aggregate preserved set for this pass manager. + PA.intersect(std::move(*PassPA)); + + // Check if the current pass preserved the loop-nest object or not. + IsLoopNestPtrValid &= PassPA->getChecker<LoopNestAnalysis>().preserved(); + + // FIXME: Historically, the pass managers all called the LLVM context's + // yield function here. We don't have a generic way to acquire the + // context and it isn't yet clear what the right pattern is for yielding + // in the new pass manager so it is currently omitted. + // ...getContext().yield(); + } + return PA; +} + +// Run all loop passes on loop \p L. Loop-nest passes don't run either because +// \p L is not a top-level one or simply because there are no loop-nest passes +// in the pass manager at all. 
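// Illustration only, not part of this patch: how client code typically feeds
// loops into this pass manager. The pass selection is arbitrary, and the
// exact createFunctionToLoopPassAdaptor parameters in LLVM 12 should be
// treated as an assumption of this sketch.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include <utility>

llvm::FunctionPassManager buildExampleLoopPipeline() {
  llvm::LoopPassManager LPM;
  LPM.addPass(llvm::LoopIdiomRecognizePass()); // plain loop passes; any
  LPM.addPass(llvm::IndVarSimplifyPass());     // loop-nest passes added here
  LPM.addPass(llvm::LICMPass());               // would take the loop-nest path above

  llvm::FunctionPassManager FPM;
  // FunctionToLoopPassAdaptor::run (below in this file) canonicalizes loops,
  // builds LoopStandardAnalysisResults and drives the loop worklist.
  FPM.addPass(llvm::createFunctionToLoopPassAdaptor(std::move(LPM),
                                                    /*UseMemorySSA=*/true));
  return FPM;
}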
+PreservedAnalyses +LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + // Request PassInstrumentation from analysis manager, will use it to run + // instrumenting callbacks for the passes later. + PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR); + for (auto &Pass : LoopPasses) { + Optional<PreservedAnalyses> PassPA = runSinglePass(L, Pass, AM, AR, U, PI); + + // `PassPA` is `None` means that the before-pass callbacks in + // `PassInstrumentation` return false. The pass does not run in this case, + // so we can skip the following procedure. + if (!PassPA) + continue; + + // If the loop was deleted, abort the run and return to the outer walk. + if (U.skipCurrentLoop()) { + PA.intersect(std::move(*PassPA)); + break; + } + // Update the analysis manager as each pass runs and potentially // invalidates analyses. - AM.invalidate(L, *PassPA); + AM.invalidate(L, *PassPA); // Finally, we intersect the final preserved analyses to compute the // aggregate preserved set for this pass manager. - PA.intersect(std::move(*PassPA)); + PA.intersect(std::move(*PassPA)); // FIXME: Historically, the pass managers all called the LLVM context's // yield function here. We don't have a generic way to acquire the @@ -164,162 +164,162 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, // in the new pass manager so it is currently omitted. // ...getContext().yield(); } - return PA; -} -} // namespace llvm - -PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, - FunctionAnalysisManager &AM) { - // Before we even compute any loop analyses, first run a miniature function - // pass pipeline to put loops into their canonical form. Note that we can - // directly build up function analyses after this as the function pass - // manager handles all the invalidation at that layer. - PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F); - - PreservedAnalyses PA = PreservedAnalyses::all(); - // Check the PassInstrumentation's BeforePass callbacks before running the - // canonicalization pipeline. - if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) { - PA = LoopCanonicalizationFPM.run(F, AM); - PI.runAfterPass<Function>(LoopCanonicalizationFPM, F, PA); - } - - // Get the loop structure for this function - LoopInfo &LI = AM.getResult<LoopAnalysis>(F); - - // If there are no loops, there is nothing to do here. - if (LI.empty()) - return PA; - - // Get the analysis results needed by loop passes. - MemorySSA *MSSA = - UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr; - BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData() - ? (&AM.getResult<BlockFrequencyAnalysis>(F)) - : nullptr; - LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F), - AM.getResult<AssumptionAnalysis>(F), - AM.getResult<DominatorTreeAnalysis>(F), - AM.getResult<LoopAnalysis>(F), - AM.getResult<ScalarEvolutionAnalysis>(F), - AM.getResult<TargetLibraryAnalysis>(F), - AM.getResult<TargetIRAnalysis>(F), - BFI, - MSSA}; - - // Setup the loop analysis manager from its proxy. It is important that - // this is only done when there are loops to process and we have built the - // LoopStandardAnalysisResults object. The loop analyses cached in this - // manager have access to those analysis results and so it must invalidate - // itself when they go away. 
- auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F); - if (UseMemorySSA) - LAMFP.markMSSAUsed(); - LoopAnalysisManager &LAM = LAMFP.getManager(); - - // A postorder worklist of loops to process. - SmallPriorityWorklist<Loop *, 4> Worklist; - - // Register the worklist and loop analysis manager so that loop passes can - // update them when they mutate the loop nest structure. - LPMUpdater Updater(Worklist, LAM, LoopNestMode); - - // Add the loop nests in the reverse order of LoopInfo. See method - // declaration. - if (!LoopNestMode) { - appendLoopsToWorklist(LI, Worklist); - } else { - for (Loop *L : LI) - Worklist.insert(L); - } - -#ifndef NDEBUG - PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) { - if (isSpecialPass(PassID, {"PassManager"})) - return; - assert(any_isa<const Loop *>(IR) || any_isa<const LoopNest *>(IR)); - const Loop *L = any_isa<const Loop *>(IR) - ? any_cast<const Loop *>(IR) - : &any_cast<const LoopNest *>(IR)->getOutermostLoop(); - assert(L && "Loop should be valid for printing"); - - // Verify the loop structure and LCSSA form before visiting the loop. - L->verifyLoop(); - assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) && - "Loops must remain in LCSSA form!"); - }); -#endif - - do { - Loop *L = Worklist.pop_back_val(); - assert(!(LoopNestMode && L->getParentLoop()) && - "L should be a top-level loop in loop-nest mode."); - - // Reset the update structure for this loop. - Updater.CurrentL = L; - Updater.SkipCurrentLoop = false; - -#ifndef NDEBUG - // Save a parent loop pointer for asserts. - Updater.ParentL = L->getParentLoop(); -#endif - // Check the PassInstrumentation's BeforePass callbacks before running the - // pass, skip its execution completely if asked to (callback returns - // false). - if (!PI.runBeforePass<Loop>(*Pass, *L)) - continue; - - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass->name()); - PassPA = Pass->run(*L, LAM, LAR, Updater); - } - - // Do not pass deleted Loop into the instrumentation. - if (Updater.skipCurrentLoop()) - PI.runAfterPassInvalidated<Loop>(*Pass, PassPA); - else - PI.runAfterPass<Loop>(*Pass, *L, PassPA); - - // FIXME: We should verify the set of analyses relevant to Loop passes - // are preserved. - - // If the loop hasn't been deleted, we need to handle invalidation here. - if (!Updater.skipCurrentLoop()) - // We know that the loop pass couldn't have invalidated any other - // loop's analyses (that's the contract of a loop pass), so directly - // handle the loop analysis manager's invalidation here. - LAM.invalidate(*L, PassPA); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - PA.intersect(std::move(PassPA)); - } while (!Worklist.empty()); - -#ifndef NDEBUG - PI.popBeforeNonSkippedPassCallback(); -#endif - - // By definition we preserve the proxy. We also preserve all analyses on - // Loops. This precludes *any* invalidation of loop analyses by the proxy, - // but that's OK because we've taken care to invalidate analyses in the - // loop analysis manager incrementally above. - PA.preserveSet<AllAnalysesOn<Loop>>(); - PA.preserve<LoopAnalysisManagerFunctionProxy>(); - // We also preserve the set of standard analyses. 
- PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LoopAnalysis>(); - PA.preserve<ScalarEvolutionAnalysis>(); - if (UseBlockFrequencyInfo && F.hasProfileData()) - PA.preserve<BlockFrequencyAnalysis>(); - if (UseMemorySSA) - PA.preserve<MemorySSAAnalysis>(); - // FIXME: What we really want to do here is preserve an AA category, but - // that concept doesn't exist yet. - PA.preserve<AAManager>(); - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); + return PA; +} +} // namespace llvm + +PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, + FunctionAnalysisManager &AM) { + // Before we even compute any loop analyses, first run a miniature function + // pass pipeline to put loops into their canonical form. Note that we can + // directly build up function analyses after this as the function pass + // manager handles all the invalidation at that layer. + PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F); + + PreservedAnalyses PA = PreservedAnalyses::all(); + // Check the PassInstrumentation's BeforePass callbacks before running the + // canonicalization pipeline. + if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) { + PA = LoopCanonicalizationFPM.run(F, AM); + PI.runAfterPass<Function>(LoopCanonicalizationFPM, F, PA); + } + + // Get the loop structure for this function + LoopInfo &LI = AM.getResult<LoopAnalysis>(F); + + // If there are no loops, there is nothing to do here. + if (LI.empty()) + return PA; + + // Get the analysis results needed by loop passes. + MemorySSA *MSSA = + UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr; + BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData() + ? (&AM.getResult<BlockFrequencyAnalysis>(F)) + : nullptr; + LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F), + AM.getResult<AssumptionAnalysis>(F), + AM.getResult<DominatorTreeAnalysis>(F), + AM.getResult<LoopAnalysis>(F), + AM.getResult<ScalarEvolutionAnalysis>(F), + AM.getResult<TargetLibraryAnalysis>(F), + AM.getResult<TargetIRAnalysis>(F), + BFI, + MSSA}; + + // Setup the loop analysis manager from its proxy. It is important that + // this is only done when there are loops to process and we have built the + // LoopStandardAnalysisResults object. The loop analyses cached in this + // manager have access to those analysis results and so it must invalidate + // itself when they go away. + auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F); + if (UseMemorySSA) + LAMFP.markMSSAUsed(); + LoopAnalysisManager &LAM = LAMFP.getManager(); + + // A postorder worklist of loops to process. + SmallPriorityWorklist<Loop *, 4> Worklist; + + // Register the worklist and loop analysis manager so that loop passes can + // update them when they mutate the loop nest structure. + LPMUpdater Updater(Worklist, LAM, LoopNestMode); + + // Add the loop nests in the reverse order of LoopInfo. See method + // declaration. + if (!LoopNestMode) { + appendLoopsToWorklist(LI, Worklist); + } else { + for (Loop *L : LI) + Worklist.insert(L); + } + +#ifndef NDEBUG + PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) { + if (isSpecialPass(PassID, {"PassManager"})) + return; + assert(any_isa<const Loop *>(IR) || any_isa<const LoopNest *>(IR)); + const Loop *L = any_isa<const Loop *>(IR) + ? 
any_cast<const Loop *>(IR) + : &any_cast<const LoopNest *>(IR)->getOutermostLoop(); + assert(L && "Loop should be valid for printing"); + + // Verify the loop structure and LCSSA form before visiting the loop. + L->verifyLoop(); + assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) && + "Loops must remain in LCSSA form!"); + }); +#endif + + do { + Loop *L = Worklist.pop_back_val(); + assert(!(LoopNestMode && L->getParentLoop()) && + "L should be a top-level loop in loop-nest mode."); + + // Reset the update structure for this loop. + Updater.CurrentL = L; + Updater.SkipCurrentLoop = false; + +#ifndef NDEBUG + // Save a parent loop pointer for asserts. + Updater.ParentL = L->getParentLoop(); +#endif + // Check the PassInstrumentation's BeforePass callbacks before running the + // pass, skip its execution completely if asked to (callback returns + // false). + if (!PI.runBeforePass<Loop>(*Pass, *L)) + continue; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass->name()); + PassPA = Pass->run(*L, LAM, LAR, Updater); + } + + // Do not pass deleted Loop into the instrumentation. + if (Updater.skipCurrentLoop()) + PI.runAfterPassInvalidated<Loop>(*Pass, PassPA); + else + PI.runAfterPass<Loop>(*Pass, *L, PassPA); + + // FIXME: We should verify the set of analyses relevant to Loop passes + // are preserved. + + // If the loop hasn't been deleted, we need to handle invalidation here. + if (!Updater.skipCurrentLoop()) + // We know that the loop pass couldn't have invalidated any other + // loop's analyses (that's the contract of a loop pass), so directly + // handle the loop analysis manager's invalidation here. + LAM.invalidate(*L, PassPA); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + PA.intersect(std::move(PassPA)); + } while (!Worklist.empty()); + +#ifndef NDEBUG + PI.popBeforeNonSkippedPassCallback(); +#endif + + // By definition we preserve the proxy. We also preserve all analyses on + // Loops. This precludes *any* invalidation of loop analyses by the proxy, + // but that's OK because we've taken care to invalidate analyses in the + // loop analysis manager incrementally above. + PA.preserveSet<AllAnalysesOn<Loop>>(); + PA.preserve<LoopAnalysisManagerFunctionProxy>(); + // We also preserve the set of standard analyses. + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + PA.preserve<ScalarEvolutionAnalysis>(); + if (UseBlockFrequencyInfo && F.hasProfileData()) + PA.preserve<BlockFrequencyAnalysis>(); + if (UseMemorySSA) + PA.preserve<MemorySSAAnalysis>(); + // FIXME: What we really want to do here is preserve an AA category, but + // that concept doesn't exist yet. + PA.preserve<AAManager>(); + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + PA.preserve<SCEVAA>(); return PA; } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp index 4f97641e20..e46c3d64e6 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp @@ -362,7 +362,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, // For the new PM, we also can't use BranchProbabilityInfo as an analysis // pass. Function analyses need to be preserved across loop transformations // but BPI is not preserved, hence a newly built one is needed. 
- BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr); + BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr); LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -439,8 +439,8 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL, Type *RangeCheckType) { if (!EnableIVTruncation) return false; - assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() > - DL.getTypeSizeInBits(RangeCheckType).getFixedSize() && + assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() > + DL.getTypeSizeInBits(RangeCheckType).getFixedSize() && "Expected latch check IV type to be larger than range check operand " "type!"); // The start and end values of the IV should be known. This is to guarantee @@ -454,13 +454,13 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL, // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the // IV wraps around, and the truncation of the IV would lose the range of // iterations between 2^32 and 2^64. - if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred)) + if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred)) return false; // The active bits should be less than the bits in the RangeCheckType. This // guarantees that truncating the latch check to RangeCheckType is a safe // operation. - auto RangeCheckTypeBitSize = - DL.getTypeSizeInBits(RangeCheckType).getFixedSize(); + auto RangeCheckTypeBitSize = + DL.getTypeSizeInBits(RangeCheckType).getFixedSize(); return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize && Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize; } @@ -477,8 +477,8 @@ static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL, if (RangeCheckType == LatchType) return LatchCheck; // For now, bail out if latch type is narrower than range type. 
- if (DL.getTypeSizeInBits(LatchType).getFixedSize() < - DL.getTypeSizeInBits(RangeCheckType).getFixedSize()) + if (DL.getTypeSizeInBits(LatchType).getFixedSize() < + DL.getTypeSizeInBits(RangeCheckType).getFixedSize()) return None; if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType)) return None; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp index 65a6205f03..18caeabaca 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -50,7 +50,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopReroll.h" +#include "llvm/Transforms/Scalar/LoopReroll.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -162,12 +162,12 @@ namespace { IL_End }; - class LoopRerollLegacyPass : public LoopPass { + class LoopRerollLegacyPass : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRerollLegacyPass() : LoopPass(ID) { - initializeLoopRerollLegacyPassPass(*PassRegistry::getPassRegistry()); + LoopRerollLegacyPass() : LoopPass(ID) { + initializeLoopRerollLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -176,16 +176,16 @@ namespace { AU.addRequired<TargetLibraryInfoWrapperPass>(); getLoopAnalysisUsage(AU); } - }; - - class LoopReroll { - public: - LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE, - TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA) - : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT), - PreserveLCSSA(PreserveLCSSA) {} - bool runOnLoop(Loop *L); - + }; + + class LoopReroll { + public: + LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE, + TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA) + : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT), + PreserveLCSSA(PreserveLCSSA) {} + bool runOnLoop(Loop *L); + protected: AliasAnalysis *AA; LoopInfo *LI; @@ -494,16 +494,16 @@ namespace { } // end anonymous namespace -char LoopRerollLegacyPass::ID = 0; +char LoopRerollLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", - false, false) +INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", + false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false, - false) +INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false, + false) -Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; } +Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; } // Returns true if the provided instruction is used outside the given loop. 
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in @@ -1081,12 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po DenseSet<Instruction*> V; collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V); for (auto *I : V) { - if (I->mayHaveSideEffects()) { - LLVM_DEBUG(dbgs() << "LRR: Aborting - " - << "An instruction which does not belong to any root " - << "sets must not have side effects: " << *I); - return false; - } + if (I->mayHaveSideEffects()) { + LLVM_DEBUG(dbgs() << "LRR: Aborting - " + << "An instruction which does not belong to any root " + << "sets must not have side effects: " << *I); + return false; + } Uses[I].set(IL_All); } @@ -1102,7 +1102,7 @@ LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In, UsesTy::iterator *StartI) { UsesTy::iterator I = StartI ? *StartI : In.begin(); while (I != In.end() && (I->second.test(Val) == 0 || - Exclude.contains(I->first))) + Exclude.contains(I->first))) ++I; return I; } @@ -1660,7 +1660,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, return true; } -bool LoopReroll::runOnLoop(Loop *L) { +bool LoopReroll::runOnLoop(Loop *L) { BasicBlock *Header = L->getHeader(); LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %" << Header->getName() << " (" << L->getNumBlocks() @@ -1709,26 +1709,26 @@ bool LoopReroll::runOnLoop(Loop *L) { return Changed; } - -bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { - if (skipLoop(L)) - return false; - - auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( - *L->getHeader()->getParent()); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - - return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L); -} - -PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L) - ? getLoopPassPreservedAnalyses() - : PreservedAnalyses::all(); -} + +bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( + *L->getHeader()->getParent()); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L); +} + +PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L) + ? 
getLoopPassPreservedAnalyses() + : PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp index ad1cfc68ec..252668e1d0 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp @@ -12,7 +12,7 @@ #include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" @@ -34,35 +34,35 @@ static cl::opt<unsigned> DefaultRotationThreshold( "rotation-max-header-size", cl::init(16), cl::Hidden, cl::desc("The default maximum header size for automatic loop rotation")); -static cl::opt<bool> PrepareForLTOOption( - "rotation-prepare-for-lto", cl::init(false), cl::Hidden, - cl::desc("Run loop-rotation in the prepare-for-lto stage. This option " - "should be used for testing only.")); - -LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO) - : EnableHeaderDuplication(EnableHeaderDuplication), - PrepareForLTO(PrepareForLTO) {} +static cl::opt<bool> PrepareForLTOOption( + "rotation-prepare-for-lto", cl::init(false), cl::Hidden, + cl::desc("Run loop-rotation in the prepare-for-lto stage. This option " + "should be used for testing only.")); +LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO) + : EnableHeaderDuplication(EnableHeaderDuplication), + PrepareForLTO(PrepareForLTO) {} + PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { - // Vectorization requires loop-rotation. Use default threshold for loops the - // user explicitly marked for vectorization, even when header duplication is - // disabled. - int Threshold = EnableHeaderDuplication || - hasVectorizeTransformation(&L) == TM_ForcedByUser - ? DefaultRotationThreshold - : 0; + // Vectorization requires loop-rotation. Use default threshold for loops the + // user explicitly marked for vectorization, even when header duplication is + // disabled. + int Threshold = EnableHeaderDuplication || + hasVectorizeTransformation(&L) == TM_ForcedByUser + ? DefaultRotationThreshold + : 0; const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); Optional<MemorySSAUpdater> MSSAU; if (AR.MSSA) MSSAU = MemorySSAUpdater(AR.MSSA); - bool Changed = - LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false, - Threshold, false, PrepareForLTO || PrepareForLTOOption); + bool Changed = + LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + MSSAU.hasValue() ? 
MSSAU.getPointer() : nullptr, SQ, false, + Threshold, false, PrepareForLTO || PrepareForLTOOption); if (!Changed) return PreservedAnalyses::all(); @@ -80,13 +80,13 @@ namespace { class LoopRotateLegacyPass : public LoopPass { unsigned MaxHeaderSize; - bool PrepareForLTO; + bool PrepareForLTO; public: static char ID; // Pass ID, replacement for typeid - LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1, - bool PrepareForLTO = false) - : LoopPass(ID), PrepareForLTO(PrepareForLTO) { + LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1, + bool PrepareForLTO = false) + : LoopPass(ID), PrepareForLTO(PrepareForLTO) { initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry()); if (SpecifiedMaxHeaderSize == -1) MaxHeaderSize = DefaultRotationThreshold; @@ -122,17 +122,17 @@ public: if (MSSAA) MSSAU = MemorySSAUpdater(&MSSAA->getMSSA()); } - // Vectorization requires loop-rotation. Use default threshold for loops the - // user explicitly marked for vectorization, even when header duplication is - // disabled. - int Threshold = hasVectorizeTransformation(L) == TM_ForcedByUser - ? DefaultRotationThreshold - : MaxHeaderSize; - + // Vectorization requires loop-rotation. Use default threshold for loops the + // user explicitly marked for vectorization, even when header duplication is + // disabled. + int Threshold = hasVectorizeTransformation(L) == TM_ForcedByUser + ? DefaultRotationThreshold + : MaxHeaderSize; + return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, - false, Threshold, false, - PrepareForLTO || PrepareForLTOOption); + false, Threshold, false, + PrepareForLTO || PrepareForLTOOption); } }; } // end namespace @@ -147,6 +147,6 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false, false) -Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) { - return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO); +Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) { + return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index cc6d112208..17f99ff2a9 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -365,20 +365,20 @@ private: unsigned DummyIdx = 1; for (BasicBlock *BB : DeadExitBlocks) { - // Eliminate all Phis and LandingPads from dead exits. - // TODO: Consider removing all instructions in this dead block. - SmallVector<Instruction *, 4> DeadInstructions; + // Eliminate all Phis and LandingPads from dead exits. + // TODO: Consider removing all instructions in this dead block. 
+ SmallVector<Instruction *, 4> DeadInstructions; for (auto &PN : BB->phis()) - DeadInstructions.push_back(&PN); - - if (auto *LandingPad = dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) - DeadInstructions.emplace_back(LandingPad); - - for (Instruction *I : DeadInstructions) { - I->replaceAllUsesWith(UndefValue::get(I->getType())); - I->eraseFromParent(); + DeadInstructions.push_back(&PN); + + if (auto *LandingPad = dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) + DeadInstructions.emplace_back(LandingPad); + + for (Instruction *I : DeadInstructions) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); } - + assert(DummyIdx != 0 && "Too many dead exits!"); DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB); DTUpdates.push_back({DominatorTree::Insert, Preheader, BB}); @@ -415,9 +415,9 @@ private: assert(FixLCSSALoop && "Should be a loop!"); // We need all DT updates to be done before forming LCSSA. if (MSSAU) - MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); - else - DTU.applyUpdates(DTUpdates); + MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); + else + DTU.applyUpdates(DTUpdates); DTUpdates.clear(); formLCSSARecursively(*FixLCSSALoop, DT, &LI, &SE); } @@ -425,7 +425,7 @@ private: if (MSSAU) { // Clear all updates now. Facilitates deletes that follow. - MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); + MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); DTUpdates.clear(); if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -451,7 +451,7 @@ private: if (LI.isLoopHeader(BB)) { assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!"); Loop *DL = LI.getLoopFor(BB); - if (!DL->isOutermost()) { + if (!DL->isOutermost()) { for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop()) for (auto *BB : DL->getBlocks()) PL->removeBlockFromLoop(BB); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp index 47698fdde6..0296b12878 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp @@ -39,8 +39,8 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -69,14 +69,14 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking( "max-uses-for-sinking", cl::Hidden, cl::init(30), cl::desc("Do not sink instructions that have too many uses.")); -static cl::opt<bool> EnableMSSAInLoopSink( - "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true), - cl::desc("Enable MemorySSA for LoopSink in new pass manager")); - -static cl::opt<bool> EnableMSSAInLegacyLoopSink( - "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false), - cl::desc("Enable MemorySSA for LoopSink in legacy pass manager")); - +static cl::opt<bool> EnableMSSAInLoopSink( + "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true), + cl::desc("Enable MemorySSA for LoopSink in new pass manager")); + +static cl::opt<bool> EnableMSSAInLegacyLoopSink( + "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false), + cl::desc("Enable MemorySSA for LoopSink in legacy pass manager")); + /// Return adjusted total frequency of \p BBs. 
/// /// * If there is only one BB, sinking instruction will not introduce code @@ -182,10 +182,10 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs, // sinking is successful. // \p LoopBlockNumber is used to sort the insertion blocks to ensure // determinism. -static bool sinkInstruction( - Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, - const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI, - DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) { +static bool sinkInstruction( + Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, + const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI, + DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) { // Compute the set of blocks in loop L which contain a use of I. SmallPtrSet<BasicBlock *, 2> BBs; for (auto &U : I.uses()) { @@ -222,7 +222,7 @@ static bool sinkInstruction( // of the loop block numbers as iterating the set doesn't give a useful // order. No need to stable sort as the block numbers are a total ordering. SmallVector<BasicBlock *, 2> SortedBBsToSinkInto; - llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto); + llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto); llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) { return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second; }); @@ -238,21 +238,21 @@ static bool sinkInstruction( Instruction *IC = I.clone(); IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); - - if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { - // Create a new MemoryAccess and let MemorySSA set its defining access. - MemoryAccess *NewMemAcc = - MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning); - if (NewMemAcc) { - if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc)) - MSSAU->insertDef(MemDef, /*RenameUses=*/true); - else { - auto *MemUse = cast<MemoryUse>(NewMemAcc); - MSSAU->insertUse(MemUse, /*RenameUses=*/true); - } - } - } - + + if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + // Create a new MemoryAccess and let MemorySSA set its defining access. 
+ MemoryAccess *NewMemAcc = + MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning); + if (NewMemAcc) { + if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc)) + MSSAU->insertDef(MemDef, /*RenameUses=*/true); + else { + auto *MemUse = cast<MemoryUse>(NewMemAcc); + MSSAU->insertUse(MemUse, /*RenameUses=*/true); + } + } + } + // Replaces uses of I with IC in N I.replaceUsesWithIf(IC, [N](Use &U) { return cast<Instruction>(U.getUser())->getParent() == N; @@ -267,11 +267,11 @@ static bool sinkInstruction( NumLoopSunk++; I.moveBefore(&*MoveBB->getFirstInsertionPt()); - if (MSSAU) - if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>( - MSSAU->getMemorySSA()->getMemoryAccess(&I))) - MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning); - + if (MSSAU) + if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>( + MSSAU->getMemorySSA()->getMemoryAccess(&I))) + MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning); + return true; } @@ -280,14 +280,14 @@ static bool sinkInstruction( static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI, - ScalarEvolution *SE, - AliasSetTracker *CurAST, - MemorySSA *MSSA) { + ScalarEvolution *SE, + AliasSetTracker *CurAST, + MemorySSA *MSSA) { BasicBlock *Preheader = L.getLoopPreheader(); - assert(Preheader && "Expected loop to have preheader"); + assert(Preheader && "Expected loop to have preheader"); - assert(Preheader->getParent()->hasProfileData() && - "Unexpected call when profile data unavailable."); + assert(Preheader->getParent()->hasProfileData() && + "Unexpected call when profile data unavailable."); const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader); // If there are no basic blocks with lower frequency than the preheader then @@ -298,14 +298,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, })) return false; - std::unique_ptr<MemorySSAUpdater> MSSAU; - std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags; - if (MSSA) { - MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); - LICMFlags = - std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA); - } - + std::unique_ptr<MemorySSAUpdater> MSSAU; + std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags; + if (MSSA) { + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + LICMFlags = + std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA); + } + bool Changed = false; // Sort loop's basic blocks by frequency @@ -329,11 +329,11 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, // No need to check for instruction's operands are loop invariant. 
assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false, - LICMFlags.get())) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false, + LICMFlags.get())) continue; - if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, - MSSAU.get())) + if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, + MSSAU.get())) Changed = true; } @@ -342,13 +342,13 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, return Changed; } -static void computeAliasSet(Loop &L, BasicBlock &Preheader, - AliasSetTracker &CurAST) { - for (BasicBlock *BB : L.blocks()) - CurAST.add(*BB); - CurAST.add(Preheader); -} - +static void computeAliasSet(Loop &L, BasicBlock &Preheader, + AliasSetTracker &CurAST) { + for (BasicBlock *BB : L.blocks()) + CurAST.add(*BB); + CurAST.add(Preheader); +} + PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { LoopInfo &LI = FAM.getResult<LoopAnalysis>(F); // Nothing to do if there are no loops. @@ -359,10 +359,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F); BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); - MemorySSA *MSSA = EnableMSSAInLoopSink - ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA() - : nullptr; - + MemorySSA *MSSA = EnableMSSAInLoopSink + ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA() + : nullptr; + // We want to do a postorder walk over the loops. Since loops are a tree this // is equivalent to a reversed preorder walk and preorder is easy to compute // without recursion. Since we reverse the preorder, we will visit siblings @@ -374,27 +374,27 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { do { Loop &L = *PreorderLoops.pop_back_val(); - BasicBlock *Preheader = L.getLoopPreheader(); - if (!Preheader) - continue; - - // Enable LoopSink only when runtime profile is available. - // With static profile, the sinking decision may be sub-optimal. - if (!Preheader->getParent()->hasProfileData()) - continue; - - std::unique_ptr<AliasSetTracker> CurAST; - if (!EnableMSSAInLoopSink) { - CurAST = std::make_unique<AliasSetTracker>(AA); - computeAliasSet(L, *Preheader, *CurAST.get()); - } - + BasicBlock *Preheader = L.getLoopPreheader(); + if (!Preheader) + continue; + + // Enable LoopSink only when runtime profile is available. + // With static profile, the sinking decision may be sub-optimal. + if (!Preheader->getParent()->hasProfileData()) + continue; + + std::unique_ptr<AliasSetTracker> CurAST; + if (!EnableMSSAInLoopSink) { + CurAST = std::make_unique<AliasSetTracker>(AA); + computeAliasSet(L, *Preheader, *CurAST.get()); + } + // Note that we don't pass SCEV here because it is only used to invalidate // loops in SCEV and we don't preserve (or request) SCEV at all making that // unnecessary. 
Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, - /*ScalarEvolution*/ nullptr, - CurAST.get(), MSSA); + /*ScalarEvolution*/ nullptr, + CurAST.get(), MSSA); } while (!PreorderLoops.empty()); if (!Changed) @@ -402,14 +402,14 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); - - if (MSSA) { - PA.preserve<MemorySSAAnalysis>(); - - if (VerifyMemorySSA) - MSSA->verifyMemorySSA(); - } - + + if (MSSA) { + PA.preserve<MemorySSAAnalysis>(); + + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); + } + return PA; } @@ -424,46 +424,46 @@ struct LegacyLoopSinkPass : public LoopPass { if (skipLoop(L)) return false; - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) - return false; - - // Enable LoopSink only when runtime profile is available. - // With static profile, the sinking decision may be sub-optimal. - if (!Preheader->getParent()->hasProfileData()) - return false; - - AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + + // Enable LoopSink only when runtime profile is available. + // With static profile, the sinking decision may be sub-optimal. + if (!Preheader->getParent()->hasProfileData()) + return false; + + AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); - std::unique_ptr<AliasSetTracker> CurAST; - MemorySSA *MSSA = nullptr; - if (EnableMSSAInLegacyLoopSink) - MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); - else { - CurAST = std::make_unique<AliasSetTracker>(AA); - computeAliasSet(*L, *Preheader, *CurAST.get()); - } - - bool Changed = sinkLoopInvariantInstructions( - *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), + std::unique_ptr<AliasSetTracker> CurAST; + MemorySSA *MSSA = nullptr; + if (EnableMSSAInLegacyLoopSink) + MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); + else { + CurAST = std::make_unique<AliasSetTracker>(AA); + computeAliasSet(*L, *Preheader, *CurAST.get()); + } + + bool Changed = sinkLoopInvariantInstructions( + *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), getAnalysis<DominatorTreeWrapperPass>().getDomTree(), getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(), - SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA); - - if (MSSA && VerifyMemorySSA) - MSSA->verifyMemorySSA(); - - return Changed; + SE ? 
&SE->getSE() : nullptr, CurAST.get(), MSSA); + + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + + return Changed; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<BlockFrequencyInfoWrapperPass>(); getLoopAnalysisUsage(AU); - if (EnableMSSAInLegacyLoopSink) { - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + if (EnableMSSAInLegacyLoopSink) { + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } } }; } @@ -473,7 +473,7 @@ INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false) Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5dec9b5420..2b2f30340a 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -75,13 +75,13 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" @@ -424,7 +424,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L, // Handle a multiplication by -1 (negation) if it didn't fold. if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) if (Mul->getOperand(0)->isAllOnesValue()) { - SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands())); + SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands())); const SCEV *NewMul = SE.getMulExpr(Ops); SmallVector<const SCEV *, 4> MyGood; @@ -485,10 +485,10 @@ bool Formula::isCanonical(const Loop &L) const { // If ScaledReg is not a recurrent expr, or it is but its loop is not current // loop, meanwhile BaseRegs contains a recurrent expr reg related with current // loop, we want to swap the reg in BaseRegs with ScaledReg. - auto I = find_if(BaseRegs, [&](const SCEV *S) { - return isa<const SCEVAddRecExpr>(S) && - (cast<SCEVAddRecExpr>(S)->getLoop() == &L); - }); + auto I = find_if(BaseRegs, [&](const SCEV *S) { + return isa<const SCEVAddRecExpr>(S) && + (cast<SCEVAddRecExpr>(S)->getLoop() == &L); + }); return I == BaseRegs.end(); } @@ -507,7 +507,7 @@ void Formula::canonicalize(const Loop &L) { // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. if (!ScaledReg) { - ScaledReg = BaseRegs.pop_back_val(); + ScaledReg = BaseRegs.pop_back_val(); Scale = 1; } @@ -516,10 +516,10 @@ void Formula::canonicalize(const Loop &L) { // reg with ScaledReg. 
const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg); if (!SAR || SAR->getLoop() != &L) { - auto I = find_if(BaseRegs, [&](const SCEV *S) { - return isa<const SCEVAddRecExpr>(S) && - (cast<SCEVAddRecExpr>(S)->getLoop() == &L); - }); + auto I = find_if(BaseRegs, [&](const SCEV *S) { + return isa<const SCEVAddRecExpr>(S) && + (cast<SCEVAddRecExpr>(S)->getLoop() == &L); + }); if (I != BaseRegs.end()) std::swap(ScaledReg, *I); } @@ -752,13 +752,13 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { return C->getValue()->getSExtValue(); } } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - SmallVector<const SCEV *, 8> NewOps(Add->operands()); + SmallVector<const SCEV *, 8> NewOps(Add->operands()); int64_t Result = ExtractImmediate(NewOps.front(), SE); if (Result != 0) S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - SmallVector<const SCEV *, 8> NewOps(AR->operands()); + SmallVector<const SCEV *, 8> NewOps(AR->operands()); int64_t Result = ExtractImmediate(NewOps.front(), SE); if (Result != 0) S = SE.getAddRecExpr(NewOps, AR->getLoop(), @@ -778,13 +778,13 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { return GV; } } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - SmallVector<const SCEV *, 8> NewOps(Add->operands()); + SmallVector<const SCEV *, 8> NewOps(Add->operands()); GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); if (Result) S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - SmallVector<const SCEV *, 8> NewOps(AR->operands()); + SmallVector<const SCEV *, 8> NewOps(AR->operands()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); if (Result) S = SE.getAddRecExpr(NewOps, AR->getLoop(), @@ -934,8 +934,8 @@ static bool isHighCostExpansion(const SCEV *S, case scSignExtend: return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(), Processed, SE); - default: - break; + default: + break; } if (!Processed.insert(S).second) @@ -1211,7 +1211,7 @@ static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { return 0; if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg)) return getSetupCost(S->getStart(), Depth - 1); - if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg)) + if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg)) return getSetupCost(S->getOperand(), Depth - 1); if (auto S = dyn_cast<SCEVNAryExpr>(Reg)) return std::accumulate(S->op_begin(), S->op_end(), 0, @@ -2787,7 +2787,7 @@ static const SCEV *getExprBase(const SCEV *S) { case scAddRecExpr: return getExprBase(cast<SCEVAddRecExpr>(S)->getStart()); } - llvm_unreachable("Unknown SCEV kind!"); + llvm_unreachable("Unknown SCEV kind!"); } /// Return true if the chain increment is profitable to expand into a loop @@ -3402,7 +3402,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) Worklist.append(N->op_begin(), N->op_end()); - else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S)) + else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S)) Worklist.push_back(C->getOperand()); else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { Worklist.push_back(D->getLHS()); @@ -3835,14 +3835,14 @@ void LSRInstance::GenerateConstantOffsetsImpl( F.BaseOffset = (uint64_t)F.BaseOffset + Imm; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) return; - if (IsScaledReg) { + if (IsScaledReg) { 
F.ScaledReg = G; - } else { + } else { F.BaseRegs[Idx] = G; - // We may generate non canonical Formula if G is a recurrent expr reg - // related with current loop while F.ScaledReg is not. - F.canonicalize(*L); - } + // We may generate non canonical Formula if G is a recurrent expr reg + // related with current loop while F.ScaledReg is not. + F.canonicalize(*L); + } (void)InsertFormula(LU, LUIdx, F); } @@ -5383,11 +5383,11 @@ void LSRInstance::RewriteForPHI( // Split the critical edge. BasicBlock *NewBB = nullptr; if (!Parent->isLandingPad()) { - NewBB = - SplitCriticalEdge(BB, Parent, - CriticalEdgeSplittingOptions(&DT, &LI, MSSAU) - .setMergeIdenticalEdges() - .setKeepOneInputPHIs()); + NewBB = + SplitCriticalEdge(BB, Parent, + CriticalEdgeSplittingOptions(&DT, &LI, MSSAU) + .setMergeIdenticalEdges() + .setKeepOneInputPHIs()); } else { SmallVector<BasicBlock*, 2> NewBBs; SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI); @@ -5520,8 +5520,8 @@ void LSRInstance::ImplementSolution( // we can remove them after we are done working. SmallVector<WeakTrackingVH, 16> DeadInsts; - SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", - false); + SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", + false); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif @@ -5620,19 +5620,19 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, if (IU.empty()) return; // Skip nested loops until we can model them better with formulae. - if (!L->isInnermost()) { + if (!L->isInnermost()) { LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); return; } // Start collecting data and preparing for the solver. - // If number of registers is not the major cost, we cannot benefit from the - // current profitable chain optimization which is based on number of - // registers. - // FIXME: add profitable chain optimization for other kinds major cost, for - // example number of instructions. - if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain) - CollectChains(); + // If number of registers is not the major cost, we cannot benefit from the + // current profitable chain optimization which is based on number of + // registers. + // FIXME: add profitable chain optimization for other kinds major cost, for + // example number of instructions. 
+ if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain) + CollectChains(); CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); CollectLoopInvariantFixupsAndFormulae(); @@ -5772,63 +5772,63 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MemorySSAWrapperPass>(); } -using EqualValues = SmallVector<std::tuple<WeakVH, int64_t, DIExpression *>, 4>; -using EqualValuesMap = DenseMap<DbgValueInst *, EqualValues>; - -static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE, - EqualValuesMap &DbgValueToEqualSet) { - for (auto &B : L->getBlocks()) { - for (auto &I : *B) { - auto DVI = dyn_cast<DbgValueInst>(&I); - if (!DVI) - continue; - auto V = DVI->getVariableLocation(); - if (!V || !SE.isSCEVable(V->getType())) - continue; - auto DbgValueSCEV = SE.getSCEV(V); - EqualValues EqSet; - for (PHINode &Phi : L->getHeader()->phis()) { - if (V->getType() != Phi.getType()) - continue; - if (!SE.isSCEVable(Phi.getType())) - continue; - auto PhiSCEV = SE.getSCEV(&Phi); - Optional<APInt> Offset = - SE.computeConstantDifference(DbgValueSCEV, PhiSCEV); - if (Offset && Offset->getMinSignedBits() <= 64) - EqSet.emplace_back(std::make_tuple( - &Phi, Offset.getValue().getSExtValue(), DVI->getExpression())); - } - DbgValueToEqualSet[DVI] = std::move(EqSet); - } - } -} - -static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet) { - for (auto A : DbgValueToEqualSet) { - auto DVI = A.first; - // Only update those that are now undef. - if (!isa_and_nonnull<UndefValue>(DVI->getVariableLocation())) - continue; - for (auto EV : A.second) { - auto V = std::get<WeakVH>(EV); - if (!V) - continue; - auto DbgDIExpr = std::get<DIExpression *>(EV); - auto Offset = std::get<int64_t>(EV); - auto &Ctx = DVI->getContext(); - DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V))); - if (Offset) { - SmallVector<uint64_t, 8> Ops; - DIExpression::appendOffset(Ops, Offset); - DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true); - } - DVI->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr)); - break; - } - } -} - +using EqualValues = SmallVector<std::tuple<WeakVH, int64_t, DIExpression *>, 4>; +using EqualValuesMap = DenseMap<DbgValueInst *, EqualValues>; + +static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE, + EqualValuesMap &DbgValueToEqualSet) { + for (auto &B : L->getBlocks()) { + for (auto &I : *B) { + auto DVI = dyn_cast<DbgValueInst>(&I); + if (!DVI) + continue; + auto V = DVI->getVariableLocation(); + if (!V || !SE.isSCEVable(V->getType())) + continue; + auto DbgValueSCEV = SE.getSCEV(V); + EqualValues EqSet; + for (PHINode &Phi : L->getHeader()->phis()) { + if (V->getType() != Phi.getType()) + continue; + if (!SE.isSCEVable(Phi.getType())) + continue; + auto PhiSCEV = SE.getSCEV(&Phi); + Optional<APInt> Offset = + SE.computeConstantDifference(DbgValueSCEV, PhiSCEV); + if (Offset && Offset->getMinSignedBits() <= 64) + EqSet.emplace_back(std::make_tuple( + &Phi, Offset.getValue().getSExtValue(), DVI->getExpression())); + } + DbgValueToEqualSet[DVI] = std::move(EqSet); + } + } +} + +static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet) { + for (auto A : DbgValueToEqualSet) { + auto DVI = A.first; + // Only update those that are now undef. 
+ if (!isa_and_nonnull<UndefValue>(DVI->getVariableLocation())) + continue; + for (auto EV : A.second) { + auto V = std::get<WeakVH>(EV); + if (!V) + continue; + auto DbgDIExpr = std::get<DIExpression *>(EV); + auto Offset = std::get<int64_t>(EV); + auto &Ctx = DVI->getContext(); + DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V))); + if (Offset) { + SmallVector<uint64_t, 8> Ops; + DIExpression::appendOffset(Ops, Offset); + DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true); + } + DVI->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr)); + break; + } + } +} + static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, @@ -5844,17 +5844,17 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged(); - // Debug preservation - before we start removing anything create equivalence - // sets for the llvm.dbg.value intrinsics. - EqualValuesMap DbgValueToEqualSet; - DbgGatherEqualValues(L, SE, DbgValueToEqualSet); - + // Debug preservation - before we start removing anything create equivalence + // sets for the llvm.dbg.value intrinsics. + EqualValuesMap DbgValueToEqualSet; + DbgGatherEqualValues(L, SE, DbgValueToEqualSet); + // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector<WeakTrackingVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - SCEVExpander Rewriter(SE, DL, "lsr", false); + SCEVExpander Rewriter(SE, DL, "lsr", false); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif @@ -5866,9 +5866,9 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); } } - - DbgApplyEqualValues(DbgValueToEqualSet); - + + DbgApplyEqualValues(DbgValueToEqualSet); + return Changed; } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 495906e1a7..d65e9dd059 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -41,7 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/LoopPeel.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" @@ -288,13 +288,13 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, None, None, None, None, None); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(L, SE, TTI, None, None); - - TransformationMode EnableMode = hasUnrollAndJamTransformation(L); - if (EnableMode & TM_Disable) - return LoopUnrollResult::Unmodified; - if (EnableMode & TM_ForcedByUser) - UP.UnrollAndJam = true; - + + TransformationMode EnableMode = hasUnrollAndJamTransformation(L); + if (EnableMode & TM_Disable) + return LoopUnrollResult::Unmodified; + if (EnableMode & TM_ForcedByUser) + UP.UnrollAndJam = true; + if (AllowUnrollAndJam.getNumOccurrences() > 0) UP.UnrollAndJam = AllowUnrollAndJam; if (UnrollAndJamThreshold.getNumOccurrences() > 0) diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp 
b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp index 1b974576a3..de36dce3a0 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -56,7 +56,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/LoopPeel.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -76,19 +76,19 @@ using namespace llvm; cl::opt<bool> llvm::ForgetSCEVInLoopUnroll( "forget-scev-loop-unroll", cl::init(false), cl::Hidden, cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just" - " the current top-most loop. This is sometimes preferred to reduce" + " the current top-most loop. This is sometimes preferred to reduce" " compile time.")); static cl::opt<unsigned> UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling")); -static cl::opt<unsigned> - UnrollOptSizeThreshold( - "unroll-optsize-threshold", cl::init(0), cl::Hidden, - cl::desc("The cost threshold for loop unrolling when optimizing for " - "size")); - +static cl::opt<unsigned> + UnrollOptSizeThreshold( + "unroll-optsize-threshold", cl::init(0), cl::Hidden, + cl::desc("The cost threshold for loop unrolling when optimizing for " + "size")); + static cl::opt<unsigned> UnrollPartialThreshold( "unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling")); @@ -194,9 +194,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.Threshold = OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault; UP.MaxPercentThresholdBoost = 400; - UP.OptSizeThreshold = UnrollOptSizeThreshold; + UP.OptSizeThreshold = UnrollOptSizeThreshold; UP.PartialThreshold = 150; - UP.PartialOptSizeThreshold = UnrollOptSizeThreshold; + UP.PartialOptSizeThreshold = UnrollOptSizeThreshold; UP.Count = 0; UP.DefaultUnrollRuntimeCount = 8; UP.MaxCount = std::numeric_limits<unsigned>::max(); @@ -218,10 +218,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( // Apply size attributes bool OptForSize = L->getHeader()->getParent()->hasOptSize() || - // Let unroll hints / pragmas take precedence over PGSO. - (hasUnrollTransformation(L) != TM_ForcedByUser && - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass)); + // Let unroll hints / pragmas take precedence over PGSO. + (hasUnrollTransformation(L) != TM_ForcedByUser && + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, + PGSOQueryType::IRPass)); if (OptForSize) { UP.Threshold = UP.OptSizeThreshold; UP.PartialThreshold = UP.PartialOptSizeThreshold; @@ -347,7 +347,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( // Only analyze inner loops. We can't properly estimate cost of nested loops // and we won't visit inner loops again anyway. - if (!L->isInnermost()) + if (!L->isInnermost()) return None; // Don't simulate loops with a big or unknown tripcount @@ -389,10 +389,10 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( assert(CostWorklist.empty() && "Must start with an empty cost list"); assert(PHIUsedList.empty() && "Must start with an empty phi used list"); CostWorklist.push_back(&RootI); - TargetTransformInfo::TargetCostKind CostKind = - RootI.getFunction()->hasMinSize() ? 
- TargetTransformInfo::TCK_CodeSize : - TargetTransformInfo::TCK_SizeAndLatency; + TargetTransformInfo::TargetCostKind CostKind = + RootI.getFunction()->hasMinSize() ? + TargetTransformInfo::TCK_CodeSize : + TargetTransformInfo::TCK_SizeAndLatency; for (;; --Iteration) { do { Instruction *I = CostWorklist.pop_back_val(); @@ -433,7 +433,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( // First accumulate the cost of this instruction. if (!Cost.IsFree) { - UnrolledCost += TTI.getUserCost(I, CostKind); + UnrolledCost += TTI.getUserCost(I, CostKind); LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "); LLVM_DEBUG(I->dump()); @@ -473,9 +473,9 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); - TargetTransformInfo::TargetCostKind CostKind = - L->getHeader()->getParent()->hasMinSize() ? - TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; + TargetTransformInfo::TargetCostKind CostKind = + L->getHeader()->getParent()->hasMinSize() ? + TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. // Since the same load will take different values on different iterations, @@ -529,7 +529,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( // Track this instruction's expected baseline cost when executing the // rolled loop form. - RolledDynamicCost += TTI.getUserCost(&I, CostKind); + RolledDynamicCost += TTI.getUserCost(&I, CostKind); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after @@ -851,7 +851,7 @@ bool llvm::computeUnrollCount( } // 4th priority is loop peeling. - computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold); + computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold); if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; @@ -1043,7 +1043,7 @@ static LoopUnrollResult tryToUnrollLoop( return LoopUnrollResult::Unmodified; } - // When automatic unrolling is disabled, do not unroll unless overridden for + // When automatic unrolling is disabled, do not unroll unless overridden for // this loop. if (OnlyWhenForced && !(TM & TM_Enable)) return LoopUnrollResult::Unmodified; @@ -1057,7 +1057,7 @@ static LoopUnrollResult tryToUnrollLoop( ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedFullUnrollMaxCount); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( - L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true); + L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. @@ -1105,7 +1105,7 @@ static LoopUnrollResult tryToUnrollLoop( // If the loop contains a convergent operation, the prelude we'd add // to do the first few instructions before we hit the unrolled loop // is unsafe -- it adds a control-flow dependency to the convergent - // operation. Therefore restrict remainder loop (try unrolling without). + // operation. Therefore restrict remainder loop (try unrolling without). // // TODO: This is quite conservative. In practice, convergent_op() // is likely to be called unconditionally in the loop. 
In this @@ -1301,7 +1301,7 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced, Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV) { return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1, - 0, 0, 0, 1); + 0, 0, 0, 1); } PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, @@ -1329,7 +1329,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, OnlyWhenForced, ForgetSCEV, /*Count*/ None, /*Threshold*/ None, /*AllowPartial*/ false, /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ true, + /*AllowPeeling*/ true, /*AllowProfileBasedPeeling*/ false, /*FullUnrollMaxCount*/ None) != LoopUnrollResult::Unmodified; @@ -1371,7 +1371,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, } // Otherwise erase the loop from the list if it was in the old loops. - return OldLoops.contains(SibLoop); + return OldLoops.contains(SibLoop); }); Updater.addSiblingLoops(SibLoops); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp index 822a786fc7..843be6cbb9 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,7 +32,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyBlockFrequencyInfo.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -99,12 +99,12 @@ static cl::opt<unsigned> Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden); -static cl::opt<unsigned> - MSSAThreshold("loop-unswitch-memoryssa-threshold", - cl::desc("Max number of memory uses to explore during " - "partial unswitching analysis"), - cl::init(100), cl::Hidden); - +static cl::opt<unsigned> + MSSAThreshold("loop-unswitch-memoryssa-threshold", + cl::desc("Max number of memory uses to explore during " + "partial unswitching analysis"), + cl::init(100), cl::Hidden); + namespace { class LUAnalysisCache { @@ -191,7 +191,7 @@ namespace { Loop *CurrentLoop = nullptr; DominatorTree *DT = nullptr; MemorySSA *MSSA = nullptr; - AAResults *AA = nullptr; + AAResults *AA = nullptr; std::unique_ptr<MemorySSAUpdater> MSSAU; BasicBlock *LoopHeader = nullptr; BasicBlock *LoopPreheader = nullptr; @@ -225,10 +225,10 @@ namespace { /// loop preheaders be inserted into the CFG. 
/// void getAnalysisUsage(AnalysisUsage &AU) const override { - // Lazy BFI and BPI are marked as preserved here so Loop Unswitching - // can remain part of the same loop pass as LICM - AU.addPreserved<LazyBlockFrequencyInfoPass>(); - AU.addPreserved<LazyBranchProbabilityInfoPass>(); + // Lazy BFI and BPI are marked as preserved here so Loop Unswitching + // can remain part of the same loop pass as LICM + AU.addPreserved<LazyBlockFrequencyInfoPass>(); + AU.addPreserved<LazyBranchProbabilityInfoPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); if (EnableMSSALoopDependency) { @@ -256,22 +256,22 @@ namespace { bool tryTrivialLoopUnswitch(bool &Changed); bool unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI = nullptr, - ArrayRef<Instruction *> ToDuplicate = {}); + Instruction *TI = nullptr, + ArrayRef<Instruction *> ToDuplicate = {}); void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock, Instruction *TI); void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, - Instruction *TI, - ArrayRef<Instruction *> ToDuplicate = {}); + Instruction *TI, + ArrayRef<Instruction *> ToDuplicate = {}); void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual); - void - emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, BasicBlock *FalseDest, - BranchInst *OldBranch, Instruction *TI, - ArrayRef<Instruction *> ToDuplicate = {}); + void + emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef<Instruction *> ToDuplicate = {}); void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L); @@ -538,7 +538,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); LPM = &LPMRef; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); if (EnableMSSALoopDependency) { MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); @@ -640,145 +640,145 @@ static bool equalityPropUnSafe(Value &LoopCond) { return false; } -/// Check if the loop header has a conditional branch that is not -/// loop-invariant, because it involves load instructions. If all paths from -/// either the true or false successor to the header or loop exists do not -/// modify the memory feeding the condition, perform 'partial unswitching'. That -/// is, duplicate the instructions feeding the condition in the pre-header. Then -/// unswitch on the duplicated condition. The condition is now known in the -/// unswitched version for the 'invariant' path through the original loop. -/// -/// If the branch condition of the header is partially invariant, return a pair -/// containing the instructions to duplicate and a boolean Constant to update -/// the condition in the loops created for the true or false successors. 
-static std::pair<SmallVector<Instruction *, 4>, Constant *> -hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) { - SmallVector<Instruction *, 4> ToDuplicate; - - auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator()); - if (!TI || !TI->isConditional()) - return {}; - - auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); - // The case with the condition outside the loop should already be handled - // earlier. - if (!CondI || !L->contains(CondI)) - return {}; - - ToDuplicate.push_back(CondI); - - SmallVector<Value *, 4> WorkList; - WorkList.append(CondI->op_begin(), CondI->op_end()); - - SmallVector<MemoryAccess *, 4> AccessesToCheck; - SmallVector<MemoryLocation, 4> AccessedLocs; - while (!WorkList.empty()) { - Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); - if (!I || !L->contains(I)) - continue; - - // TODO: support additional instructions. - if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) - return {}; - - // Do not duplicate volatile and atomic loads. - if (auto *LI = dyn_cast<LoadInst>(I)) - if (LI->isVolatile() || LI->isAtomic()) - return {}; - - ToDuplicate.push_back(I); - if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { - if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { - // Queue the defining access to check for alias checks. - AccessesToCheck.push_back(MemUse->getDefiningAccess()); - AccessedLocs.push_back(MemoryLocation::get(I)); - } else { - // MemoryDefs may clobber the location or may be atomic memory - // operations. Bail out. - return {}; - } - } - WorkList.append(I->op_begin(), I->op_end()); - } - - if (ToDuplicate.size() <= 1) - return {}; - - auto HasNoClobbersOnPath = - [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header, - SmallVector<MemoryAccess *, 4> AccessesToCheck) { - // First, collect all blocks in the loop that are on a patch from Succ - // to the header. - SmallVector<BasicBlock *, 4> WorkList; - WorkList.push_back(Succ); - WorkList.push_back(Header); - SmallPtrSet<BasicBlock *, 4> Seen; - Seen.insert(Header); - while (!WorkList.empty()) { - BasicBlock *Current = WorkList.pop_back_val(); - if (!L->contains(Current)) - continue; - const auto &SeenIns = Seen.insert(Current); - if (!SeenIns.second) - continue; - - WorkList.append(succ_begin(Current), succ_end(Current)); - } - - // Require at least 2 blocks on a path through the loop. This skips - // paths that directly exit the loop. - if (Seen.size() < 2) - return false; - - // Next, check if there are any MemoryDefs that are on the path through - // the loop (in the Seen set) and they may-alias any of the locations in - // AccessedLocs. If that is the case, they may modify the condition and - // partial unswitching is not possible. - SmallPtrSet<MemoryAccess *, 4> SeenAccesses; - while (!AccessesToCheck.empty()) { - MemoryAccess *Current = AccessesToCheck.pop_back_val(); - auto SeenI = SeenAccesses.insert(Current); - if (!SeenI.second || !Seen.contains(Current->getBlock())) - continue; - - // Bail out if exceeded the threshold. - if (SeenAccesses.size() >= MSSAThreshold) - return false; - - // MemoryUse are read-only accesses. - if (isa<MemoryUse>(Current)) - continue; - - // For a MemoryDef, check if is aliases any of the location feeding - // the original condition. 
- if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { - if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) { - return isModSet( - AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc)); - })) - return false; - } - - for (Use &U : Current->uses()) - AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); - } - - return true; - }; - - // If we branch to the same successor, partial unswitching will not be - // beneficial. - if (TI->getSuccessor(0) == TI->getSuccessor(1)) - return {}; - - if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck)) - return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; - if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck)) - return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; - - return {}; -} - +/// Check if the loop header has a conditional branch that is not +/// loop-invariant, because it involves load instructions. If all paths from +/// either the true or false successor to the header or loop exists do not +/// modify the memory feeding the condition, perform 'partial unswitching'. That +/// is, duplicate the instructions feeding the condition in the pre-header. Then +/// unswitch on the duplicated condition. The condition is now known in the +/// unswitched version for the 'invariant' path through the original loop. +/// +/// If the branch condition of the header is partially invariant, return a pair +/// containing the instructions to duplicate and a boolean Constant to update +/// the condition in the loops created for the true or false successors. +static std::pair<SmallVector<Instruction *, 4>, Constant *> +hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) { + SmallVector<Instruction *, 4> ToDuplicate; + + auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator()); + if (!TI || !TI->isConditional()) + return {}; + + auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); + // The case with the condition outside the loop should already be handled + // earlier. + if (!CondI || !L->contains(CondI)) + return {}; + + ToDuplicate.push_back(CondI); + + SmallVector<Value *, 4> WorkList; + WorkList.append(CondI->op_begin(), CondI->op_end()); + + SmallVector<MemoryAccess *, 4> AccessesToCheck; + SmallVector<MemoryLocation, 4> AccessedLocs; + while (!WorkList.empty()) { + Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); + if (!I || !L->contains(I)) + continue; + + // TODO: support additional instructions. + if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) + return {}; + + // Do not duplicate volatile and atomic loads. + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->isVolatile() || LI->isAtomic()) + return {}; + + ToDuplicate.push_back(I); + if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { + if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { + // Queue the defining access to check for alias checks. + AccessesToCheck.push_back(MemUse->getDefiningAccess()); + AccessedLocs.push_back(MemoryLocation::get(I)); + } else { + // MemoryDefs may clobber the location or may be atomic memory + // operations. Bail out. + return {}; + } + } + WorkList.append(I->op_begin(), I->op_end()); + } + + if (ToDuplicate.size() <= 1) + return {}; + + auto HasNoClobbersOnPath = + [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header, + SmallVector<MemoryAccess *, 4> AccessesToCheck) { + // First, collect all blocks in the loop that are on a patch from Succ + // to the header. 
+ SmallVector<BasicBlock *, 4> WorkList; + WorkList.push_back(Succ); + WorkList.push_back(Header); + SmallPtrSet<BasicBlock *, 4> Seen; + Seen.insert(Header); + while (!WorkList.empty()) { + BasicBlock *Current = WorkList.pop_back_val(); + if (!L->contains(Current)) + continue; + const auto &SeenIns = Seen.insert(Current); + if (!SeenIns.second) + continue; + + WorkList.append(succ_begin(Current), succ_end(Current)); + } + + // Require at least 2 blocks on a path through the loop. This skips + // paths that directly exit the loop. + if (Seen.size() < 2) + return false; + + // Next, check if there are any MemoryDefs that are on the path through + // the loop (in the Seen set) and they may-alias any of the locations in + // AccessedLocs. If that is the case, they may modify the condition and + // partial unswitching is not possible. + SmallPtrSet<MemoryAccess *, 4> SeenAccesses; + while (!AccessesToCheck.empty()) { + MemoryAccess *Current = AccessesToCheck.pop_back_val(); + auto SeenI = SeenAccesses.insert(Current); + if (!SeenI.second || !Seen.contains(Current->getBlock())) + continue; + + // Bail out if exceeded the threshold. + if (SeenAccesses.size() >= MSSAThreshold) + return false; + + // MemoryUse are read-only accesses. + if (isa<MemoryUse>(Current)) + continue; + + // For a MemoryDef, check if is aliases any of the location feeding + // the original condition. + if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { + if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) { + return isModSet( + AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc)); + })) + return false; + } + + for (Use &U : Current->uses()) + AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); + } + + return true; + }; + + // If we branch to the same successor, partial unswitching will not be + // beneficial. + if (TI->getSuccessor(0) == TI->getSuccessor(1)) + return {}; + + if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck)) + return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; + if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck)) + return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; + + return {}; +} + /// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -816,7 +816,7 @@ bool LoopUnswitch::processCurrentLoop() { // FIXME: Use Function::hasOptSize(). if (OptimizeForSize || LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) - return Changed; + return Changed; // Run through the instructions in the loop, keeping track of three things: // @@ -840,10 +840,10 @@ bool LoopUnswitch::processCurrentLoop() { if (!CB) continue; if (CB->isConvergent()) - return Changed; + return Changed; if (auto *II = dyn_cast<InvokeInst>(&I)) if (!II->getUnwindDest()->canSplitPredecessors()) - return Changed; + return Changed; if (auto *II = dyn_cast<IntrinsicInst>(&I)) if (II->getIntrinsicID() == Intrinsic::experimental_guard) Guards.push_back(II); @@ -978,28 +978,28 @@ bool LoopUnswitch::processCurrentLoop() { } } } - - // Check if there is a header condition that is invariant along the patch from - // either the true or false successors to the header. This allows unswitching - // conditions depending on memory accesses, if there's a path not clobbering - // the memory locations. Check if this transform has been disabled using - // metadata, to avoid unswitching the same loop multiple times. 
- if (MSSA && - !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { - auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA); - if (!ToDuplicate.first.empty()) { - LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition " - << *ToDuplicate.first[0] << "\n"); - ++NumBranches; - unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, - CurrentLoop->getHeader()->getTerminator(), - ToDuplicate.first); - - RedoLoop = false; - return true; - } - } - + + // Check if there is a header condition that is invariant along the patch from + // either the true or false successors to the header. This allows unswitching + // conditions depending on memory accesses, if there's a path not clobbering + // the memory locations. Check if this transform has been disabled using + // metadata, to avoid unswitching the same loop multiple times. + if (MSSA && + !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { + auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA); + if (!ToDuplicate.first.empty()) { + LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition " + << *ToDuplicate.first[0] << "\n"); + ++NumBranches; + unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, + CurrentLoop->getHeader()->getTerminator(), + ToDuplicate.first); + + RedoLoop = false; + return true; + } + } + return Changed; } @@ -1057,8 +1057,8 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { /// simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI, - ArrayRef<Instruction *> ToDuplicate) { + Instruction *TI, + ArrayRef<Instruction *> ToDuplicate) { // Check to see if it would be profitable to unswitch current loop. if (!BranchesInfo.costAllowsUnswitching()) { LLVM_DEBUG(dbgs() << "NOT unswitching loop %" @@ -1078,69 +1078,69 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, return false; } - unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); + unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); return true; } /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, /// otherwise branch to FalseDest. Insert the code immediately before OldBranch /// and remove (but not erase!) it from the function. -void LoopUnswitch::emitPreheaderBranchOnCondition( - Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, - BranchInst *OldBranch, Instruction *TI, - ArrayRef<Instruction *> ToDuplicate) { +void LoopUnswitch::emitPreheaderBranchOnCondition( + Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef<Instruction *> ToDuplicate) { assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); assert(TrueDest != FalseDest && "Branch targets should be different"); - + // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. 
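The LoopUnswitch code shown in these hunks implements "partial" unswitching: when the header branch is fed by loads that no store along one path through the loop can clobber, the feeding instructions are duplicated in the preheader and the loop is unswitched on the duplicated condition. A minimal, hypothetical C++ sketch of the source pattern this targets (identifiers are illustrative, not taken from the tree):

#include <cstdio>

// Hypothetical sketch: the header branch is fed by a load of *flag, and along
// the 'else' path nothing stores to memory that may alias *flag, so once the
// branch goes that way it keeps going that way.
long process(const int *data, int *flag, long n) {
  long sum = 0;
  for (long i = 0; i < n; ++i) {
    if (*flag != 0)
      *flag = (data[i] > 0);  // only this path writes the memory feeding the branch
    else
      sum += data[i];         // no stores that can alias *flag on this path
  }
  // Conceptually, the pass clones the '*flag != 0' load/compare into the
  // preheader and branches to a loop version specialized for the case where
  // the condition is (and therefore stays) false.
  return sum;
}

int main() {
  int data[4] = {3, -1, 2, 5};
  int flag = 0;
  std::printf("%ld\n", process(data, &flag, 4));  // prints 9
  return 0;
}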
Value *BranchVal = LIC; bool Swapped = false; - - if (!ToDuplicate.empty()) { - ValueToValueMapTy Old2New; - for (Instruction *I : reverse(ToDuplicate)) { - auto *New = I->clone(); - New->insertBefore(OldBranch); - RemapInstruction(New, Old2New, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - Old2New[I] = New; - - if (MSSAU) { - MemorySSA *MSSA = MSSAU->getMemorySSA(); - auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I)); - if (!MemA) - continue; - - Loop *L = LI->getLoopFor(I->getParent()); - auto *DefiningAccess = MemA->getDefiningAccess(); - // Get the first defining access before the loop. - while (L->contains(DefiningAccess->getBlock())) { - // If the defining access is a MemoryPhi, get the incoming - // value for the pre-header as defining access. - if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) { - DefiningAccess = - MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); - } else { - DefiningAccess = - cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); - } - } - MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), - MemorySSA::BeforeTerminator); - } - } - BranchVal = Old2New[ToDuplicate[0]]; - } else { - - if (!isa<ConstantInt>(Val) || - Val->getType() != Type::getInt1Ty(LIC->getContext())) - BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) { - // We want to enter the new loop when the condition is true. - std::swap(TrueDest, FalseDest); - Swapped = true; - } + + if (!ToDuplicate.empty()) { + ValueToValueMapTy Old2New; + for (Instruction *I : reverse(ToDuplicate)) { + auto *New = I->clone(); + New->insertBefore(OldBranch); + RemapInstruction(New, Old2New, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + Old2New[I] = New; + + if (MSSAU) { + MemorySSA *MSSA = MSSAU->getMemorySSA(); + auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I)); + if (!MemA) + continue; + + Loop *L = LI->getLoopFor(I->getParent()); + auto *DefiningAccess = MemA->getDefiningAccess(); + // Get the first defining access before the loop. + while (L->contains(DefiningAccess->getBlock())) { + // If the defining access is a MemoryPhi, get the incoming + // value for the pre-header as defining access. + if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) { + DefiningAccess = + MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); + } else { + DefiningAccess = + cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); + } + } + MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), + MemorySSA::BeforeTerminator); + } + } + BranchVal = Old2New[ToDuplicate[0]]; + } else { + + if (!isa<ConstantInt>(Val) || + Val->getType() != Type::getInt1Ty(LIC->getContext())) + BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); + else if (Val != ConstantInt::getTrue(Val->getContext())) { + // We want to enter the new loop when the condition is true. + std::swap(TrueDest, FalseDest); + Swapped = true; + } } // Old branch will be removed, so save its parent and successor to update the @@ -1173,9 +1173,9 @@ void LoopUnswitch::emitPreheaderBranchOnCondition( } if (MSSAU) - MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); - else - DT->applyUpdates(Updates); + MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); + else + DT->applyUpdates(Updates); } // If either edge is critical, split it. This helps preserve LoopSimplify @@ -1424,9 +1424,9 @@ void LoopUnswitch::splitExitEdges( /// We determined that the loop is profitable to unswitch when LIC equal Val. 
/// Split it into loop versions and test the condition outside of either loop. /// Return the loops created as Out1/Out2. -void LoopUnswitch::unswitchNontrivialCondition( - Value *LIC, Constant *Val, Loop *L, Instruction *TI, - ArrayRef<Instruction *> ToDuplicate) { +void LoopUnswitch::unswitchNontrivialCondition( + Value *LIC, Constant *Val, Loop *L, Instruction *TI, + ArrayRef<Instruction *> ToDuplicate) { Function *F = LoopHeader->getParent(); LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << LoopHeader->getName() << " [" << L->getBlocks().size() @@ -1451,7 +1451,7 @@ void LoopUnswitch::unswitchNontrivialCondition( LoopBlocks.push_back(NewPreheader); // We want the loop to come after the preheader, but before the exit blocks. - llvm::append_range(LoopBlocks, L->blocks()); + llvm::append_range(LoopBlocks, L->blocks()); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -1465,7 +1465,7 @@ void LoopUnswitch::unswitchNontrivialCondition( L->getUniqueExitBlocks(ExitBlocks); // Add exit blocks to the loop blocks. - llvm::append_range(LoopBlocks, ExitBlocks); + llvm::append_range(LoopBlocks, ExitBlocks); // Next step, clone all of the basic blocks that make up the loop (including // the loop preheader and exit blocks), keeping track of the mapping between @@ -1558,7 +1558,7 @@ void LoopUnswitch::unswitchNontrivialCondition( // Emit the new branch that selects between the two versions of this loop. emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, - TI, ToDuplicate); + TI, ToDuplicate); if (MSSAU) { // Update MemoryPhis in Exit blocks. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); @@ -1580,39 +1580,39 @@ void LoopUnswitch::unswitchNontrivialCondition( // iteration. WeakTrackingVH LICHandle(LIC); - if (ToDuplicate.empty()) { - // Now we rewrite the original code to know that the condition is true and - // the new code to know that the condition is false. - rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); - - // It's possible that simplifying one loop could cause the other to be - // changed to another value or a constant. If its a constant, don't - // simplify it. - if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && - LICHandle && !isa<Constant>(LICHandle)) - rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, - /*IsEqual=*/true); - } else { - // Partial unswitching. Update the condition in the right loop with the - // constant. - auto *CC = cast<ConstantInt>(Val); - if (CC->isOneValue()) { - rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, - /*IsEqual=*/true); - } else - rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); - - // Mark the new loop as partially unswitched, to avoid unswitching on the - // same condition again. - auto &Context = NewLoop->getHeader()->getContext(); - MDNode *DisableUnswitchMD = MDNode::get( - Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); - MDNode *NewLoopID = makePostTransformationMetadata( - Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, - {DisableUnswitchMD}); - NewLoop->setLoopID(NewLoopID); - } - + if (ToDuplicate.empty()) { + // Now we rewrite the original code to know that the condition is true and + // the new code to know that the condition is false. + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); + + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. 
If its a constant, don't + // simplify it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa<Constant>(LICHandle)) + rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, + /*IsEqual=*/true); + } else { + // Partial unswitching. Update the condition in the right loop with the + // constant. + auto *CC = cast<ConstantInt>(Val); + if (CC->isOneValue()) { + rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, + /*IsEqual=*/true); + } else + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); + + // Mark the new loop as partially unswitched, to avoid unswitching on the + // same condition again. + auto &Context = NewLoop->getHeader()->getContext(); + MDNode *DisableUnswitchMD = MDNode::get( + Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, + {DisableUnswitchMD}); + NewLoop->setLoopID(NewLoopID); + } + if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); } @@ -1620,7 +1620,7 @@ void LoopUnswitch::unswitchNontrivialCondition( /// Remove all instances of I from the worklist vector specified. static void removeFromWorklist(Instruction *I, std::vector<Instruction *> &Worklist) { - llvm::erase_value(Worklist, I); + llvm::erase_value(Worklist, I); } /// When we find that I really equals V, remove I from the diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 2ff1e84807..b1a41e0c9d 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -59,7 +59,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/LoopVersioningLICM.h" +#include "llvm/Transforms/Scalar/LoopVersioningLICM.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -115,18 +115,18 @@ static cl::opt<unsigned> LVLoopDepthThreshold( namespace { -struct LoopVersioningLICMLegacyPass : public LoopPass { +struct LoopVersioningLICMLegacyPass : public LoopPass { static char ID; - LoopVersioningLICMLegacyPass() : LoopPass(ID) { - initializeLoopVersioningLICMLegacyPassPass( - *PassRegistry::getPassRegistry()); + LoopVersioningLICMLegacyPass() : LoopPass(ID) { + initializeLoopVersioningLICMLegacyPassPass( + *PassRegistry::getPassRegistry()); } bool runOnLoop(Loop *L, LPPassManager &LPM) override; - StringRef getPassName() const override { return "Loop Versioning for LICM"; } - + StringRef getPassName() const override { return "Loop Versioning for LICM"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AAResultsWrapperPass>(); @@ -140,22 +140,22 @@ struct LoopVersioningLICMLegacyPass : public LoopPass { AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); } -}; - -struct LoopVersioningLICM { - // We don't explicitly pass in LoopAccessInfo to the constructor since the - // loop versioning might return early due to instructions that are not safe - // for versioning. By passing the proxy instead the construction of - // LoopAccessInfo will take place only when it's necessary. 
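The comment above explains why LoopVersioningLICM receives a GetLAI callback instead of a LoopAccessInfo: the expensive analysis is computed only if the early legality checks pass. A small, self-contained sketch of that lazy-proxy idea (using std::function in place of llvm::function_ref, names purely illustrative):

#include <cstdio>
#include <functional>

struct ExpensiveInfo { int checks; };  // stand-in for a costly analysis result

struct Client {
  std::function<const ExpensiveInfo &()> getInfo;  // proxy, not the info itself
  bool run(bool earlyExit) {
    if (earlyExit)
      return false;                 // ExpensiveInfo is never produced on this path
    const ExpensiveInfo &info = getInfo();
    return info.checks > 0;
  }
};

int main() {
  ExpensiveInfo cached{3};
  bool computed = false;
  Client c{[&]() -> const ExpensiveInfo & {
    computed = true;                // runs only when the result is actually needed
    return cached;
  }};
  std::printf("%d %d\n", c.run(/*earlyExit=*/true), computed);   // 0 0
  std::printf("%d %d\n", c.run(/*earlyExit=*/false), computed);  // 1 1
  return 0;
}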
- LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE, - function_ref<const LoopAccessInfo &(Loop *)> GetLAI) - : AA(AA), SE(SE), GetLAI(GetLAI), - LoopDepthThreshold(LVLoopDepthThreshold), - InvariantThreshold(LVInvarThreshold), ORE(ORE) {} - - bool runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT); - +}; + +struct LoopVersioningLICM { + // We don't explicitly pass in LoopAccessInfo to the constructor since the + // loop versioning might return early due to instructions that are not safe + // for versioning. By passing the proxy instead the construction of + // LoopAccessInfo will take place only when it's necessary. + LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE, + OptimizationRemarkEmitter *ORE, + function_ref<const LoopAccessInfo &(Loop *)> GetLAI) + : AA(AA), SE(SE), GetLAI(GetLAI), + LoopDepthThreshold(LVLoopDepthThreshold), + InvariantThreshold(LVInvarThreshold), ORE(ORE) {} + + bool runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT); + void reset() { AA = nullptr; SE = nullptr; @@ -186,9 +186,9 @@ private: // Current Loop's LoopAccessInfo const LoopAccessInfo *LAI = nullptr; - // Proxy for retrieving LoopAccessInfo. - function_ref<const LoopAccessInfo &(Loop *)> GetLAI; - + // Proxy for retrieving LoopAccessInfo. + function_ref<const LoopAccessInfo &(Loop *)> GetLAI; + // The current loop we are working on. Loop *CurLoop = nullptr; @@ -267,7 +267,7 @@ bool LoopVersioningLICM::legalLoopStructure() { // We need to be able to compute the loop trip count in order // to generate the bound checks. const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); - if (isa<SCEVCouldNotCompute>(ExitCount)) { + if (isa<SCEVCouldNotCompute>(ExitCount)) { LLVM_DEBUG(dbgs() << " loop does not has trip count\n"); return false; } @@ -414,8 +414,8 @@ bool LoopVersioningLICM::legalLoopInstructions() { return false; } } - // Get LoopAccessInfo from current loop via the proxy. - LAI = &GetLAI(CurLoop); + // Get LoopAccessInfo from current loop via the proxy. + LAI = &GetLAI(CurLoop); // Check LoopAccessInfo for need of runtime check. if (LAI->getRuntimePointerChecking()->getChecks().empty()) { LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); @@ -554,7 +554,7 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); StringRef Name = "LVAliasScope"; MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); - SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope}; + SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope}; // Iterate over each instruction of loop. // set no-alias for all load & store instructions. 
for (auto *Block : CurLoop->getBlocks()) { @@ -576,25 +576,25 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { } } -bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { - if (skipLoop(L)) - return false; - - AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - OptimizationRemarkEmitter *ORE = - &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - - auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & { - return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(L); - }; - - return LoopVersioningLICM(AA, SE, ORE, GetLAI).runOnLoop(L, LI, DT); -} - -bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) { +bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + OptimizationRemarkEmitter *ORE = + &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & { + return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(L); + }; + + return LoopVersioningLICM(AA, SE, ORE, GetLAI).runOnLoop(L, LI, DT); +} + +bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) { // This will automatically release all resources hold by the current // LoopVersioningLICM object. AutoResetter Resetter(*this); @@ -622,8 +622,8 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) { // Do loop versioning. // Create memcheck for memory accessed inside loop. // Clone original loop, and set blocks properly. - LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(), - CurLoop, LI, DT, SE); + LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(), + CurLoop, LI, DT, SE); LVer.versionLoop(); // Set Loop Versioning metaData for original loop. 
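At the source level, the transformation driven by runOnLoop above creates a second copy of the loop guarded by the runtime checks from LoopAccessInfo, so that the no-alias version can be annotated with scoped-noalias metadata and cleaned up by LICM. A hand-written, hypothetical C++ analogue (the pointer test is a simplification of the real memchecks):

#include <cstdint>
#include <cstdio>

// Original loop: *limit is reloaded every iteration because stores through
// 'out' might alias it.
void scale(int *out, const int *in, const int *limit, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = in[i] < *limit ? in[i] : *limit;
}

// Hand-written analogue of the versioned form: a runtime overlap check picks
// between a loop where *limit is hoisted (no-alias version) and the original.
void scale_versioned(int *out, const int *in, const int *limit, int n) {
  auto lo = reinterpret_cast<std::uintptr_t>(out);
  auto hi = reinterpret_cast<std::uintptr_t>(out + n);
  auto p = reinterpret_cast<std::uintptr_t>(limit);
  bool noAlias = p + sizeof(int) <= lo || p >= hi;  // simplified memcheck
  if (noAlias) {
    int lim = *limit;  // invariant in this version; LICM may hoist it
    for (int i = 0; i < n; ++i)
      out[i] = in[i] < lim ? in[i] : lim;
  } else {
    scale(out, in, limit, n);  // conservatively correct fallback loop
  }
}

int main() {
  int in[4] = {1, 5, 2, 9}, out[4] = {0, 0, 0, 0};
  int limit = 4;
  scale_versioned(out, in, &limit, 4);
  std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // 1 4 2 4
  return 0;
}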
addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData); @@ -641,9 +641,9 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) { return Changed; } -char LoopVersioningLICMLegacyPass::ID = 0; +char LoopVersioningLICMLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LoopVersioningLICMLegacyPass, "loop-versioning-licm", +INITIALIZE_PASS_BEGIN(LoopVersioningLICMLegacyPass, "loop-versioning-licm", "Loop Versioning For LICM", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) @@ -654,31 +654,31 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_END(LoopVersioningLICMLegacyPass, "loop-versioning-licm", +INITIALIZE_PASS_END(LoopVersioningLICMLegacyPass, "loop-versioning-licm", "Loop Versioning For LICM", false, false) -Pass *llvm::createLoopVersioningLICMPass() { - return new LoopVersioningLICMLegacyPass(); -} - -namespace llvm { - -PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &LAR, - LPMUpdater &U) { - AliasAnalysis *AA = &LAR.AA; - ScalarEvolution *SE = &LAR.SE; - DominatorTree *DT = &LAR.DT; - LoopInfo *LI = &LAR.LI; - const Function *F = L.getHeader()->getParent(); - OptimizationRemarkEmitter ORE(F); - - auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & { - return AM.getResult<LoopAccessAnalysis>(*L, LAR); - }; - - if (!LoopVersioningLICM(AA, SE, &ORE, GetLAI).runOnLoop(&L, LI, DT)) - return PreservedAnalyses::all(); - return getLoopPassPreservedAnalyses(); -} -} // namespace llvm +Pass *llvm::createLoopVersioningLICMPass() { + return new LoopVersioningLICMLegacyPass(); +} + +namespace llvm { + +PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &LAR, + LPMUpdater &U) { + AliasAnalysis *AA = &LAR.AA; + ScalarEvolution *SE = &LAR.SE; + DominatorTree *DT = &LAR.DT; + LoopInfo *LI = &LAR.LI; + const Function *F = L.getHeader()->getParent(); + OptimizationRemarkEmitter ORE(F); + + auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & { + return AM.getResult<LoopAccessAnalysis>(*L, LAR); + }; + + if (!LoopVersioningLICM(AA, SE, &ORE, GetLAI).runOnLoop(&L, LI, DT)) + return PreservedAnalyses::all(); + return getLoopPassPreservedAnalyses(); +} +} // namespace llvm diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp index bb30c48127..c17c903dd2 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp @@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled, "Number of 'objectsize' intrinsic calls handled"); static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) { - if (auto *C = dyn_cast<Constant>(II->getOperand(0))) - if (C->isManifestConstant()) - return ConstantInt::getTrue(II->getType()); - return ConstantInt::getFalse(II->getType()); + if (auto *C = dyn_cast<Constant>(II->getOperand(0))) + if (C->isManifestConstant()) + return ConstantInt::getTrue(II->getType()); + return ConstantInt::getFalse(II->getType()); } static bool replaceConditionalBranchesOnConstant(Instruction *II, @@ -78,7 +78,7 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II, 
Other->removePredecessor(Source); BI->eraseFromParent(); BranchInst::Create(Target, Source); - if (pred_empty(Other)) + if (pred_empty(Other)) HasDeadBlocks = true; } } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index da13075dfe..98b6adee87 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -46,10 +46,10 @@ STATISTIC(ExpectIntrinsicsHandled, // 'select' instructions. It may be worthwhile to hoist these values to some // shared space, so they can be used directly by other passes. -cl::opt<uint32_t> llvm::LikelyBranchWeight( +cl::opt<uint32_t> llvm::LikelyBranchWeight( "likely-branch-weight", cl::Hidden, cl::init(2000), cl::desc("Weight of the branch likely to be taken (default = 2000)")); -cl::opt<uint32_t> llvm::UnlikelyBranchWeight( +cl::opt<uint32_t> llvm::UnlikelyBranchWeight( "unlikely-branch-weight", cl::Hidden, cl::init(1), cl::desc("Weight of the branch unlikely to be taken (default = 1)")); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 8e251ca940..d9f8c9f83d 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -42,8 +42,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/MatrixUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/MatrixUtils.h" using namespace llvm; using namespace PatternMatch; @@ -63,9 +63,9 @@ static cl::opt<unsigned> TileSize( "fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc( "Tile size for matrix instruction fusion using square-shaped tiles.")); -static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false), - cl::Hidden, - cl::desc("Generate loop nest for tiling.")); +static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false), + cl::Hidden, + cl::desc("Generate loop nest for tiling.")); static cl::opt<bool> ForceFusion( "force-fuse-matrix", cl::init(false), cl::Hidden, cl::desc("Force matrix instruction fusion even if not profitable.")); @@ -187,10 +187,10 @@ class LowerMatrixIntrinsics { Function &Func; const DataLayout &DL; const TargetTransformInfo &TTI; - AliasAnalysis *AA; - DominatorTree *DT; - LoopInfo *LI; - OptimizationRemarkEmitter *ORE; + AliasAnalysis *AA; + DominatorTree *DT; + LoopInfo *LI; + OptimizationRemarkEmitter *ORE; /// Contains estimates of the number of operations (loads, stores, compute) required to lower a matrix operation. struct OpInfoTy { @@ -246,7 +246,7 @@ class LowerMatrixIntrinsics { void setVector(unsigned i, Value *V) { Vectors[i] = V; } - Type *getElementType() const { return getVectorTy()->getElementType(); } + Type *getElementType() const { return getVectorTy()->getElementType(); } unsigned getNumVectors() const { if (isColumnMajor()) @@ -276,7 +276,7 @@ class LowerMatrixIntrinsics { return getVectorTy(); } - VectorType *getVectorTy() const { + VectorType *getVectorTy() const { return cast<VectorType>(Vectors[0]->getType()); } @@ -335,7 +335,7 @@ class LowerMatrixIntrinsics { IRBuilder<> &Builder) const { Value *Vec = isColumnMajor() ? 
getColumn(J) : getRow(I); return Builder.CreateShuffleVector( - Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0), + Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0), "block"); } }; @@ -397,8 +397,8 @@ class LowerMatrixIntrinsics { public: LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - OptimizationRemarkEmitter *ORE) + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, + OptimizationRemarkEmitter *ORE) : Func(F), DL(F.getParent()->getDataLayout()), TTI(TTI), AA(AA), DT(DT), LI(LI), ORE(ORE) {} @@ -450,7 +450,7 @@ public: MaskStart < cast<FixedVectorType>(VType)->getNumElements(); MaskStart += SI.getStride()) { Value *V = Builder.CreateShuffleVector( - MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0), + MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0), "split"); SplitVecs.push_back(V); } @@ -488,7 +488,7 @@ public: case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: // Scalar multiply. - case Instruction::FNeg: + case Instruction::FNeg: case Instruction::Add: case Instruction::Mul: case Instruction::Sub: @@ -531,7 +531,7 @@ public: // list. LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n"); while (!WorkList.empty()) { - Instruction *Inst = WorkList.pop_back_val(); + Instruction *Inst = WorkList.pop_back_val(); // New entry, set the value and insert operands bool Propagate = false; @@ -601,7 +601,7 @@ public: // worklist. LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n"); while (!WorkList.empty()) { - Value *V = WorkList.pop_back_val(); + Value *V = WorkList.pop_back_val(); size_t BeforeProcessingV = WorkList.size(); if (!isa<Instruction>(V)) @@ -723,18 +723,18 @@ public: Value *Op2; if (auto *BinOp = dyn_cast<BinaryOperator>(Inst)) Changed |= VisitBinaryOperator(BinOp); - if (auto *UnOp = dyn_cast<UnaryOperator>(Inst)) - Changed |= VisitUnaryOperator(UnOp); + if (auto *UnOp = dyn_cast<UnaryOperator>(Inst)) + Changed |= VisitUnaryOperator(UnOp); if (match(Inst, m_Load(m_Value(Op1)))) Changed |= VisitLoad(cast<LoadInst>(Inst), Op1, Builder); else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2)))) Changed |= VisitStore(cast<StoreInst>(Inst), Op1, Op2, Builder); } - if (ORE) { - RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func); - RemarkGen.emitRemarks(); - } + if (ORE) { + RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func); + RemarkGen.emitRemarks(); + } for (Instruction *Inst : reverse(ToRemove)) Inst->eraseFromParent(); @@ -941,7 +941,7 @@ public: assert(NumElts >= BlockNumElts && "Too few elements for current block"); Block = Builder.CreateShuffleVector( - Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts)); + Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts)); // If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7, // 8, 4, 5, 6 @@ -1089,7 +1089,7 @@ public: MemoryLocation StoreLoc = MemoryLocation::get(Store); MemoryLocation LoadLoc = MemoryLocation::get(Load); - AliasResult LdAliased = AA->alias(LoadLoc, StoreLoc); + AliasResult LdAliased = AA->alias(LoadLoc, StoreLoc); // If we can statically determine noalias we're good. if (!LdAliased) @@ -1105,17 +1105,17 @@ public: // as we adjust Check0 and Check1's branches. 
SmallVector<DominatorTree::UpdateType, 4> DTUpdates; for (BasicBlock *Succ : successors(Check0)) - DTUpdates.push_back({DT->Delete, Check0, Succ}); + DTUpdates.push_back({DT->Delete, Check0, Succ}); - BasicBlock *Check1 = - SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, - nullptr, "alias_cont"); + BasicBlock *Check1 = + SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, + nullptr, "alias_cont"); BasicBlock *Copy = - SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, - nullptr, "copy"); - BasicBlock *Fusion = - SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, - nullptr, "no_alias"); + SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, + nullptr, "copy"); + BasicBlock *Fusion = + SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, + nullptr, "no_alias"); // Check if the loaded memory location begins before the end of the store // location. If the condition holds, they might overlap, otherwise they are @@ -1159,11 +1159,11 @@ public: PHI->addIncoming(NewLd, Copy); // Adjust DT. - DTUpdates.push_back({DT->Insert, Check0, Check1}); - DTUpdates.push_back({DT->Insert, Check0, Fusion}); - DTUpdates.push_back({DT->Insert, Check1, Copy}); - DTUpdates.push_back({DT->Insert, Check1, Fusion}); - DT->applyUpdates(DTUpdates); + DTUpdates.push_back({DT->Insert, Check0, Check1}); + DTUpdates.push_back({DT->Insert, Check0, Fusion}); + DTUpdates.push_back({DT->Insert, Check1, Copy}); + DTUpdates.push_back({DT->Insert, Check1, Fusion}); + DT->applyUpdates(DTUpdates); return PHI; } @@ -1209,63 +1209,63 @@ public: return Res; } - void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape, - Value *RPtr, ShapeInfo RShape, StoreInst *Store, - bool AllowContract) { - auto *EltType = cast<VectorType>(MatMul->getType())->getElementType(); - - // Create the main tiling loop nest. - TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - Instruction *InsertI = cast<Instruction>(MatMul); - BasicBlock *Start = InsertI->getParent(); - BasicBlock *End = - SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue"); - IRBuilder<> Builder(MatMul); - BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI); - - Type *TileVecTy = - FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize); - MatrixTy TileResult; - // Insert in the inner loop header. - Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator()); - // Create PHI nodes for the result columns to accumulate across iterations. - SmallVector<PHINode *, 4> ColumnPhis; - for (unsigned I = 0; I < TileSize; I++) { - auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I)); - Phi->addIncoming(ConstantAggregateZero::get(TileVecTy), - TI.RowLoopHeader->getSingleSuccessor()); - TileResult.addVector(Phi); - ColumnPhis.push_back(Phi); - } - - // Insert in the inner loop body, which computes - // Res += Load(CurrentRow, K) * Load(K, CurrentColumn) - Builder.SetInsertPoint(InnerBody->getTerminator()); - // Load tiles of the operands. - MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK, - {TileSize, TileSize}, EltType, Builder); - MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol, - {TileSize, TileSize}, EltType, Builder); - emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true); - // Store result after the inner loop is done. 
- Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator()); - storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(), - Store->isVolatile(), {LShape.NumRows, RShape.NumColumns}, - TI.CurrentRow, TI.CurrentCol, EltType, Builder); - - for (unsigned I = 0; I < TileResult.getNumVectors(); I++) - ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch); - - // Force unrolling of a few iterations of the inner loop, to make sure there - // is enough work per iteration. - // FIXME: The unroller should make this decision directly instead, but - // currently the cost-model is not up to the task. - unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize); - addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader), - "llvm.loop.unroll.count", InnerLoopUnrollCount); - } - + void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape, + Value *RPtr, ShapeInfo RShape, StoreInst *Store, + bool AllowContract) { + auto *EltType = cast<VectorType>(MatMul->getType())->getElementType(); + + // Create the main tiling loop nest. + TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + Instruction *InsertI = cast<Instruction>(MatMul); + BasicBlock *Start = InsertI->getParent(); + BasicBlock *End = + SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue"); + IRBuilder<> Builder(MatMul); + BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI); + + Type *TileVecTy = + FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize); + MatrixTy TileResult; + // Insert in the inner loop header. + Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator()); + // Create PHI nodes for the result columns to accumulate across iterations. + SmallVector<PHINode *, 4> ColumnPhis; + for (unsigned I = 0; I < TileSize; I++) { + auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I)); + Phi->addIncoming(ConstantAggregateZero::get(TileVecTy), + TI.RowLoopHeader->getSingleSuccessor()); + TileResult.addVector(Phi); + ColumnPhis.push_back(Phi); + } + + // Insert in the inner loop body, which computes + // Res += Load(CurrentRow, K) * Load(K, CurrentColumn) + Builder.SetInsertPoint(InnerBody->getTerminator()); + // Load tiles of the operands. + MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK, + {TileSize, TileSize}, EltType, Builder); + MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol, + {TileSize, TileSize}, EltType, Builder); + emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true); + // Store result after the inner loop is done. + Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator()); + storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(), + Store->isVolatile(), {LShape.NumRows, RShape.NumColumns}, + TI.CurrentRow, TI.CurrentCol, EltType, Builder); + + for (unsigned I = 0; I < TileResult.getNumVectors(); I++) + ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch); + + // Force unrolling of a few iterations of the inner loop, to make sure there + // is enough work per iteration. + // FIXME: The unroller should make this decision directly instead, but + // currently the cost-model is not up to the task. 
+ unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize); + addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader), + "llvm.loop.unroll.count", InnerLoopUnrollCount); + } + void emitSIMDTiling(CallInst *MatMul, LoadInst *LoadOp0, LoadInst *LoadOp1, StoreInst *Store, SmallPtrSetImpl<Instruction *> &FusedInsts) { @@ -1288,34 +1288,34 @@ public: bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) && MatMul->hasAllowContract()); - if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0)) - createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store, - AllowContract); - else { - IRBuilder<> Builder(Store); - for (unsigned J = 0; J < C; J += TileSize) - for (unsigned I = 0; I < R; I += TileSize) { - const unsigned TileR = std::min(R - I, unsigned(TileSize)); - const unsigned TileC = std::min(C - J, unsigned(TileSize)); - MatrixTy Res = getZeroMatrix(EltType, TileR, TileC); - - for (unsigned K = 0; K < M; K += TileSize) { - const unsigned TileM = std::min(M - K, unsigned(TileSize)); - MatrixTy A = - loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(), - LShape, Builder.getInt64(I), Builder.getInt64(K), - {TileR, TileM}, EltType, Builder); - MatrixTy B = - loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(), - RShape, Builder.getInt64(K), Builder.getInt64(J), - {TileM, TileC}, EltType, Builder); - emitMatrixMultiply(Res, A, B, AllowContract, Builder, true); - } - storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M}, - Builder.getInt64(I), Builder.getInt64(J), EltType, - Builder); + if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0)) + createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store, + AllowContract); + else { + IRBuilder<> Builder(Store); + for (unsigned J = 0; J < C; J += TileSize) + for (unsigned I = 0; I < R; I += TileSize) { + const unsigned TileR = std::min(R - I, unsigned(TileSize)); + const unsigned TileC = std::min(C - J, unsigned(TileSize)); + MatrixTy Res = getZeroMatrix(EltType, TileR, TileC); + + for (unsigned K = 0; K < M; K += TileSize) { + const unsigned TileM = std::min(M - K, unsigned(TileSize)); + MatrixTy A = + loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(), + LShape, Builder.getInt64(I), Builder.getInt64(K), + {TileR, TileM}, EltType, Builder); + MatrixTy B = + loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(), + RShape, Builder.getInt64(K), Builder.getInt64(J), + {TileM, TileC}, EltType, Builder); + emitMatrixMultiply(Res, A, B, AllowContract, Builder, true); + } + storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M}, + Builder.getInt64(I), Builder.getInt64(J), EltType, + Builder); } - } + } // Mark eliminated instructions as fused and remove them. FusedInsts.insert(Store); @@ -1342,11 +1342,11 @@ public: void LowerMatrixMultiplyFused(CallInst *MatMul, SmallPtrSetImpl<Instruction *> &FusedInsts) { if (!FuseMatrix || !MatMul->hasOneUse() || - MatrixLayout != MatrixLayoutTy::ColumnMajor || !DT) + MatrixLayout != MatrixLayoutTy::ColumnMajor || !DT) return; - assert(AA && LI && "Analyses should be available"); - + assert(AA && LI && "Analyses should be available"); + auto *LoadOp0 = dyn_cast<LoadInst>(MatMul->getOperand(0)); auto *LoadOp1 = dyn_cast<LoadInst>(MatMul->getOperand(1)); auto *Store = dyn_cast<StoreInst>(*MatMul->user_begin()); @@ -1355,7 +1355,7 @@ public: // we create invalid IR. // FIXME: See if we can hoist the store address computation. 
auto *AddrI = dyn_cast<Instruction>(Store->getOperand(1)); - if (AddrI && (!DT->dominates(AddrI, MatMul))) + if (AddrI && (!DT->dominates(AddrI, MatMul))) return; emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts); @@ -1372,8 +1372,8 @@ public: const MatrixTy &Lhs = getMatrix(MatMul->getArgOperand(0), LShape, Builder); const MatrixTy &Rhs = getMatrix(MatMul->getArgOperand(1), RShape, Builder); - assert(Lhs.getElementType() == Rhs.getElementType() && - "Matrix multiply argument element types do not match."); + assert(Lhs.getElementType() == Rhs.getElementType() && + "Matrix multiply argument element types do not match."); const unsigned R = LShape.NumRows; const unsigned C = RShape.NumColumns; @@ -1381,8 +1381,8 @@ public: // Initialize the output MatrixTy Result(R, C, EltType); - assert(Lhs.getElementType() == Result.getElementType() && - "Matrix multiply result element type does not match arguments."); + assert(Lhs.getElementType() == Result.getElementType() && + "Matrix multiply result element type does not match arguments."); bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) && MatMul->hasAllowContract()); @@ -1500,40 +1500,40 @@ public: return true; } - /// Lower unary operators, if shape information is available. - bool VisitUnaryOperator(UnaryOperator *Inst) { - auto I = ShapeMap.find(Inst); - if (I == ShapeMap.end()) - return false; - - Value *Op = Inst->getOperand(0); - - IRBuilder<> Builder(Inst); - ShapeInfo &Shape = I->second; - - MatrixTy Result; - MatrixTy M = getMatrix(Op, Shape, Builder); - - // Helper to perform unary op on vectors. - auto BuildVectorOp = [&Builder, Inst](Value *Op) { - switch (Inst->getOpcode()) { - case Instruction::FNeg: - return Builder.CreateFNeg(Op); - default: - llvm_unreachable("Unsupported unary operator for matrix"); - } - }; - - for (unsigned I = 0; I < Shape.getNumVectors(); ++I) - Result.addVector(BuildVectorOp(M.getVector(I))); - - finalizeLowering(Inst, - Result.addNumComputeOps(getNumOps(Result.getVectorTy()) * - Result.getNumVectors()), - Builder); - return true; - } - + /// Lower unary operators, if shape information is available. + bool VisitUnaryOperator(UnaryOperator *Inst) { + auto I = ShapeMap.find(Inst); + if (I == ShapeMap.end()) + return false; + + Value *Op = Inst->getOperand(0); + + IRBuilder<> Builder(Inst); + ShapeInfo &Shape = I->second; + + MatrixTy Result; + MatrixTy M = getMatrix(Op, Shape, Builder); + + // Helper to perform unary op on vectors. + auto BuildVectorOp = [&Builder, Inst](Value *Op) { + switch (Inst->getOpcode()) { + case Instruction::FNeg: + return Builder.CreateFNeg(Op); + default: + llvm_unreachable("Unsupported unary operator for matrix"); + } + }; + + for (unsigned I = 0; I < Shape.getNumVectors(); ++I) + Result.addVector(BuildVectorOp(M.getVector(I))); + + finalizeLowering(Inst, + Result.addNumComputeOps(getNumOps(Result.getVectorTy()) * + Result.getNumVectors()), + Builder); + return true; + } + /// Helper to linearize a matrix expression tree into a string. Currently /// matrix expressions are linarized by starting at an expression leaf and /// linearizing bottom up. 
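The fused-multiply path above emits loads, multiplies, and stores in TileSize blocks (optionally as an explicit loop nest when -fuse-matrix-use-loops is enabled). A scalar, hypothetical C++ sketch of that tiling scheme for column-major operands, not the pass's actual code generation:

#include <algorithm>
#include <cstdio>
#include <vector>

constexpr unsigned TileSize = 4;  // mirrors the -fuse-matrix-tile-size default

// C (R x Cn) += A (R x M) * B (M x Cn), all column-major, computed tile by
// tile: partial products are accumulated over K so each tile of A and B is
// touched once per K step.
void tiledMatMul(const float *A, const float *B, float *C,
                 unsigned R, unsigned M, unsigned Cn) {
  auto at = [](const float *P, unsigned rows, unsigned r, unsigned c) {
    return P[c * rows + r];  // column-major element (r, c)
  };
  for (unsigned J = 0; J < Cn; J += TileSize)
    for (unsigned I = 0; I < R; I += TileSize) {
      unsigned TileR = std::min(R - I, TileSize);
      unsigned TileC = std::min(Cn - J, TileSize);
      for (unsigned K = 0; K < M; K += TileSize) {
        unsigned TileM = std::min(M - K, TileSize);
        for (unsigned j = 0; j < TileC; ++j)
          for (unsigned i = 0; i < TileR; ++i) {
            float Acc = 0;
            for (unsigned k = 0; k < TileM; ++k)
              Acc += at(A, R, I + i, K + k) * at(B, M, K + k, J + j);
            C[(J + j) * R + (I + i)] += Acc;
          }
      }
    }
}

int main() {
  unsigned R = 5, M = 6, Cn = 7;
  std::vector<float> A(R * M, 1.0f), B(M * Cn, 2.0f), C(R * Cn, 0.0f);
  tiledMatMul(A.data(), B.data(), C.data(), R, M, Cn);
  std::printf("C[0,0] = %g (expected %g)\n", C[0], 2.0f * M);  // 12
  return 0;
}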
@@ -1598,7 +1598,7 @@ public: if (Value *Ptr = getPointerOperand(V)) return getUnderlyingObjectThroughLoads(Ptr); else if (V->getType()->isPointerTy()) - return getUnderlyingObject(V); + return getUnderlyingObject(V); return V; } @@ -1634,7 +1634,7 @@ public: write(StringRef(Intrinsic::getName(II->getIntrinsicID(), {})) .drop_front(StringRef("llvm.matrix.").size())); write("."); - std::string Tmp; + std::string Tmp; raw_string_ostream SS(Tmp); switch (II->getIntrinsicID()) { @@ -1972,25 +1972,25 @@ public: PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult<TargetIRAnalysis>(F); - OptimizationRemarkEmitter *ORE = nullptr; - AAResults *AA = nullptr; - DominatorTree *DT = nullptr; - LoopInfo *LI = nullptr; - - if (!Minimal) { - ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); - AA = &AM.getResult<AAManager>(F); - DT = &AM.getResult<DominatorTreeAnalysis>(F); - LI = &AM.getResult<LoopAnalysis>(F); - } - + OptimizationRemarkEmitter *ORE = nullptr; + AAResults *AA = nullptr; + DominatorTree *DT = nullptr; + LoopInfo *LI = nullptr; + + if (!Minimal) { + ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + AA = &AM.getResult<AAManager>(F); + DT = &AM.getResult<DominatorTreeAnalysis>(F); + LI = &AM.getResult<LoopAnalysis>(F); + } + LowerMatrixIntrinsics LMT(F, TTI, AA, DT, LI, ORE); if (LMT.Visit()) { PreservedAnalyses PA; - if (!Minimal) { - PA.preserve<LoopAnalysis>(); - PA.preserve<DominatorTreeAnalysis>(); - } + if (!Minimal) { + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + } return PA; } return PreservedAnalyses::all(); @@ -2013,7 +2013,7 @@ public: auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE); + LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE); bool C = LMT.Visit(); return C; } @@ -2044,45 +2044,45 @@ INITIALIZE_PASS_END(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name, Pass *llvm::createLowerMatrixIntrinsicsPass() { return new LowerMatrixIntrinsicsLegacyPass(); } - -namespace { - -/// A lightweight version of the matrix lowering pass that only requires TTI. -/// Advanced features that require DT, AA or ORE like tiling are disabled. This -/// is used to lower matrix intrinsics if the main lowering pass is not run, for -/// example with -O0. 
-class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass { -public: - static char ID; - - LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) { - initializeLowerMatrixIntrinsicsMinimalLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr); - bool C = LMT.Visit(); - return C; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.setPreservesCFG(); - } -}; -} // namespace - -static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)"; -char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass, - "lower-matrix-intrinsics-minimal", pass_name_minimal, - false, false) -INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass, - "lower-matrix-intrinsics-minimal", pass_name_minimal, false, - false) - -Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() { - return new LowerMatrixIntrinsicsMinimalLegacyPass(); -} + +namespace { + +/// A lightweight version of the matrix lowering pass that only requires TTI. +/// Advanced features that require DT, AA or ORE like tiling are disabled. This +/// is used to lower matrix intrinsics if the main lowering pass is not run, for +/// example with -O0. +class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass { +public: + static char ID; + + LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) { + initializeLowerMatrixIntrinsicsMinimalLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr); + bool C = LMT.Visit(); + return C; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.setPreservesCFG(); + } +}; +} // namespace + +static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)"; +char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass, + "lower-matrix-intrinsics-minimal", pass_name_minimal, + false, false) +INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass, + "lower-matrix-intrinsics-minimal", pass_name_minimal, false, + false) + +Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() { + return new LowerMatrixIntrinsicsMinimalLegacyPass(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a4e695497f..c5ef74e869 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -21,11 +21,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -67,15 +67,15 @@ 
using namespace llvm; #define DEBUG_TYPE "memcpyopt" -static cl::opt<bool> - EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden, - cl::desc("Use MemorySSA-backed MemCpyOpt.")); - +static cl::opt<bool> + EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden, + cl::desc("Use MemorySSA-backed MemCpyOpt.")); + STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); -STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); +STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); namespace { @@ -279,17 +279,17 @@ private: AU.setPreservesCFG(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); - if (!EnableMemorySSA) - AU.addRequired<MemoryDependenceWrapperPass>(); + if (!EnableMemorySSA) + AU.addRequired<MemoryDependenceWrapperPass>(); AU.addPreserved<MemoryDependenceWrapperPass>(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); - if (EnableMemorySSA) - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + if (EnableMemorySSA) + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); } }; @@ -311,56 +311,56 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) -// Check that V is either not accessible by the caller, or unwinding cannot -// occur between Start and End. -static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, - Instruction *End) { - assert(Start->getParent() == End->getParent() && "Must be in same block"); - if (!Start->getFunction()->doesNotThrow() && - !isa<AllocaInst>(getUnderlyingObject(V))) { - for (const Instruction &I : - make_range(Start->getIterator(), End->getIterator())) { - if (I.mayThrow()) - return true; - } - } - return false; -} - -void MemCpyOptPass::eraseInstruction(Instruction *I) { - if (MSSAU) - MSSAU->removeMemoryAccess(I); - if (MD) - MD->removeInstruction(I); - I->eraseFromParent(); -} - -// Check for mod or ref of Loc between Start and End, excluding both boundaries. -// Start and End must be in the same block -static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, - const MemoryUseOrDef *Start, - const MemoryUseOrDef *End) { - assert(Start->getBlock() == End->getBlock() && "Only local supported"); - for (const MemoryAccess &MA : - make_range(++Start->getIterator(), End->getIterator())) { - if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(), - Loc))) - return true; - } - return false; -} - -// Check for mod of Loc between Start and End, excluding both boundaries. -// Start and End can be in different blocks. -static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, - const MemoryUseOrDef *Start, - const MemoryUseOrDef *End) { - // TODO: Only walk until we hit Start. 
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - End->getDefiningAccess(), Loc); - return !MSSA->dominates(Clobber, Start); -} - +// Check that V is either not accessible by the caller, or unwinding cannot +// occur between Start and End. +static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, + Instruction *End) { + assert(Start->getParent() == End->getParent() && "Must be in same block"); + if (!Start->getFunction()->doesNotThrow() && + !isa<AllocaInst>(getUnderlyingObject(V))) { + for (const Instruction &I : + make_range(Start->getIterator(), End->getIterator())) { + if (I.mayThrow()) + return true; + } + } + return false; +} + +void MemCpyOptPass::eraseInstruction(Instruction *I) { + if (MSSAU) + MSSAU->removeMemoryAccess(I); + if (MD) + MD->removeInstruction(I); + I->eraseFromParent(); +} + +// Check for mod or ref of Loc between Start and End, excluding both boundaries. +// Start and End must be in the same block +static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, + const MemoryUseOrDef *Start, + const MemoryUseOrDef *End) { + assert(Start->getBlock() == End->getBlock() && "Only local supported"); + for (const MemoryAccess &MA : + make_range(++Start->getIterator(), End->getIterator())) { + if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(), + Loc))) + return true; + } + return false; +} + +// Check for mod of Loc between Start and End, excluding both boundaries. +// Start and End can be in different blocks. +static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, + const MemoryUseOrDef *Start, + const MemoryUseOrDef *End) { + // TODO: Only walk until we hit Start. + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + End->getDefiningAccess(), Loc); + return !MSSA->dominates(Clobber, Start); +} + /// When scanning forward over instructions, we look for some other patterns to /// fold away. In particular, this looks for stores to neighboring locations of /// memory. If it sees enough consecutive ones, it attempts to merge them @@ -377,27 +377,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, MemsetRanges Ranges(DL); BasicBlock::iterator BI(StartInst); - - // Keeps track of the last memory use or def before the insertion point for - // the new memset. The new MemoryDef for the inserted memsets will be inserted - // after MemInsertPoint. It points to either LastMemDef or to the last user - // before the insertion point of the memset, if there are any such users. - MemoryUseOrDef *MemInsertPoint = nullptr; - // Keeps track of the last MemoryDef between StartInst and the insertion point - // for the new memset. This will become the defining access of the inserted - // memsets. - MemoryDef *LastMemDef = nullptr; + + // Keeps track of the last memory use or def before the insertion point for + // the new memset. The new MemoryDef for the inserted memsets will be inserted + // after MemInsertPoint. It points to either LastMemDef or to the last user + // before the insertion point of the memset, if there are any such users. + MemoryUseOrDef *MemInsertPoint = nullptr; + // Keeps track of the last MemoryDef between StartInst and the insertion point + // for the new memset. This will become the defining access of the inserted + // memsets. 
+ MemoryDef *LastMemDef = nullptr; for (++BI; !BI->isTerminator(); ++BI) { - if (MSSAU) { - auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( - MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); - if (CurrentAcc) { - MemInsertPoint = CurrentAcc; - if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc)) - LastMemDef = CurrentDef; - } - } - + if (MSSAU) { + auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( + MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); + if (CurrentAcc) { + MemInsertPoint = CurrentAcc; + if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc)) + LastMemDef = CurrentDef; + } + } + if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: @@ -411,15 +411,15 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; - Value *StoredVal = NextStore->getValueOperand(); - - // Don't convert stores of non-integral pointer types to memsets (which - // stores integers). - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) - break; - + Value *StoredVal = NextStore->getValueOperand(); + + // Don't convert stores of non-integral pointer types to memsets (which + // stores integers). + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + break; + // Check to see if this stored value is of the same byte-splattable value. - Value *StoredByte = isBytewiseValue(StoredVal, DL); + Value *StoredByte = isBytewiseValue(StoredVal, DL); if (isa<UndefValue>(ByteVal) && StoredByte) ByteVal = StoredByte; if (ByteVal != StoredByte) @@ -486,24 +486,24 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); - if (MSSAU) { - assert(LastMemDef && MemInsertPoint && - "Both LastMemDef and MemInsertPoint need to be set"); - auto *NewDef = - cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI - ? MSSAU->createMemoryAccessBefore( - AMemSet, LastMemDef, MemInsertPoint) - : MSSAU->createMemoryAccessAfter( - AMemSet, LastMemDef, MemInsertPoint)); - MSSAU->insertDef(NewDef, /*RenameUses=*/true); - LastMemDef = NewDef; - MemInsertPoint = NewDef; - } - + if (MSSAU) { + assert(LastMemDef && MemInsertPoint && + "Both LastMemDef and MemInsertPoint need to be set"); + auto *NewDef = + cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI + ? MSSAU->createMemoryAccessBefore( + AMemSet, LastMemDef, MemInsertPoint) + : MSSAU->createMemoryAccessAfter( + AMemSet, LastMemDef, MemInsertPoint)); + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + LastMemDef = NewDef; + MemInsertPoint = NewDef; + } + // Zap all the stores. - for (Instruction *SI : Range.TheStores) - eraseInstruction(SI); - + for (Instruction *SI : Range.TheStores) + eraseInstruction(SI); + ++NumMemSetInfer; } @@ -514,10 +514,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // It will lift the store and its argument + that anything that // may alias with these. // The method returns true if it was successful. -bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { +bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { // If the store alias this position, early bail out. 
MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc))) + if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc))) return false; // Keep track of the arguments of all instruction we plan to lift @@ -528,7 +528,7 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { Args.insert(Ptr); // Instruction to lift before P. - SmallVector<Instruction *, 8> ToLift{SI}; + SmallVector<Instruction *, 8> ToLift{SI}; // Memory locations of lifted instructions. SmallVector<MemoryLocation, 8> MemLocs{StoreLoc}; @@ -541,24 +541,24 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) { auto *C = &*I; - // Make sure hoisting does not perform a store that was not guaranteed to - // happen. - if (!isGuaranteedToTransferExecutionToSuccessor(C)) - return false; - - bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None)); + // Make sure hoisting does not perform a store that was not guaranteed to + // happen. + if (!isGuaranteedToTransferExecutionToSuccessor(C)) + return false; + bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None)); + bool NeedLift = false; if (Args.erase(C)) NeedLift = true; else if (MayAlias) { - NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) { - return isModOrRefSet(AA->getModRefInfo(C, ML)); + NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) { + return isModOrRefSet(AA->getModRefInfo(C, ML)); }); if (!NeedLift) - NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) { - return isModOrRefSet(AA->getModRefInfo(C, Call)); + NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) { + return isModOrRefSet(AA->getModRefInfo(C, Call)); }); } @@ -568,18 +568,18 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { if (MayAlias) { // Since LI is implicitly moved downwards past the lifted instructions, // none of them may modify its source. - if (isModSet(AA->getModRefInfo(C, LoadLoc))) + if (isModSet(AA->getModRefInfo(C, LoadLoc))) return false; else if (const auto *Call = dyn_cast<CallBase>(C)) { // If we can't lift this before P, it's game over. - if (isModOrRefSet(AA->getModRefInfo(P, Call))) + if (isModOrRefSet(AA->getModRefInfo(P, Call))) return false; Calls.push_back(Call); } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) { // If we can't lift this before P, it's game over. auto ML = MemoryLocation::get(C); - if (isModOrRefSet(AA->getModRefInfo(P, ML))) + if (isModOrRefSet(AA->getModRefInfo(P, ML))) return false; MemLocs.push_back(ML); @@ -599,40 +599,40 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { } } - // Find MSSA insertion point. Normally P will always have a corresponding - // memory access before which we can insert. However, with non-standard AA - // pipelines, there may be a mismatch between AA and MSSA, in which case we - // will scan for a memory access before P. In either case, we know for sure - // that at least the load will have a memory access. - // TODO: Simplify this once P will be determined by MSSA, in which case the - // discrepancy can no longer occur. 
- MemoryUseOrDef *MemInsertPoint = nullptr; - if (MSSAU) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { - MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); - } else { - const Instruction *ConstP = P; - for (const Instruction &I : make_range(++ConstP->getReverseIterator(), - ++LI->getReverseIterator())) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { - MemInsertPoint = MA; - break; - } - } - } - } - - // We made it, we need to lift. + // Find MSSA insertion point. Normally P will always have a corresponding + // memory access before which we can insert. However, with non-standard AA + // pipelines, there may be a mismatch between AA and MSSA, in which case we + // will scan for a memory access before P. In either case, we know for sure + // that at least the load will have a memory access. + // TODO: Simplify this once P will be determined by MSSA, in which case the + // discrepancy can no longer occur. + MemoryUseOrDef *MemInsertPoint = nullptr; + if (MSSAU) { + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { + MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); + } else { + const Instruction *ConstP = P; + for (const Instruction &I : make_range(++ConstP->getReverseIterator(), + ++LI->getReverseIterator())) { + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + MemInsertPoint = MA; + break; + } + } + } + } + + // We made it, we need to lift. for (auto *I : llvm::reverse(ToLift)) { LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); I->moveBefore(P); - if (MSSAU) { - assert(MemInsertPoint && "Must have found insert point"); - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { - MSSAU->moveAfter(MA, MemInsertPoint); - MemInsertPoint = MA; - } - } + if (MSSAU) { + assert(MemInsertPoint && "Must have found insert point"); + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { + MSSAU->moveAfter(MA, MemInsertPoint); + MemInsertPoint = MA; + } + } } return true; @@ -652,15 +652,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { const DataLayout &DL = SI->getModule()->getDataLayout(); - Value *StoredVal = SI->getValueOperand(); - - // Not all the transforms below are correct for non-integral pointers, bail - // until we've audited the individual pieces. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) - return false; - + Value *StoredVal = SI->getValueOperand(); + + // Not all the transforms below are correct for non-integral pointers, bail + // until we've audited the individual pieces. + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + return false; + // Load to store forwarding can be interpreted as memcpy. - if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { @@ -672,10 +672,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // the memory we load from in between the load and the store. If // such an instruction is found, we try to promote there instead // of at the store position. - // TODO: Can use MSSA for this. + // TODO: Can use MSSA for this. 
Instruction *P = SI; for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { - if (isModSet(AA->getModRefInfo(&I, LoadLoc))) { + if (isModSet(AA->getModRefInfo(&I, LoadLoc))) { P = &I; break; } @@ -686,7 +686,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // position if nothing alias the store memory after this and the store // destination is not in the range. if (P && P != SI) { - if (!moveUp(SI, P, LI)) + if (!moveUp(SI, P, LI)) P = nullptr; } @@ -697,7 +697,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // memmove must be used to preserve semantic. If not, memcpy can // be used. bool UseMemMove = false; - if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc)) + if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc)) UseMemMove = true; uint64_t Size = DL.getTypeStoreSize(T); @@ -716,16 +716,16 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = - MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(SI); - eraseInstruction(LI); + if (MSSAU) { + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; // Make sure we do not invalidate the iterator. @@ -738,49 +738,49 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. CallInst *C = nullptr; - if (EnableMemorySSA) { - if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( - MSSA->getWalker()->getClobberingMemoryAccess(LI))) { - // The load most post-dom the call. Limit to the same block for now. - // TODO: Support non-local call-slot optimization? - if (LoadClobber->getBlock() == SI->getParent()) - C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); - } - } else { - MemDepResult ldep = MD->getDependency(LI); - if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) - C = dyn_cast<CallInst>(ldep.getInst()); - } + if (EnableMemorySSA) { + if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( + MSSA->getWalker()->getClobberingMemoryAccess(LI))) { + // The load most post-dom the call. Limit to the same block for now. + // TODO: Support non-local call-slot optimization? + if (LoadClobber->getBlock() == SI->getParent()) + C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); + } + } else { + MemDepResult ldep = MD->getDependency(LI); + if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) + C = dyn_cast<CallInst>(ldep.getInst()); + } if (C) { // Check that nothing touches the dest of the "copy" between // the call and the store. 
MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (EnableMemorySSA) { - if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(SI))) + if (EnableMemorySSA) { + if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), + MSSA->getMemoryAccess(SI))) C = nullptr; - } else { - for (BasicBlock::iterator I = --SI->getIterator(), - E = C->getIterator(); - I != E; --I) { - if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { - C = nullptr; - break; - } + } else { + for (BasicBlock::iterator I = --SI->getIterator(), + E = C->getIterator(); + I != E; --I) { + if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { + C = nullptr; + break; + } } } } if (C) { bool changed = performCallSlotOptzn( - LI, SI, SI->getPointerOperand()->stripPointerCasts(), + LI, SI, SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), DL.getTypeStoreSize(SI->getOperand(0)->getType()), commonAlignment(SI->getAlign(), LI->getAlign()), C); if (changed) { - eraseInstruction(SI); - eraseInstruction(LI); + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; return true; } @@ -814,15 +814,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI))); - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(SI); + if (MSSAU) { + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI))); + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(SI); NumMemSetInfer++; // Make sure we do not invalidate the iterator. @@ -849,8 +849,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { /// Takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. -bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, - Instruction *cpyStore, Value *cpyDest, +bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, + Instruction *cpyStore, Value *cpyDest, Value *cpySrc, uint64_t cpyLen, Align cpyAlign, CallInst *C) { // The general transformation to keep in mind is @@ -881,7 +881,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, if (!srcArraySize) return false; - const DataLayout &DL = cpyLoad->getModule()->getDataLayout(); + const DataLayout &DL = cpyLoad->getModule()->getDataLayout(); uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); @@ -891,25 +891,25 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. - if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen), - DL, C, DT)) - return false; - - // Make sure that nothing can observe cpyDest being written early. There are - // a number of cases to consider: - // 1. 
cpyDest cannot be accessed between C and cpyStore as a precondition of - // the transform. - // 2. C itself may not access cpyDest (prior to the transform). This is - // checked further below. - // 3. If cpyDest is accessible to the caller of this function (potentially - // captured and not based on an alloca), we need to ensure that we cannot - // unwind between C and cpyStore. This is checked here. - // 4. If cpyDest is potentially captured, there may be accesses to it from - // another thread. In this case, we need to check that cpyStore is - // guaranteed to be executed if C is. As it is a non-atomic access, it - // renders accesses from other threads undefined. - // TODO: This is currently not checked. - if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) + if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen), + DL, C, DT)) + return false; + + // Make sure that nothing can observe cpyDest being written early. There are + // a number of cases to consider: + // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of + // the transform. + // 2. C itself may not access cpyDest (prior to the transform). This is + // checked further below. + // 3. If cpyDest is accessible to the caller of this function (potentially + // captured and not based on an alloca), we need to ensure that we cannot + // unwind between C and cpyStore. This is checked here. + // 4. If cpyDest is potentially captured, there may be accesses to it from + // another thread. In this case, we need to check that cpyStore is + // guaranteed to be executed if C is. As it is a non-atomic access, it + // renders accesses from other threads undefined. + // TODO: This is currently not checked. + if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) return false; // Check that dest points to memory that is at least as aligned as src. @@ -924,26 +924,26 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // guarantees that it holds only undefined values when passed in (so the final // memcpy can be dropped), that it is not read or written between the call and // the memcpy, and that writing beyond the end of it is undefined. - SmallVector<User *, 8> srcUseList(srcAlloca->users()); + SmallVector<User *, 8> srcUseList(srcAlloca->users()); while (!srcUseList.empty()) { User *U = srcUseList.pop_back_val(); if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) { - append_range(srcUseList, U->users()); + append_range(srcUseList, U->users()); continue; } if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { if (!G->hasAllZeroIndices()) return false; - append_range(srcUseList, U->users()); + append_range(srcUseList, U->users()); continue; } if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) if (IT->isLifetimeStartOrEnd()) continue; - if (U != C && U != cpyLoad) + if (U != C && U != cpyLoad) return false; } @@ -955,24 +955,24 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. - if (!DT->dominates(cpyDest, C)) { - // Support moving a constant index GEP before the call. - auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest); - if (GEP && GEP->hasAllConstantIndices() && - DT->dominates(GEP->getPointerOperand(), C)) - GEP->moveBefore(C); - else + if (!DT->dominates(cpyDest, C)) { + // Support moving a constant index GEP before the call. 
+ auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest); + if (GEP && GEP->hasAllConstantIndices() && + DT->dominates(GEP->getPointerOperand(), C)) + GEP->moveBefore(C); + else return false; - } + } // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. - ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize)); + ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize)); // If necessary, perform additional analysis. if (isModOrRefSet(MR)) - MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT); + MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT); if (isModOrRefSet(MR)) return false; @@ -1014,8 +1014,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. - if (MD) - MD->removeInstruction(C); + if (MD) + MD->removeInstruction(C); // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be @@ -1024,9 +1024,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, LLVMContext::MD_noalias, LLVMContext::MD_invariant_group, LLVMContext::MD_access_group}; - combineMetadata(C, cpyLoad, KnownIDs, true); + combineMetadata(C, cpyLoad, KnownIDs, true); - ++NumCallSlot; + ++NumCallSlot; return true; } @@ -1063,28 +1063,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // // TODO: If the code between M and MDep is transparent to the destination "c", // then we could still perform the xform by moving M up to the first memcpy. - if (EnableMemorySSA) { - // TODO: It would be sufficient to check the MDep source up to the memcpy - // size of M, rather than MDep. - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. - MemDepResult SourceDep = - MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, - M->getIterator(), M->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + if (EnableMemorySSA) { + // TODO: It would be sufficient to check the MDep source up to the memcpy + // size of M, rather than MDep. + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) + return false; + } else { + // NOTE: This is conservative, it will stop on any read from the source loc, + // not just the defining memcpy. + MemDepResult SourceDep = + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + M->getIterator(), M->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + } // If the dest of the second might alias the source of the first, then the // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. 
bool UseMemMove = false; - if (!AA->isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(MDep))) + if (!AA->isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(MDep))) UseMemMove = true; // If all checks passed, then we can transform M. @@ -1094,25 +1094,25 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. IRBuilder<> Builder(M); - Instruction *NewM; + Instruction *NewM; if (UseMemMove) - NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); + NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); else - NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); - - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); - auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - + NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); + + if (MSSAU) { + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); + auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + // Remove the instruction we're replacing. - eraseInstruction(M); + eraseInstruction(M); ++NumMemCpyInstr; return true; } @@ -1137,41 +1137,41 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, if (MemSet->getDest() != MemCpy->getDest()) return false; - // Check that src and dst of the memcpy aren't the same. While memcpy - // operands cannot partially overlap, exact equality is allowed. - if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(), - LocationSize::precise(1)), - MemoryLocation(MemCpy->getDest(), - LocationSize::precise(1)))) + // Check that src and dst of the memcpy aren't the same. While memcpy + // operands cannot partially overlap, exact equality is allowed. + if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(), + LocationSize::precise(1)), + MemoryLocation(MemCpy->getDest(), + LocationSize::precise(1)))) return false; - if (EnableMemorySSA) { - // We know that dst up to src_size is not written. We now need to make sure - // that dst up to dst_size is not accessed. (If we did not move the memset, - // checking for reads would be sufficient.) - if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), - MSSA->getMemoryAccess(MemSet), - MSSA->getMemoryAccess(MemCpy))) { - return false; - } - } else { - // We have already checked that dst up to src_size is not accessed. We - // need to make sure that there are no accesses up to dst_size either. - MemDepResult DstDepInfo = MD->getPointerDependencyFrom( - MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), - MemCpy->getParent()); - if (DstDepInfo.getInst() != MemSet) - return false; - } - + if (EnableMemorySSA) { + // We know that dst up to src_size is not written. 
We now need to make sure + // that dst up to dst_size is not accessed. (If we did not move the memset, + // checking for reads would be sufficient.) + if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), + MSSA->getMemoryAccess(MemSet), + MSSA->getMemoryAccess(MemCpy))) { + return false; + } + } else { + // We have already checked that dst up to src_size is not accessed. We + // need to make sure that there are no accesses up to dst_size either. + MemDepResult DstDepInfo = MD->getPointerDependencyFrom( + MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), + MemCpy->getParent()); + if (DstDepInfo.getInst() != MemSet) + return false; + } + // Use the same i8* dest as the memcpy, killing the memset dest if different. Value *Dest = MemCpy->getRawDest(); Value *DestSize = MemSet->getLength(); Value *SrcSize = MemCpy->getLength(); - if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy)) - return false; - + if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy)) + return false; + // By default, create an unaligned memset. unsigned Align = 1; // If Dest is aligned, and SrcSize is constant, use the minimum alignment @@ -1197,25 +1197,25 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize); Value *MemsetLen = Builder.CreateSelect( Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); - Instruction *NewMemSet = Builder.CreateMemSet( + Instruction *NewMemSet = Builder.CreateMemSet( Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest, SrcSize), MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && - "MemCpy must be a MemoryDef"); - // The new memset is inserted after the memcpy, but it is known that its - // defining access is the memset about to be removed which immediately - // precedes the memcpy. - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessBefore( - NewMemSet, LastDef->getDefiningAccess(), LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(MemSet); + if (MSSAU) { + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && + "MemCpy must be a MemoryDef"); + // The new memset is inserted after the memcpy, but it is known that its + // defining access is the memset about to be removed which immediately + // precedes the memcpy. 
+ auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessBefore( + NewMemSet, LastDef->getDefiningAccess(), LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(MemSet); return true; } @@ -1234,24 +1234,24 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) { return false; } -static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, - MemoryDef *Def, ConstantInt *Size) { - if (MSSA->isLiveOnEntryDef(Def)) - return isa<AllocaInst>(getUnderlyingObject(V)); - - if (IntrinsicInst *II = - dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start) { - ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0)); - if (AA->isMustAlias(V, II->getArgOperand(1)) && - LTSize->getZExtValue() >= Size->getZExtValue()) - return true; - } - } - - return false; -} - +static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, + MemoryDef *Def, ConstantInt *Size) { + if (MSSA->isLiveOnEntryDef(Def)) + return isa<AllocaInst>(getUnderlyingObject(V)); + + if (IntrinsicInst *II = + dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0)); + if (AA->isMustAlias(V, II->getArgOperand(1)) && + LTSize->getZExtValue() >= Size->getZExtValue()) + return true; + } + } + + return false; +} + /// Transform memcpy to memset when its source was just memset. /// In other words, turn: /// \code @@ -1270,7 +1270,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet) { // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and // memcpying from the same address. Otherwise it is hard to reason about. - if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) + if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) return false; // A known memset size is required. @@ -1287,37 +1287,37 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, // interested in the bytes from MemSetSize..CopySize here, but as we can't // easily represent this location, we use the full 0..CopySize range. 
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy); - bool CanReduceSize = false; - if (EnableMemorySSA) { - MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - MemSetAccess->getDefiningAccess(), MemCpyLoc); - if (auto *MD = dyn_cast<MemoryDef>(Clobber)) - if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize)) - CanReduceSize = true; - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent()); - if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) - CanReduceSize = true; - } - - if (!CanReduceSize) + bool CanReduceSize = false; + if (EnableMemorySSA) { + MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + MemSetAccess->getDefiningAccess(), MemCpyLoc); + if (auto *MD = dyn_cast<MemoryDef>(Clobber)) + if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize)) + CanReduceSize = true; + } else { + MemDepResult DepInfo = MD->getPointerDependencyFrom( + MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent()); + if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) + CanReduceSize = true; + } + + if (!CanReduceSize) return false; - CopySize = MemSetSize; + CopySize = MemSetSize; } IRBuilder<> Builder(MemCpy); - Instruction *NewM = - Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), - CopySize, MaybeAlign(MemCpy->getDestAlignment())); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - + Instruction *NewM = + Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), + CopySize, MaybeAlign(MemCpy->getDestAlignment())); + if (MSSAU) { + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + return true; } @@ -1333,7 +1333,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // If the source and destination of the memcpy are the same, then zap it. 
if (M->getSource() == M->getDest()) { ++BBI; - eraseInstruction(M); + eraseInstruction(M); return true; } @@ -1343,157 +1343,157 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (Value *ByteVal = isBytewiseValue(GV->getInitializer(), M->getModule()->getDataLayout())) { IRBuilder<> Builder(M); - Instruction *NewM = - Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - MaybeAlign(M->getDestAlignment()), false); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = - MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(M); + Instruction *NewM = + Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), + MaybeAlign(M->getDestAlignment()), false); + if (MSSAU) { + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(M); ++NumCpyToSet; return true; } - if (EnableMemorySSA) { - MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); - MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); - MemoryLocation DestLoc = MemoryLocation::getForDest(M); - const MemoryAccess *DestClobber = - MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - // The memcpy most post-dom the memset, so limit this to the same basic - // block. A non-local generalization is likely not worthwhile. - if (auto *MD = dyn_cast<MemoryDef>(DestClobber)) - if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst())) - if (DestClobber->getBlock() == M->getParent()) - if (processMemSetMemCpyDependence(M, MDep)) - return true; - - // The optimizations after this point require the memcpy size. - ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); - if (!CopySize) return false; - - MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( - AnyClobber, MemoryLocation::getForSource(M)); - - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { - if (Instruction *MI = MD->getMemoryInst()) { - if (auto *C = dyn_cast<CallInst>(MI)) { - // The memcpy must post-dom the call. Limit to the same block for now. - // Additionally, we need to ensure that there are no accesses to dest - // between the call and the memcpy. Accesses to src will be checked - // by performCallSlotOptzn(). - // TODO: Support non-local call-slot optimization? - if (C->getParent() == M->getParent() && - !accessedBetween(*AA, DestLoc, MD, MA)) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? 
- Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, C)) { - LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" - << " call: " << *C << "\n" - << " memcpy: " << *M << "\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - } - if (auto *MDep = dyn_cast<MemCpyInst>(MI)) - return processMemCpyMemCpyDependence(M, MDep); - if (auto *MDep = dyn_cast<MemSetInst>(MI)) { - if (performMemCpyToMemSetOptzn(M, MDep)) { - LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); - eraseInstruction(M); - ++NumCpyToSet; - return true; - } - } - } - - if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) { - LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); - eraseInstruction(M); - ++NumMemCpyInstr; + if (EnableMemorySSA) { + MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); + MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); + MemoryLocation DestLoc = MemoryLocation::getForDest(M); + const MemoryAccess *DestClobber = + MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); + + // Try to turn a partially redundant memset + memcpy into + // memcpy + smaller memset. We don't need the memcpy size for this. + // The memcpy most post-dom the memset, so limit this to the same basic + // block. A non-local generalization is likely not worthwhile. + if (auto *MD = dyn_cast<MemoryDef>(DestClobber)) + if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst())) + if (DestClobber->getBlock() == M->getParent()) + if (processMemSetMemCpyDependence(M, MDep)) + return true; + + // The optimizations after this point require the memcpy size. + ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); + if (!CopySize) return false; + + MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( + AnyClobber, MemoryLocation::getForSource(M)); + + // There are four possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. + // c) memcpy from freshly alloca'd space or space that has just started + // its lifetime copies undefined data, and we can therefore eliminate + // the memcpy in favor of the data that was already at the destination. + // d) memcpy from a just-memset'd source can be turned into memset. + if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { + if (Instruction *MI = MD->getMemoryInst()) { + if (auto *C = dyn_cast<CallInst>(MI)) { + // The memcpy must post-dom the call. Limit to the same block for now. + // Additionally, we need to ensure that there are no accesses to dest + // between the call and the memcpy. Accesses to src will be checked + // by performCallSlotOptzn(). + // TODO: Support non-local call-slot optimization? + if (C->getParent() == M->getParent() && + !accessedBetween(*AA, DestLoc, MD, MA)) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? 
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), Alignment, C)) { + LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" + << " call: " << *C << "\n" + << " memcpy: " << *M << "\n"); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + } + } + if (auto *MDep = dyn_cast<MemCpyInst>(MI)) + return processMemCpyMemCpyDependence(M, MDep); + if (auto *MDep = dyn_cast<MemSetInst>(MI)) { + if (performMemCpyToMemSetOptzn(M, MDep)) { + LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); + eraseInstruction(M); + ++NumCpyToSet; + return true; + } + } + } + + if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) { + LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); + eraseInstruction(M); + ++NumMemCpyInstr; return true; } } - } else { - MemDepResult DepInfo = MD->getDependency(M); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - if (DepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) - if (processMemSetMemCpyDependence(M, MDep)) - return true; - - // The optimizations after this point require the memcpy size. - ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); - if (!CopySize) return false; - - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (DepInfo.isClobber()) { - if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? - Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, C)) { - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } + } else { + MemDepResult DepInfo = MD->getDependency(M); + + // Try to turn a partially redundant memset + memcpy into + // memcpy + smaller memset. We don't need the memcpy size for this. + if (DepInfo.isClobber()) + if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) + if (processMemSetMemCpyDependence(M, MDep)) + return true; + + // The optimizations after this point require the memcpy size. + ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); + if (!CopySize) return false; + + // There are four possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. + // c) memcpy from freshly alloca'd space or space that has just started + // its lifetime copies undefined data, and we can therefore eliminate + // the memcpy in favor of the data that was already at the destination. + // d) memcpy from a just-memset'd source can be turned into memset. + if (DepInfo.isClobber()) { + if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? 
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), Alignment, C)) { + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + } } - MemoryLocation SrcLoc = MemoryLocation::getForSource(M); - MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( - SrcLoc, true, M->getIterator(), M->getParent()); - - if (SrcDepInfo.isClobber()) { - if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) - return processMemCpyMemCpyDependence(M, MDep); - } else if (SrcDepInfo.isDef()) { - if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { - eraseInstruction(M); - ++NumMemCpyInstr; + MemoryLocation SrcLoc = MemoryLocation::getForSource(M); + MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( + SrcLoc, true, M->getIterator(), M->getParent()); + + if (SrcDepInfo.isClobber()) { + if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) + return processMemCpyMemCpyDependence(M, MDep); + } else if (SrcDepInfo.isDef()) { + if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { + eraseInstruction(M); + ++NumMemCpyInstr; return true; } - } - - if (SrcDepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) - if (performMemCpyToMemSetOptzn(M, MDep)) { - eraseInstruction(M); - ++NumCpyToSet; - return true; - } - } - + } + + if (SrcDepInfo.isClobber()) + if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) + if (performMemCpyToMemSetOptzn(M, MDep)) { + eraseInstruction(M); + ++NumCpyToSet; + return true; + } + } + return false; } @@ -1504,8 +1504,8 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { return false; // See if the pointers alias. - if (!AA->isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(M))) + if (!AA->isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(M))) return false; LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M @@ -1518,13 +1518,13 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys)); - // For MemorySSA nothing really changes (except that memcpy may imply stricter - // aliasing guarantees). - + // For MemorySSA nothing really changes (except that memcpy may imply stricter + // aliasing guarantees). + // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. 
- if (MD) - MD->removeInstruction(M); + if (MD) + MD->removeInstruction(M); ++NumMoveToCpy; return true; @@ -1537,21 +1537,21 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { Value *ByValArg = CB.getArgOperand(ArgNo); Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); - MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); - MemCpyInst *MDep = nullptr; - if (EnableMemorySSA) { - MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - CallAccess->getDefiningAccess(), Loc); - if (auto *MD = dyn_cast<MemoryDef>(Clobber)) - MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst()); - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - Loc, true, CB.getIterator(), CB.getParent()); - if (!DepInfo.isClobber()) - return false; - MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); - } + MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); + MemCpyInst *MDep = nullptr; + if (EnableMemorySSA) { + MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + CallAccess->getDefiningAccess(), Loc); + if (auto *MD = dyn_cast<MemoryDef>(Clobber)) + MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst()); + } else { + MemDepResult DepInfo = MD->getPointerDependencyFrom( + Loc, true, CB.getIterator(), CB.getParent()); + if (!DepInfo.isClobber()) + return false; + MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); + } // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by // a memcpy, see if we can byval from the source of the memcpy instead of the @@ -1574,8 +1574,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // source of the memcpy to the alignment we need. If we fail, we bail out. MaybeAlign MemDepAlign = MDep->getSourceAlign(); if ((!MemDepAlign || *MemDepAlign < *ByValAlign) && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC, - DT) < *ByValAlign) + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC, + DT) < *ByValAlign) return false; // The address space of the memcpy source must match the byval argument @@ -1589,19 +1589,19 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // *b = 42; // foo(*a) // It would be invalid to transform the second memcpy into foo(*b). - if (EnableMemorySSA) { - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. - MemDepResult SourceDep = MD->getPointerDependencyFrom( - MemoryLocation::getForSource(MDep), false, - CB.getIterator(), MDep->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + if (EnableMemorySSA) { + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) + return false; + } else { + // NOTE: This is conservative, it will stop on any read from the source loc, + // not just the defining memcpy. 
+ MemDepResult SourceDep = MD->getPointerDependencyFrom( + MemoryLocation::getForSource(MDep), false, + CB.getIterator(), MDep->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + } Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) { @@ -1632,7 +1632,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { // instruction in a BB can't be dominated by a later instruction in the // same BB (which is a scenario that can happen for an unreachable BB that // has itself as a predecessor). - if (!DT->isReachableFromEntry(&BB)) + if (!DT->isReachableFromEntry(&BB)) continue; for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { @@ -1668,43 +1668,43 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { } PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F) - : AM.getCachedResult<MemoryDependenceAnalysis>(F); + auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F) + : AM.getCachedResult<MemoryDependenceAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); - auto *AA = &AM.getResult<AAManager>(F); - auto *AC = &AM.getResult<AssumptionAnalysis>(F); - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F) - : AM.getCachedResult<MemorySSAAnalysis>(F); - - bool MadeChange = - runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); + auto *AA = &AM.getResult<AAManager>(F); + auto *AC = &AM.getResult<AssumptionAnalysis>(F); + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F) + : AM.getCachedResult<MemorySSAAnalysis>(F); + + bool MadeChange = + runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); if (!MadeChange) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); PA.preserve<GlobalsAA>(); - if (MD) - PA.preserve<MemoryDependenceAnalysis>(); - if (MSSA) - PA.preserve<MemorySSAAnalysis>(); + if (MD) + PA.preserve<MemoryDependenceAnalysis>(); + if (MSSA) + PA.preserve<MemorySSAAnalysis>(); return PA; } -bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, - TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_, - MemorySSA *MSSA_) { +bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, + TargetLibraryInfo *TLI_, AliasAnalysis *AA_, + AssumptionCache *AC_, DominatorTree *DT_, + MemorySSA *MSSA_) { bool MadeChange = false; MD = MD_; TLI = TLI_; - AA = AA_; - AC = AC_; - DT = DT_; - MSSA = MSSA_; - MemorySSAUpdater MSSAU_(MSSA_); - MSSAU = MSSA_ ? &MSSAU_ : nullptr; + AA = AA_; + AC = AC_; + DT = DT_; + MSSA = MSSA_; + MemorySSAUpdater MSSAU_(MSSA_); + MSSAU = MSSA_ ? &MSSAU_ : nullptr; // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. @@ -1717,9 +1717,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, MadeChange = true; } - if (MSSA_ && VerifyMemorySSA) - MSSA_->verifyMemorySSA(); - + if (MSSA_ && VerifyMemorySSA) + MSSA_->verifyMemorySSA(); + MD = nullptr; return MadeChange; } @@ -1729,17 +1729,17 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto *MDWP = !EnableMemorySSA - ? 
&getAnalysis<MemoryDependenceWrapperPass>() - : getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); + auto *MDWP = !EnableMemorySSA + ? &getAnalysis<MemoryDependenceWrapperPass>() + : getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *MSSAWP = EnableMemorySSA - ? &getAnalysis<MemorySSAWrapperPass>() - : getAnalysisIfAvailable<MemorySSAWrapperPass>(); - - return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *MSSAWP = EnableMemorySSA + ? &getAnalysis<MemorySSAWrapperPass>() + : getAnalysisIfAvailable<MemorySSAWrapperPass>(); + + return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, + MSSAWP ? &MSSAWP->getMSSA() : nullptr); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp index 7f8b75ac88..5389d41e62 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp @@ -372,7 +372,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block, } else { // In this case, we expect a constant incoming value (the comparison is // chained). - const auto *const Const = cast<ConstantInt>(Val); + const auto *const Const = cast<ConstantInt>(Val); LLVM_DEBUG(dbgs() << "const\n"); if (!Const->isZero()) return {}; LLVM_DEBUG(dbgs() << "false\n"); @@ -624,17 +624,17 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons, Value *IsEqual = nullptr; LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> " << BB->getName() << "\n"); - - // If there is one block that requires splitting, we do it now, i.e. - // just before we know we will collapse the chain. The instructions - // can be executed before any of the instructions in the chain. - const auto ToSplit = llvm::find_if( - Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; }); - if (ToSplit != Comparisons.end()) { - LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); - ToSplit->split(BB, AA); - } - + + // If there is one block that requires splitting, we do it now, i.e. + // just before we know we will collapse the chain. The instructions + // can be executed before any of the instructions in the chain. 
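
For illustration, the mergeComparisons hunk above works on chains of equality comparisons over contiguous memory. A minimal standalone C++ sketch (hypothetical struct and function names, assuming the fields are adjacent with no padding) of the source-level pattern such a chain corresponds to, and of the single memcmp it is equivalent to:

    #include <cstdio>
    #include <cstring>

    struct Triple { int a; int b; int c; };   // assumed padding-free

    bool eqChained(const Triple &x, const Triple &y) {
      // A chain of per-field equality comparisons over contiguous memory...
      return x.a == y.a && x.b == y.b && x.c == y.c;
    }

    bool eqMerged(const Triple &x, const Triple &y) {
      // ...is equivalent to one memcmp over the whole contiguous range.
      return std::memcmp(&x, &y, sizeof(Triple)) == 0;
    }

    int main() {
      Triple t1{1, 2, 3}, t2{1, 2, 3};
      std::printf("%d %d\n", eqChained(t1, t2), eqMerged(t1, t2));   // 1 1
      return 0;
    }
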
+ const auto ToSplit = llvm::find_if( + Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; }); + if (ToSplit != Comparisons.end()) { + LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); + ToSplit->split(BB, AA); + } + if (Comparisons.size() == 1) { LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); Value *const LhsLoad = diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp index 32bb62129e..dd2830026c 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp @@ -219,27 +219,27 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // Process the basic blocks in a depth first traversal of the dominator // tree. This order ensures that all bases of a candidate are in Candidates // when we process it. - SmallVector<WeakTrackingVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; for (const auto Node : depth_first(DT)) { BasicBlock *BB = Node->getBlock(); for (auto I = BB->begin(); I != BB->end(); ++I) { - Instruction *OrigI = &*I; - const SCEV *OrigSCEV = nullptr; - if (Instruction *NewI = tryReassociate(OrigI, OrigSCEV)) { - Changed = true; - OrigI->replaceAllUsesWith(NewI); - - // Add 'OrigI' to the list of dead instructions. - DeadInsts.push_back(WeakTrackingVH(OrigI)); - // Add the rewritten instruction to SeenExprs; the original - // instruction is deleted. - const SCEV *NewSCEV = SE->getSCEV(NewI); - SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI)); - + Instruction *OrigI = &*I; + const SCEV *OrigSCEV = nullptr; + if (Instruction *NewI = tryReassociate(OrigI, OrigSCEV)) { + Changed = true; + OrigI->replaceAllUsesWith(NewI); + + // Add 'OrigI' to the list of dead instructions. + DeadInsts.push_back(WeakTrackingVH(OrigI)); + // Add the rewritten instruction to SeenExprs; the original + // instruction is deleted. + const SCEV *NewSCEV = SE->getSCEV(NewI); + SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI)); + // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) // is equivalent to I. However, ScalarEvolution::getSCEV may - // weaken nsw causing NewSCEV not to equal OldSCEV. For example, - // suppose we reassociate + // weaken nsw causing NewSCEV not to equal OldSCEV. For example, + // suppose we reassociate // I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4 // to // NewI = &a[sext(i)] + sext(j). @@ -253,47 +253,47 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can // map both SCEV before and after tryReassociate(I) to I. // - // This improvement is exercised in @reassociate_gep_nsw in - // nary-gep.ll. - if (NewSCEV != OrigSCEV) - SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI)); - } else if (OrigSCEV) - SeenExprs[OrigSCEV].push_back(WeakTrackingVH(OrigI)); + // This improvement is exercised in @reassociate_gep_nsw in + // nary-gep.ll. + if (NewSCEV != OrigSCEV) + SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI)); + } else if (OrigSCEV) + SeenExprs[OrigSCEV].push_back(WeakTrackingVH(OrigI)); } } - // Delete all dead instructions from 'DeadInsts'. - // Please note ScalarEvolution is updated along the way. - RecursivelyDeleteTriviallyDeadInstructionsPermissive( - DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); }); - + // Delete all dead instructions from 'DeadInsts'. + // Please note ScalarEvolution is updated along the way. 
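
The NaryReassociate comment above, about I = &a[sext(i +nsw j)] being rewritten to &a[sext(i)] + sext(j), relies on sign extension distributing over an add only when the add has no signed wrap. A standalone check with made-up values (the wrapping case uses unsigned arithmetic and assumes two's-complement conversion, so the program itself stays well defined):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // No signed overflow (the "nsw" case): sext(i + j) == sext(i) + sext(j).
      int32_t i = 100, j = 23;
      std::printf("%lld %lld\n", (long long)(int64_t)(i + j),
                  (long long)((int64_t)i + (int64_t)j));          // 123 123

      // If the 32-bit add wraps (nsw would be violated), the two disagree,
      // which is why NewSCEV may not equal OrigSCEV after the rewrite.
      i = INT32_MAX; j = 1;
      int32_t wrapped = (int32_t)((uint32_t)i + (uint32_t)j);      // wraps to INT32_MIN
      std::printf("%lld %lld\n", (long long)(int64_t)wrapped,
                  (long long)((int64_t)i + (int64_t)j));           // -2147483648 vs 2147483648
      return 0;
    }
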
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive( + DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); }); + return Changed; } -Instruction *NaryReassociatePass::tryReassociate(Instruction * I, - const SCEV *&OrigSCEV) { - - if (!SE->isSCEVable(I->getType())) - return nullptr; - +Instruction *NaryReassociatePass::tryReassociate(Instruction * I, + const SCEV *&OrigSCEV) { + + if (!SE->isSCEVable(I->getType())) + return nullptr; + switch (I->getOpcode()) { case Instruction::Add: case Instruction::Mul: - OrigSCEV = SE->getSCEV(I); + OrigSCEV = SE->getSCEV(I); return tryReassociateBinaryOp(cast<BinaryOperator>(I)); case Instruction::GetElementPtr: - OrigSCEV = SE->getSCEV(I); + OrigSCEV = SE->getSCEV(I); return tryReassociateGEP(cast<GetElementPtrInst>(I)); default: - return nullptr; + return nullptr; } - - llvm_unreachable("should not be reached"); - return nullptr; + + llvm_unreachable("should not be reached"); + return nullptr; } static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI) { - SmallVector<const Value *, 4> Indices(GEP->indices()); + SmallVector<const Value *, 4> Indices(GEP->indices()); return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), Indices) == TargetTransformInfo::TCC_Free; } @@ -369,8 +369,8 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, // Replace the I-th index with LHS. IndexExprs[I] = SE->getSCEV(LHS); if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) && - DL->getTypeSizeInBits(LHS->getType()).getFixedSize() < - DL->getTypeSizeInBits(GEP->getOperand(I)->getType()).getFixedSize()) { + DL->getTypeSizeInBits(LHS->getType()).getFixedSize() < + DL->getTypeSizeInBits(GEP->getOperand(I)->getType()).getFixedSize()) { // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to // zext if the source operand is proved non-negative. We should do that // consistently so that CandidateExpr more likely appears before. See diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp index 281d47c862..7638b0fba4 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp @@ -662,8 +662,8 @@ public: const DataLayout &DL) : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL), PredInfo(std::make_unique<PredicateInfo>(F, *DT, *AC)), - SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false, - /*CanUseUndef=*/false) {} + SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false, + /*CanUseUndef=*/false) {} bool runGVN(); @@ -1248,7 +1248,7 @@ const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const { const CallExpression * NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const { // FIXME: Add operand bundles for calls. - // FIXME: Allow commutative matching for intrinsics. + // FIXME: Allow commutative matching for intrinsics. 
auto *E = new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA); setBasicExpressionInfo(CI, E); @@ -1535,39 +1535,39 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { LLVM_DEBUG(dbgs() << "Found predicate info from instruction !\n"); - const Optional<PredicateConstraint> &Constraint = PI->getConstraint(); - if (!Constraint) + const Optional<PredicateConstraint> &Constraint = PI->getConstraint(); + if (!Constraint) return nullptr; - CmpInst::Predicate Predicate = Constraint->Predicate; - Value *CmpOp0 = I->getOperand(0); - Value *CmpOp1 = Constraint->OtherOp; + CmpInst::Predicate Predicate = Constraint->Predicate; + Value *CmpOp0 = I->getOperand(0); + Value *CmpOp1 = Constraint->OtherOp; - Value *FirstOp = lookupOperandLeader(CmpOp0); - Value *SecondOp = lookupOperandLeader(CmpOp1); - Value *AdditionallyUsedValue = CmpOp0; + Value *FirstOp = lookupOperandLeader(CmpOp0); + Value *SecondOp = lookupOperandLeader(CmpOp1); + Value *AdditionallyUsedValue = CmpOp0; // Sort the ops. if (shouldSwapOperands(FirstOp, SecondOp)) { std::swap(FirstOp, SecondOp); - Predicate = CmpInst::getSwappedPredicate(Predicate); - AdditionallyUsedValue = CmpOp1; + Predicate = CmpInst::getSwappedPredicate(Predicate); + AdditionallyUsedValue = CmpOp1; } - if (Predicate == CmpInst::ICMP_EQ) { - addPredicateUsers(PI, I); - addAdditionalUsers(AdditionallyUsedValue, I); - return createVariableOrConstant(FirstOp); + if (Predicate == CmpInst::ICMP_EQ) { + addPredicateUsers(PI, I); + addAdditionalUsers(AdditionallyUsedValue, I); + return createVariableOrConstant(FirstOp); } - - // Handle the special case of floating point. - if (Predicate == CmpInst::FCMP_OEQ && isa<ConstantFP>(FirstOp) && - !cast<ConstantFP>(FirstOp)->isZero()) { - addPredicateUsers(PI, I); - addAdditionalUsers(AdditionallyUsedValue, I); - return createConstantExpression(cast<Constant>(FirstOp)); + + // Handle the special case of floating point. + if (Predicate == CmpInst::FCMP_OEQ && isa<ConstantFP>(FirstOp) && + !cast<ConstantFP>(FirstOp)->isZero()) { + addPredicateUsers(PI, I); + addAdditionalUsers(AdditionallyUsedValue, I); + return createConstantExpression(cast<Constant>(FirstOp)); } - + return nullptr; } @@ -2876,7 +2876,7 @@ void NewGVN::cleanupTables() { } while (!TempInst.empty()) { - auto *I = TempInst.pop_back_val(); + auto *I = TempInst.pop_back_val(); I->deleteValue(); } @@ -3371,9 +3371,9 @@ bool NewGVN::runGVN() { for (auto &B : RPOT) { auto *Node = DT->getNode(B); if (Node->getNumChildren() > 1) - llvm::sort(*Node, [&](const DomTreeNode *A, const DomTreeNode *B) { - return RPOOrdering[A] < RPOOrdering[B]; - }); + llvm::sort(*Node, [&](const DomTreeNode *A, const DomTreeNode *B) { + return RPOOrdering[A] < RPOOrdering[B]; + }); } // Now a standard depth first ordering of the domtree is equivalent to RPO. diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp index a110f7d5c2..9ee2e77af0 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -243,7 +243,7 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE, BasicBlock *Pred) { // A conservative bound on the loop as a whole. 
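
The FCMP_OEQ special case in NewGVN's performSymbolicPredicateInfoEvaluation above only substitutes a floating-point constant when it is non-zero. A minimal standalone illustration of why zero is excluded, assuming IEEE-754 doubles:

    #include <cmath>
    #include <cstdio>

    int main() {
      // +0.0 and -0.0 compare equal under ordered equality...
      double pz = 0.0, nz = -0.0;
      std::printf("equal: %d\n", pz == nz);                                   // 1
      // ...yet they are distinguishable values, so "x == 0.0" does not pin
      // down x and substituting the constant could change behavior:
      std::printf("signbits: %d %d\n", std::signbit(pz), std::signbit(nz));   // 0 1
      // For a non-zero constant C, an ordered x == C does determine x
      // (NaN never compares ordered-equal), so the constant can be propagated.
      return 0;
    }
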
const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L); - if (!isa<SCEVCouldNotCompute>(MaxTrips) && + if (!isa<SCEVCouldNotCompute>(MaxTrips) && SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN( CountedLoopTripWidth)) return true; @@ -255,7 +255,7 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE, // This returns an exact expression only. TODO: We really only need an // upper bound here, but SE doesn't expose that. const SCEV *MaxExec = SE->getExitCount(L, Pred); - if (!isa<SCEVCouldNotCompute>(MaxExec) && + if (!isa<SCEVCouldNotCompute>(MaxExec) && SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN( CountedLoopTripWidth)) return true; @@ -435,7 +435,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F, return Cursor; } -const char GCSafepointPollName[] = "gc.safepoint_poll"; +const char GCSafepointPollName[] = "gc.safepoint_poll"; static bool isGCSafepointPoll(Function &F) { return F.getName().equals(GCSafepointPollName); @@ -589,7 +589,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) { for (Instruction *PollLocation : PollsNeeded) { std::vector<CallBase *> RuntimeCalls; InsertSafepointPoll(PollLocation, RuntimeCalls, TLI); - llvm::append_range(ParsePointNeeded, RuntimeCalls); + llvm::append_range(ParsePointNeeded, RuntimeCalls); } return Modified; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp index dffeb7cc22..e4c9424aee 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp @@ -920,100 +920,100 @@ static Value *NegateValue(Value *V, Instruction *BI, return NewNeg; } -// See if this `or` looks like an load widening reduction, i.e. that it -// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't -// ensure that the pattern is *really* a load widening reduction, -// we do not ensure that it can really be replaced with a widened load, -// only that it mostly looks like one. -static bool isLoadCombineCandidate(Instruction *Or) { - SmallVector<Instruction *, 8> Worklist; - SmallSet<Instruction *, 8> Visited; - - auto Enqueue = [&](Value *V) { - auto *I = dyn_cast<Instruction>(V); - // Each node of an `or` reduction must be an instruction, - if (!I) - return false; // Node is certainly not part of an `or` load reduction. - // Only process instructions we have never processed before. - if (Visited.insert(I).second) - Worklist.emplace_back(I); - return true; // Will need to look at parent nodes. - }; - - if (!Enqueue(Or)) - return false; // Not an `or` reduction pattern. - - while (!Worklist.empty()) { - auto *I = Worklist.pop_back_val(); - - // Okay, which instruction is this node? - switch (I->getOpcode()) { - case Instruction::Or: - // Got an `or` node. That's fine, just recurse into it's operands. - for (Value *Op : I->operands()) - if (!Enqueue(Op)) - return false; // Not an `or` reduction pattern. - continue; - - case Instruction::Shl: - case Instruction::ZExt: - // `shl`/`zext` nodes are fine, just recurse into their base operand. - if (!Enqueue(I->getOperand(0))) - return false; // Not an `or` reduction pattern. - continue; - - case Instruction::Load: - // Perfect, `load` node means we've reached an edge of the graph. - continue; - - default: // Unknown node. - return false; // Not an `or` reduction pattern. - } - } - - return true; -} - -/// Return true if it may be profitable to convert this (X|Y) into (X+Y). 
-static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) { - // Don't bother to convert this up unless either the LHS is an associable add - // or subtract or mul or if this is only used by one of the above. - // This is only a compile-time improvement, it is not needed for correctness! - auto isInteresting = [](Value *V) { - for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul}) - if (isReassociableOp(V, Op)) - return true; - return false; - }; - - if (any_of(Or->operands(), isInteresting)) - return true; - - Value *VB = Or->user_back(); - if (Or->hasOneUse() && isInteresting(VB)) - return true; - - return false; -} - -/// If we have (X|Y), and iff X and Y have no common bits set, -/// transform this into (X+Y) to allow arithmetics reassociation. -static BinaryOperator *ConvertOrWithNoCommonBitsToAdd(Instruction *Or) { - // Convert an or into an add. - BinaryOperator *New = - CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or); - New->setHasNoSignedWrap(); - New->setHasNoUnsignedWrap(); - New->takeName(Or); - - // Everyone now refers to the add instruction. - Or->replaceAllUsesWith(New); - New->setDebugLoc(Or->getDebugLoc()); - - LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n'); - return New; -} - +// See if this `or` looks like an load widening reduction, i.e. that it +// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't +// ensure that the pattern is *really* a load widening reduction, +// we do not ensure that it can really be replaced with a widened load, +// only that it mostly looks like one. +static bool isLoadCombineCandidate(Instruction *Or) { + SmallVector<Instruction *, 8> Worklist; + SmallSet<Instruction *, 8> Visited; + + auto Enqueue = [&](Value *V) { + auto *I = dyn_cast<Instruction>(V); + // Each node of an `or` reduction must be an instruction, + if (!I) + return false; // Node is certainly not part of an `or` load reduction. + // Only process instructions we have never processed before. + if (Visited.insert(I).second) + Worklist.emplace_back(I); + return true; // Will need to look at parent nodes. + }; + + if (!Enqueue(Or)) + return false; // Not an `or` reduction pattern. + + while (!Worklist.empty()) { + auto *I = Worklist.pop_back_val(); + + // Okay, which instruction is this node? + switch (I->getOpcode()) { + case Instruction::Or: + // Got an `or` node. That's fine, just recurse into it's operands. + for (Value *Op : I->operands()) + if (!Enqueue(Op)) + return false; // Not an `or` reduction pattern. + continue; + + case Instruction::Shl: + case Instruction::ZExt: + // `shl`/`zext` nodes are fine, just recurse into their base operand. + if (!Enqueue(I->getOperand(0))) + return false; // Not an `or` reduction pattern. + continue; + + case Instruction::Load: + // Perfect, `load` node means we've reached an edge of the graph. + continue; + + default: // Unknown node. + return false; // Not an `or` reduction pattern. + } + } + + return true; +} + +/// Return true if it may be profitable to convert this (X|Y) into (X+Y). +static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) { + // Don't bother to convert this up unless either the LHS is an associable add + // or subtract or mul or if this is only used by one of the above. + // This is only a compile-time improvement, it is not needed for correctness! 
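
The (X|Y) to (X+Y) conversion handled by ShouldConvertOrWithNoCommonBitsToAdd and ConvertOrWithNoCommonBitsToAdd is only valid when the operands share no set bits (the haveNoCommonBitsSet check later in this diff), because OR and ADD agree exactly when no bit position produces a carry. A small standalone check with hypothetical masks:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Disjoint bit patterns: OR and ADD produce the same value (no carries).
      uint32_t x = 0x00FF0000u, y = 0x000000FFu;
      assert((x & y) == 0);            // the "no common bits" precondition
      assert((x | y) == x + y);        // both are 0x00FF00FF

      // Overlapping bits: ADD carries, OR does not, so the rewrite is invalid.
      uint32_t a = 0x1u, b = 0x1u;
      assert((a | b) == 0x1u && a + b == 0x2u);
      return 0;
    }
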
+ auto isInteresting = [](Value *V) { + for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul}) + if (isReassociableOp(V, Op)) + return true; + return false; + }; + + if (any_of(Or->operands(), isInteresting)) + return true; + + Value *VB = Or->user_back(); + if (Or->hasOneUse() && isInteresting(VB)) + return true; + + return false; +} + +/// If we have (X|Y), and iff X and Y have no common bits set, +/// transform this into (X+Y) to allow arithmetics reassociation. +static BinaryOperator *ConvertOrWithNoCommonBitsToAdd(Instruction *Or) { + // Convert an or into an add. + BinaryOperator *New = + CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or); + New->setHasNoSignedWrap(); + New->setHasNoUnsignedWrap(); + New->takeName(Or); + + // Everyone now refers to the add instruction. + Or->replaceAllUsesWith(New); + New->setDebugLoc(Or->getDebugLoc()); + + LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n'); + return New; +} + /// Return true if we should break up this subtract of X-Y into (X + -Y). static bool ShouldBreakUpSubtract(Instruction *Sub) { // If this is a negation, we can't split it up! @@ -1128,7 +1128,7 @@ static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<WeakTrackingVH> &Ops) { if (Ops.size() == 1) return Ops.back(); - Value *V1 = Ops.pop_back_val(); + Value *V1 = Ops.pop_back_val(); Value *V2 = EmitAddTreeOfValues(I, Ops); return CreateAdd(V2, V1, "reass.add", I, I); } @@ -1992,7 +1992,7 @@ Value *ReassociatePass::OptimizeExpression(BinaryOperator *I, void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I, OrderedSet &Insts) { assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); - SmallVector<Value *, 4> Ops(I->operands()); + SmallVector<Value *, 4> Ops(I->operands()); ValueRankMap.erase(I); Insts.remove(I); RedoInsts.remove(I); @@ -2009,7 +2009,7 @@ void ReassociatePass::EraseInst(Instruction *I) { assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump()); - SmallVector<Value *, 8> Ops(I->operands()); + SmallVector<Value *, 8> Ops(I->operands()); // Erase the dead instruction. ValueRankMap.erase(I); RedoInsts.remove(I); @@ -2209,19 +2209,19 @@ void ReassociatePass::OptimizeInst(Instruction *I) { if (I->getType()->isIntegerTy(1)) return; - // If this is a bitwise or instruction of operands - // with no common bits set, convert it to X+Y. - if (I->getOpcode() == Instruction::Or && - ShouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) && - haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), - I->getModule()->getDataLayout(), /*AC=*/nullptr, I, - /*DT=*/nullptr)) { - Instruction *NI = ConvertOrWithNoCommonBitsToAdd(I); - RedoInsts.insert(I); - MadeChange = true; - I = NI; - } - + // If this is a bitwise or instruction of operands + // with no common bits set, convert it to X+Y. + if (I->getOpcode() == Instruction::Or && + ShouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) && + haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), + I->getModule()->getDataLayout(), /*AC=*/nullptr, I, + /*DT=*/nullptr)) { + Instruction *NI = ConvertOrWithNoCommonBitsToAdd(I); + RedoInsts.insert(I); + MadeChange = true; + I = NI; + } + // If this is a subtract instruction which is not already in negate form, // see if we can convert it to X+-Y. 
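
The ShouldBreakUpSubtract path mentioned just above feeds subtracts into the same reassociation machinery by treating X - Y as X + (-Y). A trivial arithmetic sanity check with invented values:

    #include <cstdio>

    int main() {
      // Rewriting X - Y as X + (-Y) lets reassociation see one flat add tree,
      // so e.g. (a - b) + b can be recognized as a + (-b) + b and folded to a.
      int a = 7, b = 3;
      std::printf("%d %d %d\n", (a - b) + b, a + (-b) + b, a);   // 7 7 7
      return 0;
    }
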
if (I->getOpcode() == Instruction::Sub) { diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp index a49b9ad3f6..fef2f84a63 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp @@ -15,23 +15,23 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/Reg2Mem.h" +#include "llvm/Transforms/Scalar/Reg2Mem.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include <list> using namespace llvm; @@ -41,17 +41,17 @@ using namespace llvm; STATISTIC(NumRegsDemoted, "Number of registers demoted"); STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); -static bool valueEscapes(const Instruction &Inst) { - const BasicBlock *BB = Inst.getParent(); - for (const User *U : Inst.users()) { - const Instruction *UI = cast<Instruction>(U); - if (UI->getParent() != BB || isa<PHINode>(UI)) - return true; - } - return false; +static bool valueEscapes(const Instruction &Inst) { + const BasicBlock *BB = Inst.getParent(); + for (const User *U : Inst.users()) { + const Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) + return true; + } + return false; } -static bool runPass(Function &F) { +static bool runPass(Function &F) { // Insert all new allocas into entry block. BasicBlock *BBEntry = &F.getEntryBlock(); assert(pred_empty(BBEntry) && @@ -70,72 +70,72 @@ static bool runPass(Function &F) { // Find the escaped instructions. But don't create stack slots for // allocas in entry block. 
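
valueEscapes above flags values that are used outside their defining block or by a PHI; DemoteRegToStack then gives each such value a stack slot allocated in the entry block. A rough source-level picture of the effect, with invented function names, not the pass's actual output:

    #include <cstdio>

    // Before demotion: 'v' lives in a register across basic blocks.
    int before(int a, bool c) {
      int v = a * 2;
      return c ? v + 1 : v - 1;
    }

    // After demotion (conceptually): an entry-block slot, a store at the
    // definition, and a reload at every cross-block use.
    int after(int a, bool c) {
      int slot;          // stands in for the alloca at AllocaInsertionPoint
      slot = a * 2;      // store at the original definition
      if (c)
        return slot + 1; // reload in the taken block
      return slot - 1;   // reload in the other block
    }

    int main() {
      std::printf("%d %d\n", before(5, true), after(5, true));   // 11 11
      return 0;
    }
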
std::list<Instruction*> WorkList; - for (Instruction &I : instructions(F)) - if (!(isa<AllocaInst>(I) && I.getParent() == BBEntry) && valueEscapes(I)) - WorkList.push_front(&I); + for (Instruction &I : instructions(F)) + if (!(isa<AllocaInst>(I) && I.getParent() == BBEntry) && valueEscapes(I)) + WorkList.push_front(&I); // Demote escaped instructions NumRegsDemoted += WorkList.size(); - for (Instruction *I : WorkList) - DemoteRegToStack(*I, false, AllocaInsertionPoint); + for (Instruction *I : WorkList) + DemoteRegToStack(*I, false, AllocaInsertionPoint); WorkList.clear(); // Find all phi's - for (BasicBlock &BB : F) - for (auto &Phi : BB.phis()) - WorkList.push_front(&Phi); + for (BasicBlock &BB : F) + for (auto &Phi : BB.phis()) + WorkList.push_front(&Phi); // Demote phi nodes NumPhisDemoted += WorkList.size(); - for (Instruction *I : WorkList) - DemotePHIToStack(cast<PHINode>(I), AllocaInsertionPoint); + for (Instruction *I : WorkList) + DemotePHIToStack(cast<PHINode>(I), AllocaInsertionPoint); return true; } -PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *LI = &AM.getResult<LoopAnalysis>(F); - unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); - bool Changed = runPass(F); - if (N == 0 && !Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LoopAnalysis>(); - return PA; -} - -namespace { -struct RegToMemLegacy : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - RegToMemLegacy() : FunctionPass(ID) { - initializeRegToMemLegacyPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(BreakCriticalEdgesID); - AU.addPreservedID(BreakCriticalEdgesID); - } - - bool runOnFunction(Function &F) override { - if (F.isDeclaration() || skipFunction(F)) - return false; - return runPass(F); - } -}; -} // namespace - -char RegToMemLegacy::ID = 0; -INITIALIZE_PASS_BEGIN(RegToMemLegacy, "reg2mem", - "Demote all values to stack slots", false, false) -INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) -INITIALIZE_PASS_END(RegToMemLegacy, "reg2mem", - "Demote all values to stack slots", false, false) - +PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *LI = &AM.getResult<LoopAnalysis>(F); + unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); + bool Changed = runPass(F); + if (N == 0 && !Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + return PA; +} + +namespace { +struct RegToMemLegacy : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + RegToMemLegacy() : FunctionPass(ID) { + initializeRegToMemLegacyPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addPreservedID(BreakCriticalEdgesID); + } + + bool runOnFunction(Function &F) override { + if (F.isDeclaration() || skipFunction(F)) + return false; + return runPass(F); + } +}; +} // namespace + +char RegToMemLegacy::ID = 0; +INITIALIZE_PASS_BEGIN(RegToMemLegacy, "reg2mem", + "Demote all values to stack slots", false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_END(RegToMemLegacy, "reg2mem", + "Demote 
all values to stack slots", false, false) + // createDemoteRegisterToMemory - Provide an entry point to create this pass. -char &llvm::DemoteRegisterToMemoryID = RegToMemLegacy::ID; +char &llvm::DemoteRegisterToMemoryID = RegToMemLegacy::ID; FunctionPass *llvm::createDemoteRegisterToMemoryPass() { - return new RegToMemLegacy(); + return new RegToMemLegacy(); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index b7830555bf..ee39ffa000 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1487,7 +1487,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ uint32_t NumPatchBytes = 0; uint32_t Flags = uint32_t(StatepointFlags::None); - SmallVector<Value *, 8> CallArgs(Call->args()); + SmallVector<Value *, 8> CallArgs(Call->args()); Optional<ArrayRef<Use>> DeoptArgs; if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt)) DeoptArgs = Bundle->Inputs; @@ -1520,8 +1520,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ Value *CallTarget = Call->getCalledOperand(); if (Function *F = dyn_cast<Function>(CallTarget)) { - auto IID = F->getIntrinsicID(); - if (IID == Intrinsic::experimental_deoptimize) { + auto IID = F->getIntrinsicID(); + if (IID == Intrinsic::experimental_deoptimize) { // Calls to llvm.experimental.deoptimize are lowered to calls to the // __llvm_deoptimize symbol. We want to resolve this now, since the // verifier does not allow taking the address of an intrinsic function. @@ -1541,101 +1541,101 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ .getCallee(); IsDeoptimize = true; - } else if (IID == Intrinsic::memcpy_element_unordered_atomic || - IID == Intrinsic::memmove_element_unordered_atomic) { - // Unordered atomic memcpy and memmove intrinsics which are not explicitly - // marked as "gc-leaf-function" should be lowered in a GC parseable way. - // Specifically, these calls should be lowered to the - // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols. - // Similarly to __llvm_deoptimize we want to resolve this now, since the - // verifier does not allow taking the address of an intrinsic function. - // - // Moreover we need to shuffle the arguments for the call in order to - // accommodate GC. The underlying source and destination objects might be - // relocated during copy operation should the GC occur. To relocate the - // derived source and destination pointers the implementation of the - // intrinsic should know the corresponding base pointers. - // - // To make the base pointers available pass them explicitly as arguments: - // memcpy(dest_derived, source_derived, ...) => - // memcpy(dest_base, dest_offset, source_base, source_offset, ...) 
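
The rewrite sketched in the RewriteStatepointsForGC comment below passes (base, offset) pairs instead of derived pointers so that a relocating collector can update the base while the offset stays valid. A standalone model of what the GetBaseAndOffset helper computes, with a local buffer standing in for a GC-managed object:

    #include <cstdint>
    #include <cstdio>

    int main() {
      char obj[64];                       // stand-in for a GC-managed object
      char *base = obj;
      char *derived = base + 16;          // derived pointer into the object

      // GetBaseAndOffset computes ptrtoint(derived) - ptrtoint(base):
      uintptr_t offset = (uintptr_t)derived - (uintptr_t)base;
      std::printf("offset = %llu\n", (unsigned long long)offset);   // 16

      // If the collector moves the object, only 'base' changes; the derived
      // pointer is rebuilt as relocated_base + offset inside the runtime helper.
      return 0;
    }
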
- auto &Context = Call->getContext(); - auto &DL = Call->getModule()->getDataLayout(); - auto GetBaseAndOffset = [&](Value *Derived) { - assert(Result.PointerToBase.count(Derived)); - unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); - unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); - Value *Base = Result.PointerToBase.find(Derived)->second; - Value *Base_int = Builder.CreatePtrToInt( - Base, Type::getIntNTy(Context, IntPtrSize)); - Value *Derived_int = Builder.CreatePtrToInt( - Derived, Type::getIntNTy(Context, IntPtrSize)); - return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int)); - }; - - auto *Dest = CallArgs[0]; - Value *DestBase, *DestOffset; - std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest); - - auto *Source = CallArgs[1]; - Value *SourceBase, *SourceOffset; - std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source); - - auto *LengthInBytes = CallArgs[2]; - auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]); - - CallArgs.clear(); - CallArgs.push_back(DestBase); - CallArgs.push_back(DestOffset); - CallArgs.push_back(SourceBase); - CallArgs.push_back(SourceOffset); - CallArgs.push_back(LengthInBytes); - - SmallVector<Type *, 8> DomainTy; - for (Value *Arg : CallArgs) - DomainTy.push_back(Arg->getType()); - auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy, - /* isVarArg = */ false); - - auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) { - uint64_t ElementSize = ElementSizeCI->getZExtValue(); - if (IID == Intrinsic::memcpy_element_unordered_atomic) { - switch (ElementSize) { - case 1: - return "__llvm_memcpy_element_unordered_atomic_safepoint_1"; - case 2: - return "__llvm_memcpy_element_unordered_atomic_safepoint_2"; - case 4: - return "__llvm_memcpy_element_unordered_atomic_safepoint_4"; - case 8: - return "__llvm_memcpy_element_unordered_atomic_safepoint_8"; - case 16: - return "__llvm_memcpy_element_unordered_atomic_safepoint_16"; - default: - llvm_unreachable("unexpected element size!"); - } - } - assert(IID == Intrinsic::memmove_element_unordered_atomic); - switch (ElementSize) { - case 1: - return "__llvm_memmove_element_unordered_atomic_safepoint_1"; - case 2: - return "__llvm_memmove_element_unordered_atomic_safepoint_2"; - case 4: - return "__llvm_memmove_element_unordered_atomic_safepoint_4"; - case 8: - return "__llvm_memmove_element_unordered_atomic_safepoint_8"; - case 16: - return "__llvm_memmove_element_unordered_atomic_safepoint_16"; - default: - llvm_unreachable("unexpected element size!"); - } - }; - - CallTarget = - F->getParent() - ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy) - .getCallee(); + } else if (IID == Intrinsic::memcpy_element_unordered_atomic || + IID == Intrinsic::memmove_element_unordered_atomic) { + // Unordered atomic memcpy and memmove intrinsics which are not explicitly + // marked as "gc-leaf-function" should be lowered in a GC parseable way. + // Specifically, these calls should be lowered to the + // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols. + // Similarly to __llvm_deoptimize we want to resolve this now, since the + // verifier does not allow taking the address of an intrinsic function. + // + // Moreover we need to shuffle the arguments for the call in order to + // accommodate GC. The underlying source and destination objects might be + // relocated during copy operation should the GC occur. 
To relocate the + // derived source and destination pointers the implementation of the + // intrinsic should know the corresponding base pointers. + // + // To make the base pointers available pass them explicitly as arguments: + // memcpy(dest_derived, source_derived, ...) => + // memcpy(dest_base, dest_offset, source_base, source_offset, ...) + auto &Context = Call->getContext(); + auto &DL = Call->getModule()->getDataLayout(); + auto GetBaseAndOffset = [&](Value *Derived) { + assert(Result.PointerToBase.count(Derived)); + unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); + unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); + Value *Base = Result.PointerToBase.find(Derived)->second; + Value *Base_int = Builder.CreatePtrToInt( + Base, Type::getIntNTy(Context, IntPtrSize)); + Value *Derived_int = Builder.CreatePtrToInt( + Derived, Type::getIntNTy(Context, IntPtrSize)); + return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int)); + }; + + auto *Dest = CallArgs[0]; + Value *DestBase, *DestOffset; + std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest); + + auto *Source = CallArgs[1]; + Value *SourceBase, *SourceOffset; + std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source); + + auto *LengthInBytes = CallArgs[2]; + auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]); + + CallArgs.clear(); + CallArgs.push_back(DestBase); + CallArgs.push_back(DestOffset); + CallArgs.push_back(SourceBase); + CallArgs.push_back(SourceOffset); + CallArgs.push_back(LengthInBytes); + + SmallVector<Type *, 8> DomainTy; + for (Value *Arg : CallArgs) + DomainTy.push_back(Arg->getType()); + auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy, + /* isVarArg = */ false); + + auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) { + uint64_t ElementSize = ElementSizeCI->getZExtValue(); + if (IID == Intrinsic::memcpy_element_unordered_atomic) { + switch (ElementSize) { + case 1: + return "__llvm_memcpy_element_unordered_atomic_safepoint_1"; + case 2: + return "__llvm_memcpy_element_unordered_atomic_safepoint_2"; + case 4: + return "__llvm_memcpy_element_unordered_atomic_safepoint_4"; + case 8: + return "__llvm_memcpy_element_unordered_atomic_safepoint_8"; + case 16: + return "__llvm_memcpy_element_unordered_atomic_safepoint_16"; + default: + llvm_unreachable("unexpected element size!"); + } + } + assert(IID == Intrinsic::memmove_element_unordered_atomic); + switch (ElementSize) { + case 1: + return "__llvm_memmove_element_unordered_atomic_safepoint_1"; + case 2: + return "__llvm_memmove_element_unordered_atomic_safepoint_2"; + case 4: + return "__llvm_memmove_element_unordered_atomic_safepoint_4"; + case 8: + return "__llvm_memmove_element_unordered_atomic_safepoint_8"; + case 16: + return "__llvm_memmove_element_unordered_atomic_safepoint_16"; + default: + llvm_unreachable("unexpected element size!"); + } + }; + + CallTarget = + F->getParent() + ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy) + .getCallee(); } } @@ -2036,7 +2036,7 @@ static void relocationViaAlloca( /// tests in ways which make them less useful in testing fused safepoints. 
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) { SmallSet<T, 8> Seen; - erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; }); + erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; }); } /// Insert holders so that each Value is obviously live through the entire @@ -2108,10 +2108,10 @@ static Value* findRematerializableChainToBasePointer( // Helper function for the "rematerializeLiveValues". Compute cost of the use // chain we are going to rematerialize. -static InstructionCost -chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain, +static InstructionCost +chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain, TargetTransformInfo &TTI) { - InstructionCost Cost = 0; + InstructionCost Cost = 0; for (Instruction *Instr : Chain) { if (CastInst *CI = dyn_cast<CastInst>(Instr)) { @@ -2120,8 +2120,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain, Type *SrcTy = CI->getOperand(0)->getType(); Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, - TTI::getCastContextHint(CI), - TargetTransformInfo::TCK_SizeAndLatency, CI); + TTI::getCastContextHint(CI), + TargetTransformInfo::TCK_SizeAndLatency, CI); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) { // Cost of the address calculation @@ -2218,7 +2218,7 @@ static void rematerializeLiveValues(CallBase *Call, assert(Info.LiveSet.count(AlternateRootPhi)); } // Compute cost of this chain - InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI); + InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI); // TODO: We can also account for cases when we will be able to remove some // of the rematerialized values by later optimization passes. I.e if // we rematerialized several intersecting chains. Or if original values @@ -2499,7 +2499,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // That Value* no longer exists and we need to use the new gc_result. // Thankfully, the live set is embedded in the statepoint (and updated), so // we just grab that. - llvm::append_range(Live, Info.StatepointToken->gc_args()); + llvm::append_range(Live, Info.StatepointToken->gc_args()); #ifndef NDEBUG // Do some basic sanity checks on our liveness results before performing // relocation. Relocation can and will turn mistakes in liveness results @@ -2675,27 +2675,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision"); auto NeedsRewrite = [&TLI](Instruction &I) { - if (const auto *Call = dyn_cast<CallBase>(&I)) { - if (isa<GCStatepointInst>(Call)) - return false; - if (callsGCLeafFunction(Call, TLI)) - return false; - - // Normally it's up to the frontend to make sure that non-leaf calls also - // have proper deopt state if it is required. We make an exception for - // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics - // these are non-leaf by default. They might be generated by the optimizer - // which doesn't know how to produce a proper deopt state. So if we see a - // non-leaf memcpy/memmove without deopt state just treat it as a leaf - // copy and don't produce a statepoint. 
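
The unique_unsorted helper shown earlier in this file's diff dedupes a vector in place while keeping the first occurrence of each element. The same erase-if-plus-seen-set idiom in plain C++, with standard containers standing in for SmallVectorImpl and SmallSet:

    #include <algorithm>
    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> vec = {3, 1, 3, 2, 1};
      std::set<int> seen;
      // Remove an element iff it was seen before: insert().second is false
      // only for repeats, mirroring erase_if(Vec, ... !Seen.insert(V).second).
      vec.erase(std::remove_if(vec.begin(), vec.end(),
                               [&](int v) { return !seen.insert(v).second; }),
                vec.end());
      for (int v : vec)
        std::printf("%d ", v);   // 3 1 2
      std::printf("\n");
      return 0;
    }
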
- if (!AllowStatepointWithNoDeoptInfo && - !Call->getOperandBundle(LLVMContext::OB_deopt)) { - assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) && - "Don't expect any other calls here!"); - return false; - } - return true; - } + if (const auto *Call = dyn_cast<CallBase>(&I)) { + if (isa<GCStatepointInst>(Call)) + return false; + if (callsGCLeafFunction(Call, TLI)) + return false; + + // Normally it's up to the frontend to make sure that non-leaf calls also + // have proper deopt state if it is required. We make an exception for + // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics + // these are non-leaf by default. They might be generated by the optimizer + // which doesn't know how to produce a proper deopt state. So if we see a + // non-leaf memcpy/memmove without deopt state just treat it as a leaf + // copy and don't produce a statepoint. + if (!AllowStatepointWithNoDeoptInfo && + !Call->getOperandBundle(LLVMContext::OB_deopt)) { + assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) && + "Don't expect any other calls here!"); + return false; + } + return true; + } return false; }; @@ -2733,8 +2733,8 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, // of liveness sets for no good reason. It may be harder to do this post // insertion since relocations and base phis can confuse things. for (BasicBlock &BB : F) - if (BB.getUniquePredecessor()) - MadeChange |= FoldSingleEntryPHINodes(&BB); + if (BB.getUniquePredecessor()) + MadeChange |= FoldSingleEntryPHINodes(&BB); // Before we start introducing relocations, we want to tweak the IR a bit to // avoid unfortunate code generation effects. The main example is that we diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp index 8feed9e9eb..97a5040300 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp @@ -23,7 +23,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -34,7 +34,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -105,7 +105,7 @@ bool isConstant(const ValueLatticeElement &LV) { // ValueLatticeElement::isOverdefined() and is intended to be used in the // transition to ValueLatticeElement. bool isOverdefined(const ValueLatticeElement &LV) { - return !LV.isUnknownOrUndef() && !isConstant(LV); + return !LV.isUnknownOrUndef() && !isConstant(LV); } //===----------------------------------------------------------------------===// @@ -234,7 +234,7 @@ public: for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) TrackedMultipleRetVals.insert( std::make_pair(std::make_pair(F, i), ValueLatticeElement())); - } else if (!F->getReturnType()->isVoidTy()) + } else if (!F->getReturnType()->isVoidTy()) TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement())); } @@ -276,7 +276,7 @@ public: // isEdgeFeasible - Return true if the control flow edge from the 'From' basic // block to the 'To' basic block is currently feasible. 
- bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const; + bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const; std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const { std::vector<ValueLatticeElement> StructValues; @@ -542,14 +542,14 @@ private: auto Iter = AdditionalUsers.find(I); if (Iter != AdditionalUsers.end()) { - // Copy additional users before notifying them of changes, because new - // users may be added, potentially invalidating the iterator. - SmallVector<Instruction *, 2> ToNotify; + // Copy additional users before notifying them of changes, because new + // users may be added, potentially invalidating the iterator. + SmallVector<Instruction *, 2> ToNotify; for (User *U : Iter->second) if (auto *UI = dyn_cast<Instruction>(U)) - ToNotify.push_back(UI); - for (Instruction *UI : ToNotify) - OperandChangedState(UI); + ToNotify.push_back(UI); + for (Instruction *UI : ToNotify) + OperandChangedState(UI); } } void handleCallOverdefined(CallBase &CB); @@ -654,30 +654,30 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI, Succs[0] = true; return; } - const ValueLatticeElement &SCValue = getValueState(SI->getCondition()); - if (ConstantInt *CI = getConstantInt(SCValue)) { - Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true; + const ValueLatticeElement &SCValue = getValueState(SI->getCondition()); + if (ConstantInt *CI = getConstantInt(SCValue)) { + Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true; + return; + } + + // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM + // is ready. + if (SCValue.isConstantRange(/*UndefAllowed=*/false)) { + const ConstantRange &Range = SCValue.getConstantRange(); + for (const auto &Case : SI->cases()) { + const APInt &CaseValue = Case.getCaseValue()->getValue(); + if (Range.contains(CaseValue)) + Succs[Case.getSuccessorIndex()] = true; + } + + // TODO: Determine whether default case is reachable. + Succs[SI->case_default()->getSuccessorIndex()] = true; return; } - // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM - // is ready. - if (SCValue.isConstantRange(/*UndefAllowed=*/false)) { - const ConstantRange &Range = SCValue.getConstantRange(); - for (const auto &Case : SI->cases()) { - const APInt &CaseValue = Case.getCaseValue()->getValue(); - if (Range.contains(CaseValue)) - Succs[Case.getSuccessorIndex()] = true; - } - - // TODO: Determine whether default case is reachable. - Succs[SI->case_default()->getSuccessorIndex()] = true; - return; - } - - // Overdefined or unknown condition? All destinations are executable! - if (!SCValue.isUnknownOrUndef()) - Succs.assign(TI.getNumSuccessors(), true); + // Overdefined or unknown condition? All destinations are executable! + if (!SCValue.isUnknownOrUndef()) + Succs.assign(TI.getNumSuccessors(), true); return; } @@ -723,7 +723,7 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI, // isEdgeFeasible - Return true if the control flow edge from the 'From' basic // block to the 'To' basic block is currently feasible. -bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { +bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { // Check if we've called markEdgeExecutable on the edge yet. (We could // be more aggressive and try to consider edges which haven't been marked // yet, but there isn't any need.) 
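
In the getFeasibleSuccessors change above, a switch whose condition has a known constant range marks as feasible only the cases whose values fall inside that range, plus, conservatively, the default successor. A standalone model with a hypothetical half-open range:

    #include <cstdio>

    int main() {
      // Suppose SCCP proved the switch condition lies in [1, 4).
      int lo = 1, hi = 4;
      int caseValues[] = {0, 1, 2, 5, 7};
      for (int c : caseValues) {
        bool feasible = (c >= lo && c < hi);       // Range.contains(CaseValue)
        std::printf("case %d -> %s\n", c, feasible ? "feasible" : "dead");
      }
      // The default successor is still marked feasible until the pass can
      // prove that every in-range value is covered by some case.
      return 0;
    }
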
@@ -848,16 +848,16 @@ void SCCPSolver::visitCastInst(CastInst &I) { auto &LV = getValueState(&I); ConstantRange OpRange = OpSt.getConstantRange(); Type *DestTy = I.getDestTy(); - // Vectors where all elements have the same known constant range are treated - // as a single constant range in the lattice. When bitcasting such vectors, - // there is a mis-match between the width of the lattice value (single - // constant range) and the original operands (vector). Go to overdefined in - // that case. - if (I.getOpcode() == Instruction::BitCast && - I.getOperand(0)->getType()->isVectorTy() && - OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy)) - return (void)markOverdefined(&I); - + // Vectors where all elements have the same known constant range are treated + // as a single constant range in the lattice. When bitcasting such vectors, + // there is a mis-match between the width of the lattice value (single + // constant range) and the original operands (vector). Go to overdefined in + // that case. + if (I.getOpcode() == Instruction::BitCast && + I.getOperand(0)->getType()->isVectorTy() && + OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy)) + return (void)markOverdefined(&I); + ConstantRange Res = OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy)); mergeInValue(LV, &I, ValueLatticeElement::getRange(Res)); @@ -1138,9 +1138,9 @@ static ValueLatticeElement getValueFromMetadata(const Instruction *I) { if (I->getType()->isIntegerTy()) return ValueLatticeElement::getRange( getConstantRangeFromMetadata(*Ranges)); - if (I->hasMetadata(LLVMContext::MD_nonnull)) - return ValueLatticeElement::getNot( - ConstantPointerNull::get(cast<PointerType>(I->getType()))); + if (I->hasMetadata(LLVMContext::MD_nonnull)) + return ValueLatticeElement::getNot( + ConstantPointerNull::get(cast<PointerType>(I->getType()))); return ValueLatticeElement::getOverdefined(); } @@ -1293,33 +1293,33 @@ void SCCPSolver::handleCallResult(CallBase &CB) { auto *PI = getPredicateInfoFor(&CB); assert(PI && "Missing predicate info for ssa.copy"); - const Optional<PredicateConstraint> &Constraint = PI->getConstraint(); - if (!Constraint) { + const Optional<PredicateConstraint> &Constraint = PI->getConstraint(); + if (!Constraint) { mergeInValue(ValueState[&CB], &CB, CopyOfVal); return; } - CmpInst::Predicate Pred = Constraint->Predicate; - Value *OtherOp = Constraint->OtherOp; + CmpInst::Predicate Pred = Constraint->Predicate; + Value *OtherOp = Constraint->OtherOp; - // Wait until OtherOp is resolved. - if (getValueState(OtherOp).isUnknown()) { - addAdditionalUser(OtherOp, &CB); + // Wait until OtherOp is resolved. + if (getValueState(OtherOp).isUnknown()) { + addAdditionalUser(OtherOp, &CB); return; } - // TODO: Actually filp MayIncludeUndef for the created range to false, - // once most places in the optimizer respect the branches on - // undef/poison are UB rule. The reason why the new range cannot be - // undef is as follows below: - // The new range is based on a branch condition. That guarantees that - // neither of the compare operands can be undef in the branch targets, - // unless we have conditions that are always true/false (e.g. icmp ule - // i32, %a, i32_max). For the latter overdefined/empty range will be - // inferred, but the branch will get folded accordingly anyways. 
- bool MayIncludeUndef = !isa<PredicateAssume>(PI); - - ValueLatticeElement CondVal = getValueState(OtherOp); + // TODO: Actually filp MayIncludeUndef for the created range to false, + // once most places in the optimizer respect the branches on + // undef/poison are UB rule. The reason why the new range cannot be + // undef is as follows below: + // The new range is based on a branch condition. That guarantees that + // neither of the compare operands can be undef in the branch targets, + // unless we have conditions that are always true/false (e.g. icmp ule + // i32, %a, i32_max). For the latter overdefined/empty range will be + // inferred, but the branch will get folded accordingly anyways. + bool MayIncludeUndef = !isa<PredicateAssume>(PI); + + ValueLatticeElement CondVal = getValueState(OtherOp); ValueLatticeElement &IV = ValueState[&CB]; if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) { auto ImposedCR = @@ -1343,47 +1343,47 @@ void SCCPSolver::handleCallResult(CallBase &CB) { if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement()) NewCR = CopyOfCR; - addAdditionalUser(OtherOp, &CB); + addAdditionalUser(OtherOp, &CB); mergeInValue( IV, &CB, - ValueLatticeElement::getRange(NewCR, MayIncludeUndef)); + ValueLatticeElement::getRange(NewCR, MayIncludeUndef)); return; } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only // propagate equal constants. - addAdditionalUser(OtherOp, &CB); + addAdditionalUser(OtherOp, &CB); mergeInValue(IV, &CB, CondVal); return; - } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() && - !MayIncludeUndef) { - // Propagate inequalities. - addAdditionalUser(OtherOp, &CB); - mergeInValue(IV, &CB, - ValueLatticeElement::getNot(CondVal.getConstant())); - return; + } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() && + !MayIncludeUndef) { + // Propagate inequalities. + addAdditionalUser(OtherOp, &CB); + mergeInValue(IV, &CB, + ValueLatticeElement::getNot(CondVal.getConstant())); + return; } return (void)mergeInValue(IV, &CB, CopyOfVal); } - - if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { - // Compute result range for intrinsics supported by ConstantRange. - // Do this even if we don't know a range for all operands, as we may - // still know something about the result range, e.g. of abs(x). - SmallVector<ConstantRange, 2> OpRanges; - for (Value *Op : II->args()) { - const ValueLatticeElement &State = getValueState(Op); - if (State.isConstantRange()) - OpRanges.push_back(State.getConstantRange()); - else - OpRanges.push_back( - ConstantRange::getFull(Op->getType()->getScalarSizeInBits())); - } - - ConstantRange Result = - ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges); - return (void)mergeInValue(II, ValueLatticeElement::getRange(Result)); - } + + if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { + // Compute result range for intrinsics supported by ConstantRange. + // Do this even if we don't know a range for all operands, as we may + // still know something about the result range, e.g. of abs(x). 
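
The comment above notes that even a partially known operand still constrains an intrinsic's result, e.g. abs(x). A brute-force check of that idea over a small hypothetical input range:

    #include <algorithm>
    #include <climits>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      // If x is only known to lie in [-4, 10], abs(x) still has a useful
      // range, [0, 10]; that is the kind of fact ConstantRange::intrinsic
      // recovers even when other operands are fully unknown.
      int minAbs = INT_MAX, maxAbs = INT_MIN;
      for (int x = -4; x <= 10; ++x) {
        minAbs = std::min(minAbs, std::abs(x));
        maxAbs = std::max(maxAbs, std::abs(x));
      }
      std::printf("abs range: [%d, %d]\n", minAbs, maxAbs);   // [0, 10]
      return 0;
    }
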
+ SmallVector<ConstantRange, 2> OpRanges; + for (Value *Op : II->args()) { + const ValueLatticeElement &State = getValueState(Op); + if (State.isConstantRange()) + OpRanges.push_back(State.getConstantRange()); + else + OpRanges.push_back( + ConstantRange::getFull(Op->getType()->getScalarSizeInBits())); + } + + ConstantRange Result = + ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges); + return (void)mergeInValue(II, ValueLatticeElement::getRange(Result)); + } } // The common case is that we aren't tracking the callee, either because we @@ -1453,7 +1453,7 @@ void SCCPSolver::Solve() { // Process the basic block work list. while (!BBWorkList.empty()) { - BasicBlock *BB = BBWorkList.pop_back_val(); + BasicBlock *BB = BBWorkList.pop_back_val(); LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n'); @@ -1481,7 +1481,7 @@ void SCCPSolver::Solve() { /// This scan also checks for values that use undefs. It conservatively marks /// them as overdefined. bool SCCPSolver::ResolvedUndefsIn(Function &F) { - bool MadeChange = false; + bool MadeChange = false; for (BasicBlock &BB : F) { if (!BBExecutable.count(&BB)) continue; @@ -1507,10 +1507,10 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // more precise than this but it isn't worth bothering. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { ValueLatticeElement &LV = getStructValueState(&I, i); - if (LV.isUnknownOrUndef()) { + if (LV.isUnknownOrUndef()) { markOverdefined(LV, &I); - MadeChange = true; - } + MadeChange = true; + } } continue; } @@ -1537,7 +1537,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { } markOverdefined(&I); - MadeChange = true; + MadeChange = true; } // Check to see if we have a branch or switch on an undefined value. If so @@ -1554,8 +1554,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (isa<UndefValue>(BI->getCondition())) { BI->setCondition(ConstantInt::getFalse(BI->getContext())); markEdgeExecutable(&BB, TI->getSuccessor(1)); - MadeChange = true; - continue; + MadeChange = true; + continue; } // Otherwise, it is a branch on a symbolic value which is currently @@ -1564,7 +1564,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // FIXME: Distinguish between dead code and an LLVM "undef" value. BasicBlock *DefaultSuccessor = TI->getSuccessor(1); if (markEdgeExecutable(&BB, DefaultSuccessor)) - MadeChange = true; + MadeChange = true; continue; } @@ -1583,8 +1583,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (isa<UndefValue>(IBR->getAddress())) { IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0))); markEdgeExecutable(&BB, IBR->getSuccessor(0)); - MadeChange = true; - continue; + MadeChange = true; + continue; } // Otherwise, it is a branch on a symbolic value which is currently @@ -1594,7 +1594,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // we can assume the branch has undefined behavior instead. BasicBlock *DefaultSuccessor = IBR->getSuccessor(0); if (markEdgeExecutable(&BB, DefaultSuccessor)) - MadeChange = true; + MadeChange = true; continue; } @@ -1609,8 +1609,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (isa<UndefValue>(SI->getCondition())) { SI->setCondition(SI->case_begin()->getCaseValue()); markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor()); - MadeChange = true; - continue; + MadeChange = true; + continue; } // Otherwise, it is a branch on a symbolic value which is currently @@ -1619,13 +1619,13 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // FIXME: Distinguish between dead code and an LLVM "undef" value. 
BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor(); if (markEdgeExecutable(&BB, DefaultSuccessor)) - MadeChange = true; + MadeChange = true; continue; } } - return MadeChange; + return MadeChange; } static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { @@ -1747,7 +1747,7 @@ static bool runSCCP(Function &F, const DataLayout &DL, LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB); ++NumDeadBlocks; - NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first; + NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first; MadeChanges = true; continue; @@ -1870,68 +1870,68 @@ static void findReturnsToZap(Function &F, } } -static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, - DomTreeUpdater &DTU) { - SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors; - bool HasNonFeasibleEdges = false; - for (BasicBlock *Succ : successors(BB)) { - if (Solver.isEdgeFeasible(BB, Succ)) - FeasibleSuccessors.insert(Succ); - else - HasNonFeasibleEdges = true; - } - - // All edges feasible, nothing to do. - if (!HasNonFeasibleEdges) - return false; - - // SCCP can only determine non-feasible edges for br, switch and indirectbr. - Instruction *TI = BB->getTerminator(); - assert((isa<BranchInst>(TI) || isa<SwitchInst>(TI) || - isa<IndirectBrInst>(TI)) && - "Terminator must be a br, switch or indirectbr"); - - if (FeasibleSuccessors.size() == 1) { - // Replace with an unconditional branch to the only feasible successor. - BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin(); - SmallVector<DominatorTree::UpdateType, 8> Updates; - bool HaveSeenOnlyFeasibleSuccessor = false; - for (BasicBlock *Succ : successors(BB)) { - if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) { - // Don't remove the edge to the only feasible successor the first time - // we see it. We still do need to remove any multi-edges to it though. - HaveSeenOnlyFeasibleSuccessor = true; - continue; - } - - Succ->removePredecessor(BB); - Updates.push_back({DominatorTree::Delete, BB, Succ}); +static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, + DomTreeUpdater &DTU) { + SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors; + bool HasNonFeasibleEdges = false; + for (BasicBlock *Succ : successors(BB)) { + if (Solver.isEdgeFeasible(BB, Succ)) + FeasibleSuccessors.insert(Succ); + else + HasNonFeasibleEdges = true; + } + + // All edges feasible, nothing to do. + if (!HasNonFeasibleEdges) + return false; + + // SCCP can only determine non-feasible edges for br, switch and indirectbr. + Instruction *TI = BB->getTerminator(); + assert((isa<BranchInst>(TI) || isa<SwitchInst>(TI) || + isa<IndirectBrInst>(TI)) && + "Terminator must be a br, switch or indirectbr"); + + if (FeasibleSuccessors.size() == 1) { + // Replace with an unconditional branch to the only feasible successor. + BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin(); + SmallVector<DominatorTree::UpdateType, 8> Updates; + bool HaveSeenOnlyFeasibleSuccessor = false; + for (BasicBlock *Succ : successors(BB)) { + if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) { + // Don't remove the edge to the only feasible successor the first time + // we see it. We still do need to remove any multi-edges to it though. 
+ HaveSeenOnlyFeasibleSuccessor = true; + continue; + } + + Succ->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } - - BranchInst::Create(OnlyFeasibleSuccessor, BB); - TI->eraseFromParent(); - DTU.applyUpdatesPermissive(Updates); - } else if (FeasibleSuccessors.size() > 1) { - SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI)); - SmallVector<DominatorTree::UpdateType, 8> Updates; - for (auto CI = SI->case_begin(); CI != SI->case_end();) { - if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) { - ++CI; - continue; - } - - BasicBlock *Succ = CI->getCaseSuccessor(); - Succ->removePredecessor(BB); - Updates.push_back({DominatorTree::Delete, BB, Succ}); - SI.removeCase(CI); - // Don't increment CI, as we removed a case. + + BranchInst::Create(OnlyFeasibleSuccessor, BB); + TI->eraseFromParent(); + DTU.applyUpdatesPermissive(Updates); + } else if (FeasibleSuccessors.size() > 1) { + SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI)); + SmallVector<DominatorTree::UpdateType, 8> Updates; + for (auto CI = SI->case_begin(); CI != SI->case_end();) { + if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) { + ++CI; + continue; + } + + BasicBlock *Succ = CI->getCaseSuccessor(); + Succ->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + SI.removeCase(CI); + // Don't increment CI, as we removed a case. } - - DTU.applyUpdatesPermissive(Updates); + + DTU.applyUpdatesPermissive(Updates); } else { - llvm_unreachable("Must have at least one feasible successor"); + llvm_unreachable("Must have at least one feasible successor"); } - return true; + return true; } bool llvm::runIPSCCP( @@ -1983,12 +1983,12 @@ bool llvm::runIPSCCP( while (ResolvedUndefs) { LLVM_DEBUG(dbgs() << "RESOLVING UNDEFS\n"); ResolvedUndefs = false; - for (Function &F : M) { - if (Solver.ResolvedUndefsIn(F)) + for (Function &F : M) { + if (Solver.ResolvedUndefsIn(F)) ResolvedUndefs = true; - } - if (ResolvedUndefs) - Solver.Solve(); + } + if (ResolvedUndefs) + Solver.Solve(); } bool MadeChanges = false; @@ -2002,35 +2002,35 @@ bool llvm::runIPSCCP( SmallVector<BasicBlock *, 512> BlocksToErase; - if (Solver.isBlockExecutable(&F.front())) { - bool ReplacedPointerArg = false; - for (Argument &Arg : F.args()) { - if (!Arg.use_empty() && tryToReplaceWithConstant(Solver, &Arg)) { - ReplacedPointerArg |= Arg.getType()->isPointerTy(); + if (Solver.isBlockExecutable(&F.front())) { + bool ReplacedPointerArg = false; + for (Argument &Arg : F.args()) { + if (!Arg.use_empty() && tryToReplaceWithConstant(Solver, &Arg)) { + ReplacedPointerArg |= Arg.getType()->isPointerTy(); ++IPNumArgsElimed; } } - // If we replaced an argument, the argmemonly and - // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove - // them from both the function and callsites. - if (ReplacedPointerArg) { - AttrBuilder AttributesToRemove; - AttributesToRemove.addAttribute(Attribute::ArgMemOnly); - AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove); - - for (User *U : F.users()) { - auto *CB = dyn_cast<CallBase>(U); - if (!CB || CB->getCalledFunction() != &F) - continue; - - CB->removeAttributes(AttributeList::FunctionIndex, - AttributesToRemove); - } - } - } - + // If we replaced an argument, the argmemonly and + // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove + // them from both the function and callsites. 
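removeNonFeasibleEdges, whose body ends just above, first partitions the block's successors into feasible and infeasible sets, bails out early if every edge is feasible, and only then rewrites the terminator (an unconditional branch when a single successor survives, case removal when a switch keeps several). A container-level model of that decision in plain C++; ToyBlock and the feasibility predicate are invented stand-ins, not the LLVM classes.

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct ToyBlock {
  std::string Name;
  std::vector<ToyBlock *> Succs;
};

// Stand-in for Solver.isEdgeFeasible(BB, Succ).
static bool isEdgeFeasible(const ToyBlock &, const ToyBlock &To) {
  return To.Name != "dead";
}

int main() {
  ToyBlock Dead{"dead", {}}, Live{"live", {}};
  ToyBlock BB{"entry", {&Live, &Dead, &Live}}; // multi-edge to Live, as a switch can have

  std::set<ToyBlock *> Feasible;
  bool HasNonFeasible = false;
  for (ToyBlock *S : BB.Succs) {
    if (isEdgeFeasible(BB, *S))
      Feasible.insert(S);
    else
      HasNonFeasible = true;
  }

  if (!HasNonFeasible)
    std::cout << "all edges feasible, nothing to rewrite\n";
  else if (Feasible.size() == 1)
    std::cout << "replace terminator with a branch to "
              << (*Feasible.begin())->Name << "\n";
  else
    std::cout << "keep the switch, drop only the infeasible cases\n";
  return 0;
}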
+ if (ReplacedPointerArg) { + AttrBuilder AttributesToRemove; + AttributesToRemove.addAttribute(Attribute::ArgMemOnly); + AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); + F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove); + + for (User *U : F.users()) { + auto *CB = dyn_cast<CallBase>(U); + if (!CB || CB->getCalledFunction() != &F) + continue; + + CB->removeAttributes(AttributeList::FunctionIndex, + AttributesToRemove); + } + } + } + SmallPtrSet<Value *, 32> InsertedValues; for (BasicBlock &BB : F) { if (!Solver.isBlockExecutable(&BB)) { @@ -2063,10 +2063,10 @@ bool llvm::runIPSCCP( /*UseLLVMTrap=*/false, /*PreserveLCSSA=*/false, &DTU); - for (BasicBlock &BB : F) - MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU); + for (BasicBlock &BB : F) + MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU); - for (BasicBlock *DeadBB : BlocksToErase) + for (BasicBlock *DeadBB : BlocksToErase) DTU.deleteBB(DeadBB); for (BasicBlock &BB : F) { @@ -2099,47 +2099,47 @@ bool llvm::runIPSCCP( for (const auto &I : Solver.getTrackedRetVals()) { Function *F = I.first; - const ValueLatticeElement &ReturnValue = I.second; - - // If there is a known constant range for the return value, add !range - // metadata to the function's call sites. - if (ReturnValue.isConstantRange() && - !ReturnValue.getConstantRange().isSingleElement()) { - // Do not add range metadata if the return value may include undef. - if (ReturnValue.isConstantRangeIncludingUndef()) - continue; - - auto &CR = ReturnValue.getConstantRange(); - for (User *User : F->users()) { - auto *CB = dyn_cast<CallBase>(User); - if (!CB || CB->getCalledFunction() != F) - continue; - - // Limit to cases where the return value is guaranteed to be neither - // poison nor undef. Poison will be outside any range and currently - // values outside of the specified range cause immediate undefined - // behavior. - if (!isGuaranteedNotToBeUndefOrPoison(CB, nullptr, CB)) - continue; - - // Do not touch existing metadata for now. - // TODO: We should be able to take the intersection of the existing - // metadata and the inferred range. - if (CB->getMetadata(LLVMContext::MD_range)) - continue; - - LLVMContext &Context = CB->getParent()->getContext(); - Metadata *RangeMD[] = { - ConstantAsMetadata::get(ConstantInt::get(Context, CR.getLower())), - ConstantAsMetadata::get(ConstantInt::get(Context, CR.getUpper()))}; - CB->setMetadata(LLVMContext::MD_range, MDNode::get(Context, RangeMD)); - } + const ValueLatticeElement &ReturnValue = I.second; + + // If there is a known constant range for the return value, add !range + // metadata to the function's call sites. + if (ReturnValue.isConstantRange() && + !ReturnValue.getConstantRange().isSingleElement()) { + // Do not add range metadata if the return value may include undef. + if (ReturnValue.isConstantRangeIncludingUndef()) + continue; + + auto &CR = ReturnValue.getConstantRange(); + for (User *User : F->users()) { + auto *CB = dyn_cast<CallBase>(User); + if (!CB || CB->getCalledFunction() != F) + continue; + + // Limit to cases where the return value is guaranteed to be neither + // poison nor undef. Poison will be outside any range and currently + // values outside of the specified range cause immediate undefined + // behavior. + if (!isGuaranteedNotToBeUndefOrPoison(CB, nullptr, CB)) + continue; + + // Do not touch existing metadata for now. + // TODO: We should be able to take the intersection of the existing + // metadata and the inferred range. 
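A few lines up, once a pointer argument has been folded to a constant, runIPSCCP strips argmemonly and inaccessiblemem_or_argmemonly from the function and from every direct call site, because accesses that used to go through that argument no longer count as argument memory. A minimal standalone illustration of that AttrBuilder-based stripping with the LLVM 12 C++ API; the module and function names are made up for the example.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("attr_demo", Ctx);
  FunctionType *FT = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "callee", &M);

  // Pretend the frontend marked the callee argmemonly.
  F->addFnAttr(Attribute::ArgMemOnly);

  // Strip it the same way the pass does once a pointer argument was replaced.
  AttrBuilder AttributesToRemove;
  AttributesToRemove.addAttribute(Attribute::ArgMemOnly);
  AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
  F->removeAttributes(AttributeList::FunctionIndex, AttributesToRemove);

  errs() << "argmemonly still present: "
         << (F->hasFnAttribute(Attribute::ArgMemOnly) ? "yes" : "no") << "\n";
  return 0;
}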
+ if (CB->getMetadata(LLVMContext::MD_range)) + continue; + + LLVMContext &Context = CB->getParent()->getContext(); + Metadata *RangeMD[] = { + ConstantAsMetadata::get(ConstantInt::get(Context, CR.getLower())), + ConstantAsMetadata::get(ConstantInt::get(Context, CR.getUpper()))}; + CB->setMetadata(LLVMContext::MD_range, MDNode::get(Context, RangeMD)); + } continue; - } - if (F->getReturnType()->isVoidTy()) - continue; - if (isConstant(ReturnValue) || ReturnValue.isUnknownOrUndef()) - findReturnsToZap(*F, ReturnsToZap, Solver); + } + if (F->getReturnType()->isVoidTy()) + continue; + if (isConstant(ReturnValue) || ReturnValue.isUnknownOrUndef()) + findReturnsToZap(*F, ReturnsToZap, Solver); } for (auto F : Solver.getMRVFunctionsTracked()) { @@ -2151,29 +2151,29 @@ bool llvm::runIPSCCP( } // Zap all returns which we've identified as zap to change. - SmallSetVector<Function *, 8> FuncZappedReturn; + SmallSetVector<Function *, 8> FuncZappedReturn; for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) { Function *F = ReturnsToZap[i]->getParent()->getParent(); ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); - // Record all functions that are zapped. - FuncZappedReturn.insert(F); - } - - // Remove the returned attribute for zapped functions and the - // corresponding call sites. - for (Function *F : FuncZappedReturn) { - for (Argument &A : F->args()) - F->removeParamAttr(A.getArgNo(), Attribute::Returned); - for (Use &U : F->uses()) { - // Skip over blockaddr users. - if (isa<BlockAddress>(U.getUser())) - continue; - CallBase *CB = cast<CallBase>(U.getUser()); - for (Use &Arg : CB->args()) - CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned); - } - } - + // Record all functions that are zapped. + FuncZappedReturn.insert(F); + } + + // Remove the returned attribute for zapped functions and the + // corresponding call sites. + for (Function *F : FuncZappedReturn) { + for (Argument &A : F->args()) + F->removeParamAttr(A.getArgNo(), Attribute::Returned); + for (Use &U : F->uses()) { + // Skip over blockaddr users. + if (isa<BlockAddress>(U.getUser())) + continue; + CallBase *CB = cast<CallBase>(U.getUser()); + for (Use &Arg : CB->args()) + CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned); + } + } + // If we inferred constant or undef values for globals variables, we can // delete the global and any stores that remain to it. for (auto &I : make_early_inc_range(Solver.getTrackedGlobals())) { diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp index af510f1a84..587c9e89d3 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp @@ -268,11 +268,11 @@ public: /// Access the dead users for this alloca. ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; } - /// Access Uses that should be dropped if the alloca is promotable. - ArrayRef<Use *> getDeadUsesIfPromotable() const { - return DeadUseIfPromotable; - } - + /// Access Uses that should be dropped if the alloca is promotable. + ArrayRef<Use *> getDeadUsesIfPromotable() const { + return DeadUseIfPromotable; + } + /// Access the dead operands referring to this alloca. /// /// These are operands which have cannot actually be used to refer to the @@ -327,9 +327,9 @@ private: /// they come from outside of the allocated space. SmallVector<Instruction *, 8> DeadUsers; - /// Uses which will become dead if can promote the alloca. 
- SmallVector<Use *, 8> DeadUseIfPromotable; - + /// Uses which will become dead if can promote the alloca. + SmallVector<Use *, 8> DeadUseIfPromotable; + /// Operands which will become dead if we rewrite the alloca. /// /// These are operands that in their particular use can be replaced with @@ -467,8 +467,8 @@ class AllocaSlices::partition_iterator // Remove the uses which have ended in the prior partition. This // cannot change the max split slice end because we just checked that // the prior partition ended prior to that max. - llvm::erase_if(P.SplitTails, - [&](Slice *S) { return S->endOffset() <= P.EndOffset; }); + llvm::erase_if(P.SplitTails, + [&](Slice *S) { return S->endOffset() <= P.EndOffset; }); assert(llvm::any_of(P.SplitTails, [&](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset; @@ -784,9 +784,9 @@ private: LI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) return PI.setAborted(&LI); - if (isa<ScalableVectorType>(LI.getType())) - return PI.setAborted(&LI); - + if (isa<ScalableVectorType>(LI.getType())) + return PI.setAborted(&LI); + uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize(); return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } @@ -802,9 +802,9 @@ private: SI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) return PI.setAborted(&SI); - if (isa<ScalableVectorType>(ValOp->getType())) - return PI.setAborted(&SI); - + if (isa<ScalableVectorType>(ValOp->getType())) + return PI.setAborted(&SI); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize(); // If this memory access can be shown to *statically* extend outside the @@ -930,11 +930,11 @@ private: // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. void visitIntrinsicInst(IntrinsicInst &II) { - if (II.isDroppable()) { - AS.DeadUseIfPromotable.push_back(U); - return; - } - + if (II.isDroppable()) { + AS.DeadUseIfPromotable.push_back(U); + return; + } + if (!IsOffsetKnown) return PI.setAborted(&II); @@ -1072,11 +1072,11 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) return; } - llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); }); + llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); }); // Sort the uses. This arranges for the offsets to be in ascending order, // and the sizes to be in descending order. - llvm::stable_sort(Slices); + llvm::stable_sort(Slices); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1122,9 +1122,9 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); } /// Walk the range of a partitioning looking for a common type to cover this /// sequence of slices. -static std::pair<Type *, IntegerType *> -findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, - uint64_t EndOffset) { +static std::pair<Type *, IntegerType *> +findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, + uint64_t EndOffset) { Type *Ty = nullptr; bool TyIsCommon = true; IntegerType *ITy = nullptr; @@ -1168,7 +1168,7 @@ findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, Ty = UserTy; } - return {TyIsCommon ? Ty : nullptr, ITy}; + return {TyIsCommon ? Ty : nullptr, ITy}; } /// PHI instructions that use an alloca and are subsequently loaded can be @@ -1392,8 +1392,8 @@ static void speculateSelectInstLoads(SelectInst &SI) { /// This will return the BasePtr if that is valid, or build a new GEP /// instruction using the IRBuilder if GEP-ing is needed. 
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, - SmallVectorImpl<Value *> &Indices, - const Twine &NamePrefix) { + SmallVectorImpl<Value *> &Indices, + const Twine &NamePrefix) { if (Indices.empty()) return BasePtr; @@ -1418,7 +1418,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, Value *BasePtr, Type *Ty, Type *TargetTy, SmallVectorImpl<Value *> &Indices, - const Twine &NamePrefix) { + const Twine &NamePrefix) { if (Ty == TargetTy) return buildGEP(IRB, BasePtr, Indices, NamePrefix); @@ -1463,7 +1463,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, SmallVectorImpl<Value *> &Indices, - const Twine &NamePrefix) { + const Twine &NamePrefix) { if (Offset == 0) return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, NamePrefix); @@ -1538,7 +1538,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *TargetTy, SmallVectorImpl<Value *> &Indices, - const Twine &NamePrefix) { + const Twine &NamePrefix) { PointerType *Ty = cast<PointerType>(Ptr->getType()); // Don't consider any GEPs through an i8* as natural unless the TargetTy is @@ -1549,8 +1549,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, Type *ElementTy = Ty->getElementType(); if (!ElementTy->isSized()) return nullptr; // We can't GEP through an unsized element. - if (isa<ScalableVectorType>(ElementTy)) - return nullptr; + if (isa<ScalableVectorType>(ElementTy)) + return nullptr; APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy).getFixedSize()); if (ElementSize == 0) @@ -1579,8 +1579,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, - APInt Offset, Type *PointerTy, - const Twine &NamePrefix) { + APInt Offset, Type *PointerTy, + const Twine &NamePrefix) { // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. SmallPtrSet<Value *, 4> Visited; @@ -1842,7 +1842,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, if (!S.isSplittable()) return false; // Skip any unsplittable intrinsics. } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { - if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) + if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) return false; } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. @@ -1926,9 +1926,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // do that until all the backends are known to produce good code for all // integer vector types. if (!HaveCommonEltTy) { - llvm::erase_if(CandidateTys, [](VectorType *VTy) { - return !VTy->getElementType()->isIntegerTy(); - }); + llvm::erase_if(CandidateTys, [](VectorType *VTy) { + return !VTy->getElementType()->isIntegerTy(); + }); // If there were no integer vector types, give up. 
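The getNaturalGEPWithOffset / getNaturalGEPRecursively helpers touched above turn a flat byte offset into "natural" GEP indices by repeatedly dividing by the element size and recursing into the element type. A plain-C++ toy of that decomposition for an array-of-structs layout; the 16-byte and 4-byte sizes are invented for the example, whereas the real code asks DataLayout and walks struct fields individually.

#include <cstdint>
#include <cstdio>
#include <vector>

// One level of the decomposition: pick the element index, keep the remainder
// for the next (inner) type.
static void decompose(uint64_t Offset, uint64_t ElementSize,
                      std::vector<uint64_t> &Indices, uint64_t &Remainder) {
  Indices.push_back(Offset / ElementSize);
  Remainder = Offset % ElementSize;
}

int main() {
  // Pretend the pointee is [N x {i32, i32, i32, i32}], i.e. 16-byte elements
  // made of four 4-byte fields.
  std::vector<uint64_t> Indices;
  uint64_t Remainder = 0;
  decompose(/*Offset=*/40, /*ElementSize=*/16, Indices, Remainder);
  decompose(Remainder, /*ElementSize=*/4, Indices, Remainder);
  std::printf("gep indices:");
  for (uint64_t I : Indices)
    std::printf(" %llu", static_cast<unsigned long long>(I));
  std::printf("  (remainder %llu)\n",
              static_cast<unsigned long long>(Remainder));
  // Offset 40 -> element 2, field 2, remainder 0.
  return 0;
}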
if (CandidateTys.empty()) @@ -2072,7 +2072,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (!S.isSplittable()) return false; // Skip any unsplittable intrinsics. } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { - if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) + if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) return false; } else { return false; @@ -2113,7 +2113,7 @@ static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, // that we cover the alloca. // FIXME: We shouldn't consider split slices that happen to start in the // partition here... - bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits); + bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits); for (const Slice &S : P) if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL, @@ -2206,7 +2206,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, Mask.reserve(NumElements); for (unsigned i = BeginIndex; i != EndIndex; ++i) Mask.push_back(i); - V = IRB.CreateShuffleVector(V, Mask, Name + ".extract"); + V = IRB.CreateShuffleVector(V, Mask, Name + ".extract"); LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); return V; } @@ -2239,22 +2239,22 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, // use a shuffle vector to widen it with undef elements, and then // a second shuffle vector to select between the loaded vector and the // incoming vector. - SmallVector<int, 8> Mask; + SmallVector<int, 8> Mask; Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements()); for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) if (i >= BeginIndex && i < EndIndex) - Mask.push_back(i - BeginIndex); + Mask.push_back(i - BeginIndex); else - Mask.push_back(-1); - V = IRB.CreateShuffleVector(V, Mask, Name + ".expand"); + Mask.push_back(-1); + V = IRB.CreateShuffleVector(V, Mask, Name + ".expand"); LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); - SmallVector<Constant *, 8> Mask2; - Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements()); + SmallVector<Constant *, 8> Mask2; + Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements()); for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) - Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); + Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); - V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend"); + V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend"); LLVM_DEBUG(dbgs() << " blend: " << *V << "\n"); return V; @@ -2458,7 +2458,7 @@ private: void deleteIfTriviallyDead(Value *V) { Instruction *I = cast<Instruction>(V); if (isInstructionTriviallyDead(I)) - Pass.DeadInsts.push_back(I); + Pass.DeadInsts.push_back(I); } Value *rewriteVectorizedLoadInst() { @@ -2524,7 +2524,7 @@ private: NewAI.getAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); if (NewLI->isAtomic()) @@ -2563,7 +2563,7 @@ private: IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); @@ 
-2598,7 +2598,7 @@ private: LI.replaceAllUsesWith(V); } - Pass.DeadInsts.push_back(&LI); + Pass.DeadInsts.push_back(&LI); deleteIfTriviallyDead(OldOp); LLVM_DEBUG(dbgs() << " to: " << *V << "\n"); return !LI.isVolatile() && !IsPtrAdjusted; @@ -2626,8 +2626,8 @@ private: } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign()); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - Pass.DeadInsts.push_back(&SI); + Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2650,8 +2650,8 @@ private: Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - Pass.DeadInsts.push_back(&SI); + Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } @@ -2720,12 +2720,12 @@ private: NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) NewSI->setAlignment(SI.getAlign()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.push_back(&SI); deleteIfTriviallyDead(OldOp); LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n"); @@ -2786,11 +2786,11 @@ private: } // Record this instruction for deletion. - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.push_back(&II); Type *AllocaTy = NewAI.getAllocatedType(); Type *ScalarTy = AllocaTy->getScalarType(); - + const bool CanContinue = [&]() { if (VecTy || IntTy) return true; @@ -2816,7 +2816,7 @@ private: getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, MaybeAlign(getSliceAlign()), II.isVolatile()); if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -2885,7 +2885,7 @@ private: StoreInst *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile()); if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); } @@ -2956,7 +2956,7 @@ private: return false; } // Record this instruction for deletion. - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.push_back(&II); // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. 
@@ -3006,7 +3006,7 @@ private: CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign, Size, II.isVolatile()); if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -3060,7 +3060,7 @@ private: LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign, II.isVolatile(), "copyload"); if (AATags) - Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); Src = Load; } @@ -3080,27 +3080,27 @@ private: StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); } bool visitIntrinsicInst(IntrinsicInst &II) { - assert((II.isLifetimeStartOrEnd() || II.isDroppable()) && - "Unexpected intrinsic!"); + assert((II.isLifetimeStartOrEnd() || II.isDroppable()) && + "Unexpected intrinsic!"); LLVM_DEBUG(dbgs() << " original: " << II << "\n"); // Record this instruction for deletion. - Pass.DeadInsts.push_back(&II); - - if (II.isDroppable()) { - assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume"); - // TODO For now we forget assumed information, this can be improved. - OldPtr->dropDroppableUsesIn(II); - return true; - } - - assert(II.getArgOperand(1) == OldPtr); + Pass.DeadInsts.push_back(&II); + + if (II.isDroppable()) { + assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume"); + // TODO For now we forget assumed information, this can be improved. + OldPtr->dropDroppableUsesIn(II); + return true; + } + + assert(II.getArgOperand(1) == OldPtr); // Lifetime intrinsics are only promotable if they cover the whole alloca. // Therefore, we drop lifetime intrinsics which don't cover the whole // alloca. 
@@ -3381,13 +3381,13 @@ private: IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load"); - - APInt Offset( - DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); - if (AATags && - GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) - Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); - + + APInt Offset( + DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); + if (AATags && + GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) + Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); + Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } @@ -3433,13 +3433,13 @@ private: IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); StoreInst *Store = IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment); - - APInt Offset( - DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); - if (AATags && - GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) - Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); - + + APInt Offset( + DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); + if (AATags && + GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) + Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); + LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); } }; @@ -3485,7 +3485,7 @@ private: << "\n " << GEPI); IRBuilderTy Builder(&GEPI); - SmallVector<Value *, 4> Index(GEPI.indices()); + SmallVector<Value *, 4> Index(GEPI.indices()); bool IsInBounds = GEPI.isInBounds(); Value *True = Sel->getTrueValue(); @@ -3539,27 +3539,27 @@ private: << "\n " << GEPI << "\n to: "); - SmallVector<Value *, 4> Index(GEPI.indices()); + SmallVector<Value *, 4> Index(GEPI.indices()); bool IsInBounds = GEPI.isInBounds(); IRBuilderTy PHIBuilder(GEPI.getParent()->getFirstNonPHI()); PHINode *NewPN = PHIBuilder.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(), PHI->getName() + ".sroa.phi"); for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) { - BasicBlock *B = PHI->getIncomingBlock(I); - Value *NewVal = nullptr; - int Idx = NewPN->getBasicBlockIndex(B); - if (Idx >= 0) { - NewVal = NewPN->getIncomingValue(Idx); - } else { - Instruction *In = cast<Instruction>(PHI->getIncomingValue(I)); - - IRBuilderTy B(In->getParent(), std::next(In->getIterator())); - NewVal = IsInBounds - ? B.CreateInBoundsGEP(In, Index, In->getName() + ".sroa.gep") - : B.CreateGEP(In, Index, In->getName() + ".sroa.gep"); - } - NewPN->addIncoming(NewVal, B); + BasicBlock *B = PHI->getIncomingBlock(I); + Value *NewVal = nullptr; + int Idx = NewPN->getBasicBlockIndex(B); + if (Idx >= 0) { + NewVal = NewPN->getIncomingValue(Idx); + } else { + Instruction *In = cast<Instruction>(PHI->getIncomingValue(I)); + + IRBuilderTy B(In->getParent(), std::next(In->getIterator())); + NewVal = IsInBounds + ? B.CreateInBoundsGEP(In, Index, In->getName() + ".sroa.gep") + : B.CreateGEP(In, Index, In->getName() + ".sroa.gep"); + } + NewPN->addIncoming(NewVal, B); } Visited.erase(&GEPI); @@ -3901,53 +3901,53 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // such loads and stores, we can only pre-split them if their splits exactly // match relative to their starting offset. We have to verify this prior to // any rewriting. 
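The pre-splitting logic right below prunes its Stores and Loads work lists three times with llvm::erase_if, each time keyed on the UnsplittableLoads set. A tiny standalone reminder of what that helper does; the offsets and the predicate are made up.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SmallVector<int, 8> Offsets = {0, 4, 8, 12, 16};
  // erase_if drops every element the predicate accepts and compacts the
  // container in place, which is how the pass discards unsplittable
  // candidates without rebuilding the vectors.
  llvm::erase_if(Offsets, [](int O) { return O % 8 != 0; });
  for (int O : Offsets)
    llvm::errs() << O << ' ';   // prints: 0 8 16
  llvm::errs() << '\n';
  return 0;
}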
- llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { - // Lookup the load we are storing in our map of split - // offsets. - auto *LI = cast<LoadInst>(SI->getValueOperand()); - // If it was completely unsplittable, then we're done, - // and this store can't be pre-split. - if (UnsplittableLoads.count(LI)) - return true; - - auto LoadOffsetsI = SplitOffsetsMap.find(LI); - if (LoadOffsetsI == SplitOffsetsMap.end()) - return false; // Unrelated loads are definitely safe. - auto &LoadOffsets = LoadOffsetsI->second; - - // Now lookup the store's offsets. - auto &StoreOffsets = SplitOffsetsMap[SI]; - - // If the relative offsets of each split in the load and - // store match exactly, then we can split them and we - // don't need to remove them here. - if (LoadOffsets.Splits == StoreOffsets.Splits) - return false; - - LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n" - << " " << *LI << "\n" - << " " << *SI << "\n"); - - // We've found a store and load that we need to split - // with mismatched relative splits. Just give up on them - // and remove both instructions from our list of - // candidates. - UnsplittableLoads.insert(LI); - return true; - }); + llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { + // Lookup the load we are storing in our map of split + // offsets. + auto *LI = cast<LoadInst>(SI->getValueOperand()); + // If it was completely unsplittable, then we're done, + // and this store can't be pre-split. + if (UnsplittableLoads.count(LI)) + return true; + + auto LoadOffsetsI = SplitOffsetsMap.find(LI); + if (LoadOffsetsI == SplitOffsetsMap.end()) + return false; // Unrelated loads are definitely safe. + auto &LoadOffsets = LoadOffsetsI->second; + + // Now lookup the store's offsets. + auto &StoreOffsets = SplitOffsetsMap[SI]; + + // If the relative offsets of each split in the load and + // store match exactly, then we can split them and we + // don't need to remove them here. + if (LoadOffsets.Splits == StoreOffsets.Splits) + return false; + + LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n" + << " " << *LI << "\n" + << " " << *SI << "\n"); + + // We've found a store and load that we need to split + // with mismatched relative splits. Just give up on them + // and remove both instructions from our list of + // candidates. + UnsplittableLoads.insert(LI); + return true; + }); // Now we have to go *back* through all the stores, because a later store may // have caused an earlier store's load to become unsplittable and if it is // unsplittable for the later store, then we can't rely on it being split in // the earlier store either. - llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) { - auto *LI = cast<LoadInst>(SI->getValueOperand()); - return UnsplittableLoads.count(LI); - }); + llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) { + auto *LI = cast<LoadInst>(SI->getValueOperand()); + return UnsplittableLoads.count(LI); + }); // Once we've established all the loads that can't be split for some reason, // filter any that made it into our list out. - llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) { - return UnsplittableLoads.count(LI); - }); + llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) { + return UnsplittableLoads.count(LI); + }); // If no loads or stores are left, there is no pre-splitting to be done for // this alloca. @@ -4084,7 +4084,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } // Mark the original store as dead. 
- DeadInsts.push_back(SI); + DeadInsts.push_back(SI); } // Save the split loads if there are deferred stores among the users. @@ -4092,7 +4092,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads))); // Mark the original load as dead and kill the original slice. - DeadInsts.push_back(LI); + DeadInsts.push_back(LI); Offsets.S->kill(); } @@ -4214,14 +4214,14 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // trivial CSE, including instcombine. if (LI->hasOneUse()) { assert(*LI->user_begin() == SI && "Single use isn't this store!"); - DeadInsts.push_back(LI); + DeadInsts.push_back(LI); } - DeadInsts.push_back(SI); + DeadInsts.push_back(SI); Offsets.S->kill(); } // Remove the killed slices that have ben pre-split. - llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); }); + llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); }); // Insert our new slices. This will sort and merge them into the sorted // sequence. @@ -4235,9 +4235,9 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // Finally, don't try to promote any allocas that new require re-splitting. // They have already been added to the worklist above. - llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) { - return ResplitPromotableAllocas.count(AI); - }); + llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) { + return ResplitPromotableAllocas.count(AI); + }); return true; } @@ -4259,21 +4259,21 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // or an i8 array of an appropriate size. Type *SliceTy = nullptr; const DataLayout &DL = AI.getModule()->getDataLayout(); - std::pair<Type *, IntegerType *> CommonUseTy = - findCommonType(P.begin(), P.end(), P.endOffset()); - // Do all uses operate on the same type? - if (CommonUseTy.first) - if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size()) - SliceTy = CommonUseTy.first; - // If not, can we find an appropriate subtype in the original allocated type? + std::pair<Type *, IntegerType *> CommonUseTy = + findCommonType(P.begin(), P.end(), P.endOffset()); + // Do all uses operate on the same type? + if (CommonUseTy.first) + if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size()) + SliceTy = CommonUseTy.first; + // If not, can we find an appropriate subtype in the original allocated type? if (!SliceTy) if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), P.beginOffset(), P.size())) SliceTy = TypePartitionTy; - // If still not, can we use the largest bitwidth integer type used? - if (!SliceTy && CommonUseTy.second) - if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size()) - SliceTy = CommonUseTy.second; + // If still not, can we use the largest bitwidth integer type used? 
+ if (!SliceTy && CommonUseTy.second) + if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size()) + SliceTy = CommonUseTy.second; if ((!SliceTy || (SliceTy->isArrayTy() && SliceTy->getArrayElementType()->isIntegerTy())) && DL.isLegalInteger(P.size() * 8)) @@ -4363,13 +4363,13 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, } if (Promotable) { - for (Use *U : AS.getDeadUsesIfPromotable()) { - auto *OldInst = dyn_cast<Instruction>(U->get()); - Value::dropDroppableUse(*U); - if (OldInst) - if (isInstructionTriviallyDead(OldInst)) - DeadInsts.push_back(OldInst); - } + for (Use *U : AS.getDeadUsesIfPromotable()) { + auto *OldInst = dyn_cast<Instruction>(U->get()); + Value::dropDroppableUse(*U); + if (OldInst) + if (isInstructionTriviallyDead(OldInst)) + DeadInsts.push_back(OldInst); + } if (PHIUsers.empty() && SelectUsers.empty()) { // Promote the alloca. PromotableAllocas.push_back(NewAI); @@ -4504,8 +4504,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // Migrate debug information from the old alloca to the new alloca(s) // and the individual partitions. TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI); - for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) { - auto *Expr = DbgDeclare->getExpression(); + for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) { + auto *Expr = DbgDeclare->getExpression(); DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false); uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize(); @@ -4536,7 +4536,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } // The alloca may be larger than the variable. - auto VarSize = DbgDeclare->getVariable()->getSizeInBits(); + auto VarSize = DbgDeclare->getVariable()->getSizeInBits(); if (VarSize) { if (Size > *VarSize) Size = *VarSize; @@ -4554,21 +4554,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } } - // Remove any existing intrinsics on the new alloca describing - // the variable fragment. - for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) { - auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS, - const DbgVariableIntrinsic *RHS) { - return LHS->getVariable() == RHS->getVariable() && - LHS->getDebugLoc()->getInlinedAt() == - RHS->getDebugLoc()->getInlinedAt(); - }; - if (SameVariableFragment(OldDII, DbgDeclare)) - OldDII->eraseFromParent(); - } - - DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr, - DbgDeclare->getDebugLoc(), &AI); + // Remove any existing intrinsics on the new alloca describing + // the variable fragment. + for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) { + auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS, + const DbgVariableIntrinsic *RHS) { + return LHS->getVariable() == RHS->getVariable() && + LHS->getDebugLoc()->getInlinedAt() == + RHS->getDebugLoc()->getInlinedAt(); + }; + if (SameVariableFragment(OldDII, DbgDeclare)) + OldDII->eraseFromParent(); + } + + DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr, + DbgDeclare->getDebugLoc(), &AI); } } return Changed; @@ -4585,7 +4585,7 @@ void SROA::clobberUse(Use &U) { // minimal. if (Instruction *OldI = dyn_cast<Instruction>(OldV)) if (isInstructionTriviallyDead(OldI)) { - DeadInsts.push_back(OldI); + DeadInsts.push_back(OldI); } } @@ -4634,7 +4634,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType())); // And mark it for deletion. 
- DeadInsts.push_back(DeadUser); + DeadInsts.push_back(DeadUser); Changed = true; } for (Use *DeadOp : AS.getDeadOperands()) { @@ -4672,8 +4672,8 @@ bool SROA::deleteDeadInstructions( SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); - if (!I) continue; + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); + if (!I) continue; LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); // If the instruction is an alloca, find the possible dbg.declare connected @@ -4692,7 +4692,7 @@ bool SROA::deleteDeadInstructions( // Zero out the operand and see if it becomes trivially dead. Operand = nullptr; if (isInstructionTriviallyDead(U)) - DeadInsts.push_back(U); + DeadInsts.push_back(U); } ++NumDeleted; @@ -4755,7 +4755,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); }; Worklist.remove_if(IsInSet); PostPromotionWorklist.remove_if(IsInSet); - llvm::erase_if(PromotableAllocas, IsInSet); + llvm::erase_if(PromotableAllocas, IsInSet); DeletedAllocas.clear(); } } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp index dba3dba24e..c897888295 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp @@ -34,12 +34,12 @@ using namespace llvm; /// ScalarOpts library. void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCELegacyPassPass(Registry); - initializeAnnotationRemarksLegacyPass(Registry); + initializeAnnotationRemarksLegacyPass(Registry); initializeBDCELegacyPassPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); initializeCallSiteSplittingLegacyPassPass(Registry); initializeConstantHoistingLegacyPassPass(Registry); - initializeConstraintEliminationPass(Registry); + initializeConstraintEliminationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); initializeDCELegacyPassPass(Registry); initializeDivRemPairsLegacyPassPass(Registry); @@ -67,24 +67,24 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopDeletionLegacyPassPass(Registry); initializeLoopAccessLegacyAnalysisPass(Registry); initializeLoopInstSimplifyLegacyPassPass(Registry); - initializeLoopInterchangeLegacyPassPass(Registry); - initializeLoopFlattenLegacyPassPass(Registry); + initializeLoopInterchangeLegacyPassPass(Registry); + initializeLoopFlattenLegacyPassPass(Registry); initializeLoopPredicationLegacyPassPass(Registry); initializeLoopRotateLegacyPassPass(Registry); initializeLoopStrengthReducePass(Registry); - initializeLoopRerollLegacyPassPass(Registry); + initializeLoopRerollLegacyPassPass(Registry); initializeLoopUnrollPass(Registry); initializeLoopUnrollAndJamPass(Registry); initializeLoopUnswitchPass(Registry); initializeWarnMissedTransformationsLegacyPass(Registry); - initializeLoopVersioningLICMLegacyPassPass(Registry); + initializeLoopVersioningLICMLegacyPassPass(Registry); initializeLoopIdiomRecognizeLegacyPassPass(Registry); initializeLowerAtomicLegacyPassPass(Registry); initializeLowerConstantIntrinsicsPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeLowerGuardIntrinsicLegacyPassPass(Registry); initializeLowerMatrixIntrinsicsLegacyPassPass(Registry); - initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(Registry); + 
initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(Registry); initializeLowerWidenableConditionLegacyPassPass(Registry); initializeMemCpyOptLegacyPassPass(Registry); initializeMergeICmpsLegacyPassPass(Registry); @@ -93,26 +93,26 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializePartiallyInlineLibCallsLegacyPassPass(Registry); initializeReassociateLegacyPassPass(Registry); initializeRedundantDbgInstEliminationPass(Registry); - initializeRegToMemLegacyPass(Registry); + initializeRegToMemLegacyPass(Registry); initializeRewriteStatepointsForGCLegacyPassPass(Registry); - initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); + initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSCCPLegacyPassPass(Registry); initializeSROALegacyPassPass(Registry); initializeCFGSimplifyPassPass(Registry); - initializeStructurizeCFGLegacyPassPass(Registry); + initializeStructurizeCFGLegacyPassPass(Registry); initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); - initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry); + initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry); initializeSpeculativeExecutionLegacyPassPass(Registry); - initializeStraightLineStrengthReduceLegacyPassPass(Registry); + initializeStraightLineStrengthReduceLegacyPassPass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); initializeFloat2IntLegacyPassPass(Registry); initializeLoopDistributeLegacyPass(Registry); initializeLoopLoadEliminationPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); - initializeLoopVersioningLegacyPassPass(Registry); + initializeLoopVersioningLegacyPassPass(Registry); initializeEntryExitInstrumenterPass(Registry); initializePostInlineEntryExitInstrumenterPass(Registry); } @@ -142,7 +142,7 @@ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) { } void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCFGSimplificationPass()); + unwrap(PM)->add(createCFGSimplificationPass()); } void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { @@ -169,10 +169,10 @@ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createIndVarSimplifyPass()); } -void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createInstSimplifyLegacyPass()); -} - +void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createInstSimplifyLegacyPass()); +} + void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createJumpThreadingPass()); } @@ -189,10 +189,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } -void LLVMAddLoopFlattenPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLoopFlattenPass()); -} - +void LLVMAddLoopFlattenPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopFlattenPass()); +} + void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopIdiomPass()); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp index afa2d1bc79..c8da464a3b 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp @@ -1,948 +1,948 @@ -//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// -// instrinsics -// -// Part of 
the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass replaces masked memory intrinsics - when unsupported by the target -// - with a chain of basic blocks, that deal with the elements one-by-one if the -// appropriate mask bit is set. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Transforms/Scalar.h" -#include <algorithm> -#include <cassert> - -using namespace llvm; - -#define DEBUG_TYPE "scalarize-masked-mem-intrin" - -namespace { - -class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - - explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) { - initializeScalarizeMaskedMemIntrinLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - StringRef getPassName() const override { - return "Scalarize Masked Memory Intrinsics"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetTransformInfoWrapperPass>(); - } -}; - -} // end anonymous namespace - -static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, - const TargetTransformInfo &TTI, const DataLayout &DL); -static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, - const TargetTransformInfo &TTI, - const DataLayout &DL); - -char ScalarizeMaskedMemIntrinLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, - "Scalarize unsupported masked memory intrinsics", false, - false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, - "Scalarize unsupported masked memory intrinsics", false, - false) - -FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() { - return new ScalarizeMaskedMemIntrinLegacyPass(); -} - -static bool isConstantIntVector(Value *Mask) { - Constant *C = dyn_cast<Constant>(Mask); - if (!C) - return false; - - unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *CElt = C->getAggregateElement(i); - if (!CElt || !isa<ConstantInt>(CElt)) - return false; - } - - return true; -} - -// Translate a masked load intrinsic like -// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, -// <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// br i1 %2, label %cond.load, label %else -// -// cond.load: ; preds = %0 -// %3 = 
getelementptr i32* %1, i32 0 -// %4 = load i32* %3 -// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 -// br label %else -// -// else: ; preds = %0, %cond.load -// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ] -// %6 = extractelement <16 x i1> %mask, i32 1 -// br i1 %6, label %cond.load1, label %else2 -// -// cond.load1: ; preds = %else -// %7 = getelementptr i32* %1, i32 1 -// %8 = load i32* %7 -// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 -// br label %else2 -// -// else2: ; preds = %else, %cond.load1 -// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] -// %10 = extractelement <16 x i1> %mask, i32 2 -// br i1 %10, label %cond.load4, label %else5 -// -static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { - Value *Ptr = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); - VectorType *VecType = cast<FixedVectorType>(CI->getType()); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { - Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. - const Align AdjustedAlignVal = - commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); - // Bitcast %addr from i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); - - // The result vector - Value *VResult = Src0; - - if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, Idx); - } - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. 
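The comment just above and the loop that follows replace per-lane extractelement on the mask with a single bitcast to an iN scalar plus an and/icmp per lane. A plain-C++ stand-in for that bit test, assuming a 16-lane mask; the constant mask value is illustrative only.

#include <cstdint>
#include <cstdio>

// Mirrors the expanded IR:  %mask_1 = and iN %scalar_mask, (1 << Idx)
//                           %cond   = icmp ne iN %mask_1, 0
static bool laneIsActive(uint16_t ScalarMask, unsigned Idx) {
  return (ScalarMask & (uint16_t(1) << Idx)) != 0;
}

int main() {
  const uint16_t ScalarMask = 0x0005; // lanes 0 and 2 set, all others clear
  for (unsigned Idx = 0; Idx < 16; ++Idx)
    if (laneIsActive(ScalarMask, Idx))
      std::printf("lane %u takes the cond.load block\n", Idx);
  return 0;
}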
- Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %mask_1, label %cond.load, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx); - } - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), - "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); - Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - // Create the phi to join the new and previous value. - PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(NewVResult, CondBlock); - Phi->addIncoming(VResult, PrevIfBlock); - VResult = Phi; - } - - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - - ModifiedDT = true; -} - -// Translate a masked store intrinsic, like -// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, -// <16 x i1> %mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// br i1 %2, label %cond.store, label %else -// -// cond.store: ; preds = %0 -// %3 = extractelement <16 x i32> %val, i32 0 -// %4 = getelementptr i32* %1, i32 0 -// store i32 %3, i32* %4 -// br label %else -// -// else: ; preds = %0, %cond.store -// %5 = extractelement <16 x i1> %mask, i32 1 -// br i1 %5, label %cond.store1, label %else2 -// -// cond.store1: ; preds = %else -// %6 = extractelement <16 x i32> %val, i32 1 -// %7 = getelementptr i32* %1, i32 1 -// store i32 %6, i32* %7 -// br label %else2 -// . . . -static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { - Value *Src = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); - auto *VecType = cast<VectorType>(Src->getType()); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. 
- if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { - Builder.CreateAlignedStore(Src, Ptr, AlignVal); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. - const Align AdjustedAlignVal = - commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); - // Bitcast %addr from i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); - - if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Idx); - Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); - } - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. - Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %mask_1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %mask_1, label %cond.store, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx); - } - - // Create "cond" block - // - // %OneElt = extractelement <16 x i32> %Src, i32 Idx - // %EltAddr = getelementptr i32* %1, i32 0 - // %store i32 %OneElt, i32* %EltAddr - // - BasicBlock *CondBlock = - IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Idx); - Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); - - ModifiedDT = true; -} - -// Translate a masked gather intrinsic like -// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, -// <16 x i1> %Mask, <16 x i32> %Src) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind -// %Mask0 = extractelement <16 x i1> %Mask, i32 0 -// br i1 %Mask0, label %cond.load, label %else -// -// cond.load: -// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// %Load0 = load i32, i32* %Ptr0, align 4 -// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0 -// br label %else -// -// else: -// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0] -// %Mask1 = extractelement <16 x i1> %Mask, i32 1 -// br i1 %Mask1, label %cond.load1, label %else2 -// -// cond.load1: -// 
%Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// %Load1 = load i32, i32* %Ptr1, align 4 -// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 -// br label %else2 -// . . . -// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src -// ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { - Value *Ptrs = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - auto *VecType = cast<FixedVectorType>(CI->getType()); - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // The result vector - Value *VResult = Src0; - unsigned VectorWidth = VecType->getNumElements(); - - // Shorten the way if the mask is a vector of constants. - if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - LoadInst *Load = - Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); - VResult = - Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); - } - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. - Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %Mask1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %Mask1, label %cond.load, label %else - // - - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - LoadInst *Load = - Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); - Value *NewVResult = - Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(NewVResult, CondBlock); - Phi->addIncoming(VResult, PrevIfBlock); - VResult = Phi; - } - - 
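Taken together, the branch chain built above behaves like a guarded per-lane copy. A rough plain-C++ analogue for a 4-lane i32 gather (illustration only; names are invented):

  #include <cstdint>

  // 'res' comes in holding the pass-through values; active lanes are
  // overwritten with a load through their own pointer.
  void gather4(uint32_t res[4], const uint32_t *const ptrs[4], uint8_t mask) {
    for (unsigned i = 0; i != 4; ++i)
      if (mask & (1u << i))   // the and + icmp ne lane test
        res[i] = *ptrs[i];    // body of the cond.load block for lane i
  }

Each loop iteration corresponds to one cond.load/else pair in the emitted IR, and the running 'res' value is what the res.phi.else PHIs carry from block to block.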
CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - - ModifiedDT = true; -} - -// Translate a masked scatter intrinsic, like -// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, -// <16 x i1> %Mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set. -// -// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind -// %Mask0 = extractelement <16 x i1> %Mask, i32 0 -// br i1 %Mask0, label %cond.store, label %else -// -// cond.store: -// %Elt0 = extractelement <16 x i32> %Src, i32 0 -// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// store i32 %Elt0, i32* %Ptr0, align 4 -// br label %else -// -// else: -// %Mask1 = extractelement <16 x i1> %Mask, i32 1 -// br i1 %Mask1, label %cond.store1, label %else2 -// -// cond.store1: -// %Elt1 = extractelement <16 x i32> %Src, i32 1 -// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// store i32 %Elt1, i32* %Ptr1, align 4 -// br label %else2 -// . . . -static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { - Value *Src = CI->getArgOperand(0); - Value *Ptrs = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); - - assert( - isa<VectorType>(Ptrs->getType()) && - isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && - "Vector of pointers is expected in masked scatter intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); - unsigned VectorWidth = SrcFVTy->getNumElements(); - - // Shorten the way if the mask is a vector of constants. - if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *OneElt = - Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - } - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. 
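The per-lane predicate in the loop below relies on APInt::getOneBitSet(VectorWidth, Idx) to build the single-bit constant that is AND-ed with the bitcast mask. A tiny sketch of what that call produces, assuming the LLVM headers are on the include path:

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  int main() {
    // A 16-bit value with only bit 3 set, i.e. 0x0008 -- the test pattern
    // for lane 3 of a 16-lane mask.
    llvm::APInt LaneBit = llvm::APInt::getOneBitSet(16, 3);
    assert(LaneBit.getZExtValue() == 0x8);
  }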
- Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %Mask1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %Mask1, label %cond.store, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %Elt1 = extractelement <16 x i32> %Src, i32 1 - // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 - // %store i32 %Elt1, i32* %Ptr1 - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); - - ModifiedDT = true; -} - -static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { - Value *Ptr = CI->getArgOperand(0); - Value *Mask = CI->getArgOperand(1); - Value *PassThru = CI->getArgOperand(2); - - auto *VecType = cast<FixedVectorType>(CI->getType()); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - unsigned VectorWidth = VecType->getNumElements(); - - // The result vector - Value *VResult = PassThru; - - // Shorten the way if the mask is a vector of constants. - // Create a build_vector pattern, with loads/undefs as necessary and then - // shuffle blend with the pass through value. - if (isConstantIntVector(Mask)) { - unsigned MemIndex = 0; - VResult = UndefValue::get(VecType); - SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem); - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - Value *InsertElt; - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) { - InsertElt = UndefValue::get(EltTy); - ShuffleMask[Idx] = Idx + VectorWidth; - } else { - Value *NewPtr = - Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); - InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1), - "Load" + Twine(Idx)); - ShuffleMask[Idx] = Idx; - ++MemIndex; - } - VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx, - "Res" + Twine(Idx)); - } - VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask); - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. 
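For expandload the active lanes consume consecutive memory, so the emitted blocks also thread a moving pointer (the ptr.phi.else PHIs further down) alongside the result vector. A rough plain-C++ analogue of the semantics for a 4-lane i32 expanding load (illustration only; names invented):

  #include <cstdint>

  // 'res' comes in holding the pass-through values; 'src' is read
  // sequentially, advancing only when a lane is active.
  void expandLoad4(uint32_t res[4], const uint32_t *src, uint8_t mask) {
    unsigned memIdx = 0;
    for (unsigned i = 0; i != 4; ++i)
      if (mask & (1u << i))
        res[i] = src[memIdx++];
  }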
- Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // br i1 %mask_1, label %cond.load, label %else - // - - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), - "cond.load"); - Builder.SetInsertPoint(InsertPt); - - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1)); - Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); - - // Move the pointer if there are more blocks to come. - Value *NewPtr; - if ((Idx + 1) != VectorWidth) - NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - // Create the phi to join the new and previous value. - PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - ResultPhi->addIncoming(NewVResult, CondBlock); - ResultPhi->addIncoming(VResult, PrevIfBlock); - VResult = ResultPhi; - - // Add a PHI for the pointer if this isn't the last iteration. - if ((Idx + 1) != VectorWidth) { - PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); - PtrPhi->addIncoming(NewPtr, CondBlock); - PtrPhi->addIncoming(Ptr, PrevIfBlock); - Ptr = PtrPhi; - } - } - - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - - ModifiedDT = true; -} - -static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { - Value *Src = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - - auto *VecType = cast<FixedVectorType>(Src->getType()); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - Type *EltTy = VecType->getElementType(); - - unsigned VectorWidth = VecType->getNumElements(); - - // Shorten the way if the mask is a vector of constants. 
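compressstore is the mirror image of expandload: active lanes are packed into consecutive memory slots. A rough plain-C++ analogue for 4 x i32 (illustration only; names invented):

  #include <cstdint>

  // Active lanes of 'src' are written back to back starting at 'dst';
  // inactive lanes produce no store and do not advance the pointer.
  void compressStore4(const uint32_t src[4], uint32_t *dst, uint8_t mask) {
    unsigned memIdx = 0;
    for (unsigned i = 0; i != 4; ++i)
      if (mask & (1u << i))
        dst[memIdx++] = src[i];
  }

When the mask is a compile-time constant (the fast path right below), this loop is effectively unrolled at compile time: only the active stores are emitted and no branches are needed at all.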
- if (isConstantIntVector(Mask)) { - unsigned MemIndex = 0; - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *OneElt = - Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); - Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); - Builder.CreateAlignedStore(OneElt, NewPtr, Align(1)); - ++MemIndex; - } - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. - Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // br i1 %mask_1, label %cond.store, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %OneElt = extractelement <16 x i32> %Src, i32 Idx - // %EltAddr = getelementptr i32* %1, i32 0 - // %store i32 %OneElt, i32* %EltAddr - // - BasicBlock *CondBlock = - IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Idx); - Builder.CreateAlignedStore(OneElt, Ptr, Align(1)); - - // Move the pointer if there are more blocks to come. - Value *NewPtr; - if ((Idx + 1) != VectorWidth) - NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - // Add a PHI for the pointer if this isn't the last iteration. 
- if ((Idx + 1) != VectorWidth) { - PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); - PtrPhi->addIncoming(NewPtr, CondBlock); - PtrPhi->addIncoming(Ptr, PrevIfBlock); - Ptr = PtrPhi; - } - } - CI->eraseFromParent(); - - ModifiedDT = true; -} - -static bool runImpl(Function &F, const TargetTransformInfo &TTI) { - bool EverMadeChange = false; - bool MadeChange = true; - auto &DL = F.getParent()->getDataLayout(); - while (MadeChange) { - MadeChange = false; - for (Function::iterator I = F.begin(); I != F.end();) { - BasicBlock *BB = &*I++; - bool ModifiedDTOnIteration = false; - MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL); - - // Restart BB iteration if the dominator tree of the Function was changed - if (ModifiedDTOnIteration) - break; - } - - EverMadeChange |= MadeChange; - } - return EverMadeChange; -} - -bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) { - auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - return runImpl(F, TTI); -} - -PreservedAnalyses -ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &TTI = AM.getResult<TargetIRAnalysis>(F); - if (!runImpl(F, TTI)) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<TargetIRAnalysis>(); - return PA; -} - -static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, - const TargetTransformInfo &TTI, - const DataLayout &DL) { - bool MadeChange = false; - - BasicBlock::iterator CurInstIterator = BB.begin(); - while (CurInstIterator != BB.end()) { - if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) - MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL); - if (ModifiedDT) - return true; - } - - return MadeChange; -} - -static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, - const TargetTransformInfo &TTI, - const DataLayout &DL) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); - if (II) { - // The scalarization code below does not work for scalable vectors. 
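The bail-out that follows exists because everything above indexes lanes with a compile-time trip count, which only makes sense for fixed-width vectors such as <4 x i32>; scalable types like <vscale x 4 x i32> have a lane count unknown until run time. A minimal sketch of that distinction, assuming the LLVM headers are available:

  #include "llvm/IR/DerivedTypes.h"

  // True for <4 x i32>-style types, false for <vscale x 4 x i32>-style types
  // (and for non-vectors).
  bool hasCompileTimeLaneCount(llvm::Type *T) {
    return llvm::isa<llvm::FixedVectorType>(T);
  }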
- if (isa<ScalableVectorType>(II->getType()) || - any_of(II->arg_operands(), - [](Value *V) { return isa<ScalableVectorType>(V->getType()); })) - return false; - - switch (II->getIntrinsicID()) { - default: - break; - case Intrinsic::masked_load: - // Scalarize unsupported vector masked load - if (TTI.isLegalMaskedLoad( - CI->getType(), - cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) - return false; - scalarizeMaskedLoad(CI, ModifiedDT); - return true; - case Intrinsic::masked_store: - if (TTI.isLegalMaskedStore( - CI->getArgOperand(0)->getType(), - cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) - return false; - scalarizeMaskedStore(CI, ModifiedDT); - return true; - case Intrinsic::masked_gather: { - unsigned AlignmentInt = - cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); - Type *LoadTy = CI->getType(); - Align Alignment = - DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); - if (TTI.isLegalMaskedGather(LoadTy, Alignment)) - return false; - scalarizeMaskedGather(CI, ModifiedDT); - return true; - } - case Intrinsic::masked_scatter: { - unsigned AlignmentInt = - cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); - Type *StoreTy = CI->getArgOperand(0)->getType(); - Align Alignment = - DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); - if (TTI.isLegalMaskedScatter(StoreTy, Alignment)) - return false; - scalarizeMaskedScatter(CI, ModifiedDT); - return true; - } - case Intrinsic::masked_expandload: - if (TTI.isLegalMaskedExpandLoad(CI->getType())) - return false; - scalarizeMaskedExpandLoad(CI, ModifiedDT); - return true; - case Intrinsic::masked_compressstore: - if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) - return false; - scalarizeMaskedCompressStore(CI, ModifiedDT); - return true; - } - } - - return false; -} +//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// +// instrinsics +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass replaces masked memory intrinsics - when unsupported by the target +// - with a chain of basic blocks, that deal with the elements one-by-one if the +// appropriate mask bit is set. 
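For readers who want to see the transformation in isolation: with the new pass manager the pass can be scheduled directly, and, assuming the usual LLVM 12-era registration, it can also be exercised from the command line with something like opt -S -passes=scalarize-masked-mem-intrin input.ll. A minimal sketch of adding it to a hand-built function pipeline:

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"

  // Assumes a FunctionAnalysisManager with the default analyses (in
  // particular TargetIRAnalysis) has already been registered via PassBuilder.
  void addScalarizeMaskedMemIntrin(llvm::FunctionPassManager &FPM) {
    FPM.addPass(llvm::ScalarizeMaskedMemIntrinPass());
  }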
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Transforms/Scalar.h" +#include <algorithm> +#include <cassert> + +using namespace llvm; + +#define DEBUG_TYPE "scalarize-masked-mem-intrin" + +namespace { + +class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + + explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) { + initializeScalarizeMaskedMemIntrinLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "Scalarize Masked Memory Intrinsics"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + } +}; + +} // end anonymous namespace + +static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, + const TargetTransformInfo &TTI, const DataLayout &DL); +static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, + const TargetTransformInfo &TTI, + const DataLayout &DL); + +char ScalarizeMaskedMemIntrinLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, + "Scalarize unsupported masked memory intrinsics", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, + "Scalarize unsupported masked memory intrinsics", false, + false) + +FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() { + return new ScalarizeMaskedMemIntrinLegacyPass(); +} + +static bool isConstantIntVector(Value *Mask) { + Constant *C = dyn_cast<Constant>(Mask); + if (!C) + return false; + + unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *CElt = C->getAggregateElement(i); + if (!CElt || !isa<ConstantInt>(CElt)) + return false; + } + + return true; +} + +// Translate a masked load intrinsic like +// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, +// <16 x i1> %mask, <16 x i32> %passthru) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// br i1 %2, label %cond.load, label %else +// +// cond.load: ; preds = %0 +// %3 = getelementptr i32* %1, i32 0 +// %4 = load i32* %3 +// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 +// br label %else +// +// else: ; preds = %0, %cond.load +// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ] +// %6 = extractelement <16 x i1> %mask, i32 1 +// br i1 %6, label %cond.load1, label %else2 +// +// cond.load1: ; preds = %else +// %7 = getelementptr i32* %1, i32 1 +// %8 = load i32* %7 +// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 
+// br label %else2 +// +// else2: ; preds = %else, %cond.load1 +// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] +// %10 = extractelement <16 x i1> %mask, i32 2 +// br i1 %10, label %cond.load4, label %else5 +// +static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { + Value *Ptr = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); + VectorType *VecType = cast<FixedVectorType>(CI->getType()); + + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true. + if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { + Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + const Align AdjustedAlignVal = + commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); + // Bitcast %addr from i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); + + // The result vector + Value *VResult = Src0; + + if (isConstantIntVector(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); + VResult = Builder.CreateInsertElement(VResult, Load, Idx); + } + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 + // br i1 %mask_1, label %cond.load, label %else + // + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), + "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); + Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + BasicBlock *PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + + // Create the phi to join the new and previous value. + PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(NewVResult, CondBlock); + Phi->addIncoming(VResult, PrevIfBlock); + VResult = Phi; + } + + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + + ModifiedDT = true; +} + +// Translate a masked store intrinsic, like +// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, +// <16 x i1> %mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// br i1 %2, label %cond.store, label %else +// +// cond.store: ; preds = %0 +// %3 = extractelement <16 x i32> %val, i32 0 +// %4 = getelementptr i32* %1, i32 0 +// store i32 %3, i32* %4 +// br label %else +// +// else: ; preds = %0, %cond.store +// %5 = extractelement <16 x i1> %mask, i32 1 +// br i1 %5, label %cond.store1, label %else2 +// +// cond.store1: ; preds = %else +// %6 = extractelement <16 x i32> %val, i32 1 +// %7 = getelementptr i32* %1, i32 1 +// store i32 %6, i32* %7 +// br label %else2 +// . . . +static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { + Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); + auto *VecType = cast<VectorType>(Src->getType()); + + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true. 
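When the mask is a constant all-ones vector there is nothing to predicate, so the shortcut below degrades the intrinsic into one ordinary full-width store. A loose plain-C++ picture of that case for 4 x i32 (illustration only; a real vector store is a single wide instruction rather than a memcpy):

  #include <cstdint>
  #include <cstring>

  void storeAllTrue(const uint32_t src[4], uint32_t *dst) {
    std::memcpy(dst, src, 4 * sizeof(uint32_t)); // one unconditional store, no branches
  }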
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { + Builder.CreateAlignedStore(Src, Ptr, AlignVal); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + const Align AdjustedAlignVal = + commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); + // Bitcast %addr from i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); + + if (isConstantIntVector(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Idx); + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); + Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); + } + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 + // br i1 %mask_1, label %cond.store, label %else + // + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } + + // Create "cond" block + // + // %OneElt = extractelement <16 x i32> %Src, i32 Idx + // %EltAddr = getelementptr i32* %1, i32 0 + // %store i32 %OneElt, i32* %EltAddr + // + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Idx); + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); + Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); + + ModifiedDT = true; +} + +// Translate a masked gather intrinsic like +// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, +// <16 x i1> %Mask, <16 x i32> %Src) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// %Mask0 = extractelement <16 x i1> %Mask, i32 0 +// br i1 %Mask0, label %cond.load, label %else +// +// cond.load: +// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// %Load0 = load i32, i32* %Ptr0, align 4 +// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0 +// br label %else +// +// else: +// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0] +// %Mask1 = extractelement <16 x i1> %Mask, i32 1 +// br i1 %Mask1, label %cond.load1, label %else2 +// +// cond.load1: +// 
%Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// %Load1 = load i32, i32* %Ptr1, align 4 +// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 +// br label %else2 +// . . . +// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + auto *VecType = cast<FixedVectorType>(CI->getType()); + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // The result vector + Value *VResult = Src0; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = + Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); + } + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 + // br i1 %Mask1, label %cond.load, label %else + // + + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); + Value *NewVResult = + Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + BasicBlock *PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + + PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(NewVResult, CondBlock); + Phi->addIncoming(VResult, PrevIfBlock); + VResult = Phi; + } + + 
CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + + ModifiedDT = true; +} + +// Translate a masked scatter intrinsic, like +// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, +// <16 x i1> %Mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set. +// +// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind +// %Mask0 = extractelement <16 x i1> %Mask, i32 0 +// br i1 %Mask0, label %cond.store, label %else +// +// cond.store: +// %Elt0 = extractelement <16 x i32> %Src, i32 0 +// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// store i32 %Elt0, i32* %Ptr0, align 4 +// br label %else +// +// else: +// %Mask1 = extractelement <16 x i1> %Mask, i32 1 +// br i1 %Mask1, label %cond.store1, label %else2 +// +// cond.store1: +// %Elt1 = extractelement <16 x i32> %Src, i32 1 +// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// store i32 %Elt1, i32* %Ptr1, align 4 +// br label %else2 +// . . . +static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { + Value *Src = CI->getArgOperand(0); + Value *Ptrs = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); + + assert( + isa<VectorType>(Ptrs->getType()) && + isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && + "Vector of pointers is expected in masked scatter intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); + unsigned VectorWidth = SrcFVTy->getNumElements(); + + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *OneElt = + Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 + // br i1 %Mask1, label %cond.store, label %else + // + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } + + // Create "cond" block + // + // %Elt1 = extractelement <16 x i32> %Src, i32 1 + // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 %Elt1, i32* %Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); + + ModifiedDT = true; +} + +static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { + Value *Ptr = CI->getArgOperand(0); + Value *Mask = CI->getArgOperand(1); + Value *PassThru = CI->getArgOperand(2); + + auto *VecType = cast<FixedVectorType>(CI->getType()); + + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + unsigned VectorWidth = VecType->getNumElements(); + + // The result vector + Value *VResult = PassThru; + + // Shorten the way if the mask is a vector of constants. + // Create a build_vector pattern, with loads/undefs as necessary and then + // shuffle blend with the pass through value. + if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + VResult = UndefValue::get(VecType); + SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem); + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + Value *InsertElt; + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) { + InsertElt = UndefValue::get(EltTy); + ShuffleMask[Idx] = Idx + VectorWidth; + } else { + Value *NewPtr = + Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1), + "Load" + Twine(Idx)); + ShuffleMask[Idx] = Idx; + ++MemIndex; + } + VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx, + "Res" + Twine(Idx)); + } + VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask); + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // br i1 %mask_1, label %cond.load, label %else + // + + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), + "cond.load"); + Builder.SetInsertPoint(InsertPt); + + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1)); + Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); + + // Move the pointer if there are more blocks to come. + Value *NewPtr; + if ((Idx + 1) != VectorWidth) + NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + BasicBlock *PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + + // Create the phi to join the new and previous value. + PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + ResultPhi->addIncoming(NewVResult, CondBlock); + ResultPhi->addIncoming(VResult, PrevIfBlock); + VResult = ResultPhi; + + // Add a PHI for the pointer if this isn't the last iteration. + if ((Idx + 1) != VectorWidth) { + PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); + PtrPhi->addIncoming(NewPtr, CondBlock); + PtrPhi->addIncoming(Ptr, PrevIfBlock); + Ptr = PtrPhi; + } + } + + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + + ModifiedDT = true; +} + +static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { + Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + + auto *VecType = cast<FixedVectorType>(Src->getType()); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Type *EltTy = VecType->getElementType(); + + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. 
+ if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *OneElt = + Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + Builder.CreateAlignedStore(OneElt, NewPtr, Align(1)); + ++MemIndex; + } + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // br i1 %mask_1, label %cond.store, label %else + // + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } + + // Create "cond" block + // + // %OneElt = extractelement <16 x i32> %Src, i32 Idx + // %EltAddr = getelementptr i32* %1, i32 0 + // %store i32 %OneElt, i32* %EltAddr + // + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Idx); + Builder.CreateAlignedStore(OneElt, Ptr, Align(1)); + + // Move the pointer if there are more blocks to come. + Value *NewPtr; + if ((Idx + 1) != VectorWidth) + NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + BasicBlock *PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + + // Add a PHI for the pointer if this isn't the last iteration. 
+ if ((Idx + 1) != VectorWidth) { + PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); + PtrPhi->addIncoming(NewPtr, CondBlock); + PtrPhi->addIncoming(Ptr, PrevIfBlock); + Ptr = PtrPhi; + } + } + CI->eraseFromParent(); + + ModifiedDT = true; +} + +static bool runImpl(Function &F, const TargetTransformInfo &TTI) { + bool EverMadeChange = false; + bool MadeChange = true; + auto &DL = F.getParent()->getDataLayout(); + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end();) { + BasicBlock *BB = &*I++; + bool ModifiedDTOnIteration = false; + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL); + + // Restart BB iteration if the dominator tree of the Function was changed + if (ModifiedDTOnIteration) + break; + } + + EverMadeChange |= MadeChange; + } + return EverMadeChange; +} + +bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) { + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + return runImpl(F, TTI); +} + +PreservedAnalyses +ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + if (!runImpl(F, TTI)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<TargetIRAnalysis>(); + return PA; +} + +static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, + const TargetTransformInfo &TTI, + const DataLayout &DL) { + bool MadeChange = false; + + BasicBlock::iterator CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) { + if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) + MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL); + if (ModifiedDT) + return true; + } + + return MadeChange; +} + +static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, + const TargetTransformInfo &TTI, + const DataLayout &DL) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); + if (II) { + // The scalarization code below does not work for scalable vectors. 
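The switch that follows only rewrites an intrinsic when the target cannot handle it natively: each case asks TargetTransformInfo first and returns early if the masked form is legal. A small sketch of that gating for the load case, assuming the LLVM 12-era TTI hooks:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Type.h"
  #include "llvm/Support/Alignment.h"

  // True when the pass should expand the masked load rather than leave it
  // for the backend.
  bool shouldScalarizeMaskedLoad(const llvm::TargetTransformInfo &TTI,
                                 llvm::Type *VecTy, llvm::Align Alignment) {
    return !TTI.isLegalMaskedLoad(VecTy, Alignment);
  }

On targets with native masked loads (AVX-512, for example) the hook typically reports the form as legal and the intrinsic survives to instruction selection; otherwise the expansion above kicks in.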
+ if (isa<ScalableVectorType>(II->getType()) || + any_of(II->arg_operands(), + [](Value *V) { return isa<ScalableVectorType>(V->getType()); })) + return false; + + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::masked_load: + // Scalarize unsupported vector masked load + if (TTI.isLegalMaskedLoad( + CI->getType(), + cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) + return false; + scalarizeMaskedLoad(CI, ModifiedDT); + return true; + case Intrinsic::masked_store: + if (TTI.isLegalMaskedStore( + CI->getArgOperand(0)->getType(), + cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) + return false; + scalarizeMaskedStore(CI, ModifiedDT); + return true; + case Intrinsic::masked_gather: { + unsigned AlignmentInt = + cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + Type *LoadTy = CI->getType(); + Align Alignment = + DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); + if (TTI.isLegalMaskedGather(LoadTy, Alignment)) + return false; + scalarizeMaskedGather(CI, ModifiedDT); + return true; + } + case Intrinsic::masked_scatter: { + unsigned AlignmentInt = + cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + Type *StoreTy = CI->getArgOperand(0)->getType(); + Align Alignment = + DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); + if (TTI.isLegalMaskedScatter(StoreTy, Alignment)) + return false; + scalarizeMaskedScatter(CI, ModifiedDT); + return true; + } + case Intrinsic::masked_expandload: + if (TTI.isLegalMaskedExpandLoad(CI->getType())) + return false; + scalarizeMaskedExpandLoad(CI, ModifiedDT); + return true; + case Intrinsic::masked_compressstore: + if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) + return false; + scalarizeMaskedCompressStore(CI, ModifiedDT); + return true; + } + } + + return false; +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp index c95984fe19..130793abff 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp @@ -398,8 +398,8 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) { continue; Instruction *Old = cast<Instruction>(V); - if (isa<Instruction>(CV[I])) - CV[I]->takeName(Old); + if (isa<Instruction>(CV[I])) + CV[I]->takeName(Old); Old->replaceAllUsesWith(CV[I]); PotentiallyDeadInstrs.emplace_back(Old); } @@ -733,7 +733,7 @@ bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) { auto *MidTy = FixedVectorType::get(SrcVT->getElementType(), FanIn); unsigned Op0I = 0; for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) { - Value *V = PoisonValue::get(MidTy); + Value *V = PoisonValue::get(MidTy); for (unsigned MidI = 0; MidI < FanIn; ++MidI) V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI), BCI.getName() + ".i" + Twine(ResI) @@ -932,7 +932,7 @@ bool ScalarizerVisitor::finish() { if (!Op->use_empty()) { // The value is still needed, so recreate it using a series of // InsertElements. 
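The Scalarizer hunk below is the point where a still-live vector value is rebuilt from its scalarized lanes, starting from a poison placeholder and inserting one element at a time. A condensed sketch of that idiom (function name invented; this is not the pass's actual helper):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"

  llvm::Value *rebuildVector(llvm::IRBuilder<> &B,
                             llvm::ArrayRef<llvm::Value *> Lanes,
                             llvm::VectorType *Ty) {
    llvm::Value *Res = llvm::PoisonValue::get(Ty);
    for (unsigned I = 0, E = Lanes.size(); I != E; ++I)
      Res = B.CreateInsertElement(Res, Lanes[I], B.getInt32(I));
    return Res;
  }

Poison is only the placeholder for the not-yet-inserted lanes; every lane is overwritten before the rebuilt value replaces the original.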
- Value *Res = PoisonValue::get(Op->getType()); + Value *Res = PoisonValue::get(Op->getType()); if (auto *Ty = dyn_cast<VectorType>(Op->getType())) { BasicBlock *BB = Op->getParent(); unsigned Count = cast<FixedVectorType>(Ty)->getNumElements(); @@ -942,7 +942,7 @@ bool ScalarizerVisitor::finish() { for (unsigned I = 0; I < Count; ++I) Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I), Op->getName() + ".upto" + Twine(I)); - Res->takeName(Op); + Res->takeName(Op); } else { assert(CV.size() == 1 && Op->getType() == CV[0]->getType()); Res = CV[0]; diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index f216956406..c63a069193 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -155,7 +155,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" +#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -178,7 +178,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -344,14 +344,14 @@ private: /// A pass that tries to split every GEP in the function into a variadic /// base and a constant offset. It is a FunctionPass because searching for the /// constant offset may inspect other basic blocks. -class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass { +class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass { public: static char ID; - SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false) + SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false) : FunctionPass(ID), LowerGEP(LowerGEP) { - initializeSeparateConstOffsetFromGEPLegacyPassPass( - *PassRegistry::getPassRegistry()); + initializeSeparateConstOffsetFromGEPLegacyPassPass( + *PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -366,23 +366,23 @@ public: bool runOnFunction(Function &F) override; private: - bool LowerGEP; -}; - -/// A pass that tries to split every GEP in the function into a variadic -/// base and a constant offset. It is a FunctionPass because searching for the -/// constant offset may inspect other basic blocks. -class SeparateConstOffsetFromGEP { -public: - SeparateConstOffsetFromGEP( - DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, - TargetLibraryInfo *TLI, - function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP) - : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {} - - bool run(Function &F); - -private: + bool LowerGEP; +}; + +/// A pass that tries to split every GEP in the function into a variadic +/// base and a constant offset. It is a FunctionPass because searching for the +/// constant offset may inspect other basic blocks. 
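As context for the SeparateConstOffsetFromGEP changes in this diff: the pass rewrites address computations so that a constant offset is peeled off into its own trailing step, which lets CSE and LICM share the variadic part and lets the backend fold the constant into the addressing mode. A plain-C++ picture of the before/after shape (illustrative only; names invented):

  #include <cstddef>

  // Before: the constant is folded into the index, so &base[i + 5] and
  // &base[i + 7] look like unrelated address computations.
  int *addrBefore(int *base, std::size_t i) { return &base[i + 5]; }

  // After: a shared variadic base plus a small trailing constant offset.
  int *addrAfter(int *base, std::size_t i) {
    int *variadicBase = &base[i];
    return variadicBase + 5;
  }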
+class SeparateConstOffsetFromGEP { +public: + SeparateConstOffsetFromGEP( + DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, + TargetLibraryInfo *TLI, + function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP) + : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {} + + bool run(Function &F); + +private: /// Tries to split the given GEP into a variadic base and a constant offset, /// and returns true if the splitting succeeds. bool splitGEP(GetElementPtrInst *GEP); @@ -467,8 +467,8 @@ private: ScalarEvolution *SE; LoopInfo *LI; TargetLibraryInfo *TLI; - // Retrieved lazily since not always used. - function_ref<TargetTransformInfo &(Function &)> GetTTI; + // Retrieved lazily since not always used. + function_ref<TargetTransformInfo &(Function &)> GetTTI; /// Whether to lower a GEP with multiple indices into arithmetic operations or /// multiple GEPs with a single index. @@ -480,10 +480,10 @@ private: } // end anonymous namespace -char SeparateConstOffsetFromGEPLegacyPass::ID = 0; +char SeparateConstOffsetFromGEPLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN( - SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep", + SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) @@ -492,12 +492,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( - SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep", + SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) { - return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP); + return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP); } bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, @@ -902,8 +902,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( // If we created a GEP with constant index, and the base is loop invariant, // then we swap the first one with it, so LICM can move constant GEP out // later. - auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult); - auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr); + auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult); + auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr); if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L)) swapGEPOperand(FirstGEP, SecondGEP); @@ -978,7 +978,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { if (!NeedsExtraction) return Changed; - TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); + TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); // If LowerGEP is disabled, before really splitting the GEP, check whether the // backend supports the addressing mode we are about to produce. 
If no, this @@ -1143,25 +1143,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { return true; } -bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) { +bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - auto GetTTI = [this](Function &F) -> TargetTransformInfo & { - return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); - return Impl.run(F); -} - -bool SeparateConstOffsetFromGEP::run(Function &F) { + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + }; + SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + return Impl.run(F); +} + +bool SeparateConstOffsetFromGEP::run(Function &F) { if (DisableSeparateConstOffsetFromGEP) return false; - DL = &F.getParent()->getDataLayout(); + DL = &F.getParent()->getDataLayout(); bool Changed = false; for (BasicBlock &B : F) { for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;) @@ -1368,20 +1368,20 @@ void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First, } else First->setIsInBounds(true); } - -PreservedAnalyses -SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); - auto *LI = &AM.getResult<LoopAnalysis>(F); - auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); - auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & { - return AM.getResult<TargetIRAnalysis>(F); - }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); - if (!Impl.run(F)) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; -} + +PreservedAnalyses +SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); + auto *LI = &AM.getResult<LoopAnalysis>(F); + auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); + auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & { + return AM.getResult<TargetIRAnalysis>(F); + }; + SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + if (!Impl.run(F)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 9d3c8d0f37..8318870308 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -26,14 +26,14 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/MustExecute.h" -#include 
"llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -51,7 +51,7 @@ #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> @@ -96,11 +96,11 @@ static cl::opt<bool> UnswitchGuards( "simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates.")); -static cl::opt<bool> DropNonTrivialImplicitNullChecks( - "simple-loop-unswitch-drop-non-trivial-implicit-null-checks", - cl::init(false), cl::Hidden, - cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " - "null checks to save time analyzing if we can keep it.")); +static cl::opt<bool> DropNonTrivialImplicitNullChecks( + "simple-loop-unswitch-drop-non-trivial-implicit-null-checks", + cl::init(false), cl::Hidden, + cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " + "null checks to save time analyzing if we can keep it.")); /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. @@ -692,9 +692,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, // successor. BasicBlock *CommonSuccBB = nullptr; if (SI.getNumCases() > 0 && - all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) { - return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor(); - })) + all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) { + return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor(); + })) CommonSuccBB = SI.case_begin()->getCaseSuccessor(); if (!DefaultExitBB) { // If we're not unswitching the default, we need it to match any cases to @@ -855,11 +855,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, } if (MSSAU) { - MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); + MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); - } else { - DT.applyUpdates(DTUpdates); + } else { + DT.applyUpdates(DTUpdates); } assert(DT.verify(DominatorTree::VerificationLevel::Fast)); @@ -1140,22 +1140,22 @@ static BasicBlock *buildClonedLoopBlocks( // Replace the cloned branch with an unconditional branch to the cloned // unswitched successor. auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB)); - Instruction *ClonedTerminator = ClonedParentBB->getTerminator(); - // Trivial Simplification. If Terminator is a conditional branch and - // condition becomes dead - erase it. 
- Value *ClonedConditionToErase = nullptr; - if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator)) - ClonedConditionToErase = BI->getCondition(); - else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator)) - ClonedConditionToErase = SI->getCondition(); - - ClonedTerminator->eraseFromParent(); + Instruction *ClonedTerminator = ClonedParentBB->getTerminator(); + // Trivial Simplification. If Terminator is a conditional branch and + // condition becomes dead - erase it. + Value *ClonedConditionToErase = nullptr; + if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator)) + ClonedConditionToErase = BI->getCondition(); + else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator)) + ClonedConditionToErase = SI->getCondition(); + + ClonedTerminator->eraseFromParent(); BranchInst::Create(ClonedSuccBB, ClonedParentBB); - if (ClonedConditionToErase) - RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr, - MSSAU); - + if (ClonedConditionToErase) + RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr, + MSSAU); + // If there are duplicate entries in the PHI nodes because of multiple edges // to the unswitched successor, we need to nuke all but one as we replaced it // with a direct branch. @@ -1214,7 +1214,7 @@ static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, LI.addTopLevelLoop(ClonedRootL); AddClonedBlocksToLoop(OrigRootL, *ClonedRootL); - if (OrigRootL.isInnermost()) + if (OrigRootL.isInnermost()) return ClonedRootL; // If we have a nest, we can quickly clone the entire loop nest using an @@ -2090,23 +2090,23 @@ static void unswitchNontrivialInvariants( DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU); } - // Drop metadata if we may break its semantics by moving this instr into the - // split block. - if (TI.getMetadata(LLVMContext::MD_make_implicit)) { - if (DropNonTrivialImplicitNullChecks) - // Do not spend time trying to understand if we can keep it, just drop it - // to save compile time. - TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); - else { - // It is only legal to preserve make.implicit metadata if we are - // guaranteed no reach implicit null check after following this branch. - ICFLoopSafetyInfo SafetyInfo; - SafetyInfo.computeLoopSafetyInfo(&L); - if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) - TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); - } - } - + // Drop metadata if we may break its semantics by moving this instr into the + // split block. + if (TI.getMetadata(LLVMContext::MD_make_implicit)) { + if (DropNonTrivialImplicitNullChecks) + // Do not spend time trying to understand if we can keep it, just drop it + // to save compile time. + TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); + else { + // It is only legal to preserve make.implicit metadata if we are + // guaranteed no reach implicit null check after following this branch. + ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(&L); + if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) + TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); + } + } + // The stitching of the branched code back together depends on whether we're // doing full unswitching or not with the exception that we always want to // nuke the initial terminator placed in the split block. 
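// The hunks above belong to non-trivial unswitching, which hoists a branch on
// a loop-invariant condition out of the loop and clones the body so each copy
// runs straight-line code.  A source-level sketch of the shape of that rewrite
// (illustrative only, not what the pass literally emits):

#include <cstdio>

long sumBefore(const int *a, int n, bool scale) {
  long s = 0;
  for (int i = 0; i < n; ++i) {
    if (scale)            // loop-invariant condition tested every iteration
      s += 2L * a[i];
    else
      s += a[i];
  }
  return s;
}

long sumUnswitched(const int *a, int n, bool scale) {
  long s = 0;
  if (scale) {            // condition hoisted; two specialized loop clones
    for (int i = 0; i < n; ++i)
      s += 2L * a[i];
  } else {
    for (int i = 0; i < n; ++i)
      s += a[i];
  }
  return s;
}

int main() {
  int a[4] = {1, 2, 3, 4};
  std::printf("%ld %ld\n", sumBefore(a, 4, true), sumUnswitched(a, 4, true));   // 20 20
}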
@@ -2353,12 +2353,12 @@ static void unswitchNontrivialInvariants( for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) { UpdateLoop(*UpdatedL); - if (UpdatedL->isOutermost()) + if (UpdatedL->isOutermost()) OuterExitL = nullptr; } if (IsStillLoop) { UpdateLoop(L); - if (L.isOutermost()) + if (L.isOutermost()) OuterExitL = nullptr; } @@ -2706,10 +2706,10 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, // (convergent, noduplicate, or cross-basic-block tokens). // FIXME: We might be able to safely handle some of these in non-duplicated // regions. - TargetTransformInfo::TargetCostKind CostKind = - L.getHeader()->getParent()->hasMinSize() - ? TargetTransformInfo::TCK_CodeSize - : TargetTransformInfo::TCK_SizeAndLatency; + TargetTransformInfo::TargetCostKind CostKind = + L.getHeader()->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; int LoopCost = 0; for (auto *BB : L.blocks()) { int Cost = 0; @@ -2723,7 +2723,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, if (CB->isConvergent() || CB->cannotDuplicate()) return false; - Cost += TTI.getUserCost(&I, CostKind); + Cost += TTI.getUserCost(&I, CostKind); } assert(Cost >= 0 && "Must not have negative costs!"); LoopCost += Cost; @@ -2904,10 +2904,10 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, if (!NonTrivial && !EnableNonTrivialUnswitch) return false; - // Skip non-trivial unswitching for optsize functions. - if (L.getHeader()->getParent()->hasOptSize()) - return false; - + // Skip non-trivial unswitching for optsize functions. + if (L.getHeader()->getParent()->hasOptSize()) + return false; + // For non-trivial unswitching, because it often creates new loops, we rely on // the pass manager to iterate on the loops rather than trying to immediately // reach a fixed point. There is no substantial advantage to iterating @@ -2920,7 +2920,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, return true; // No other opportunities to unswitch. 
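// The cost hunk above picks a cost kind (code size for minsize functions,
// size-and-latency otherwise), sums per-instruction costs over the loop's
// blocks, and gives up on calls that must not be duplicated.  A toy sketch of
// that accounting pattern; the Instr/Block types and the cost numbers are
// invented for illustration and are not LLVM's:

#include <cstdio>
#include <optional>
#include <vector>

enum class CostKind { CodeSize, SizeAndLatency };

struct Instr {
  int sizeCost;           // cost when optimizing for size
  int latencyCost;        // cost when optimizing for speed
  bool cannotDuplicate;   // e.g. convergent calls
};
using Block = std::vector<Instr>;

// Returns the total loop cost, or nullopt if unswitching is not allowed.
std::optional<int> loopCost(const std::vector<Block> &loopBlocks, bool minSize) {
  CostKind kind = minSize ? CostKind::CodeSize : CostKind::SizeAndLatency;
  int total = 0;
  for (const Block &b : loopBlocks) {
    int blockCost = 0;
    for (const Instr &i : b) {
      if (i.cannotDuplicate)
        return std::nullopt;   // cloning this block would be illegal
      blockCost += (kind == CostKind::CodeSize) ? i.sizeCost : i.latencyCost;
    }
    total += blockCost;
  }
  return total;
}

int main() {
  std::vector<Block> loop = {{{1, 3, false}, {1, 2, false}}};
  if (auto c = loopCost(loop, /*minSize=*/true))
    std::printf("cost=%d\n", *c);   // cost=2
}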
- return false; + return false; } PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 38e7109ead..7fdd5c659d 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -25,25 +25,25 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include <utility> using namespace llvm; @@ -65,10 +65,10 @@ static cl::opt<bool> UserForwardSwitchCond( "forward-switch-cond", cl::Hidden, cl::init(false), cl::desc("Forward switch condition to phi ops (default = false)")); -static cl::opt<bool> UserHoistCommonInsts( - "hoist-common-insts", cl::Hidden, cl::init(false), - cl::desc("hoist common instructions (default = false)")); - +static cl::opt<bool> UserHoistCommonInsts( + "hoist-common-insts", cl::Hidden, cl::init(false), + cl::desc("hoist common instructions (default = false)")); + static cl::opt<bool> UserSinkCommonInsts( "sink-common-insts", cl::Hidden, cl::init(false), cl::desc("Sink common instructions (default = false)")); @@ -78,18 +78,18 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, /// merge them together to promote recursive block merging. -static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { +static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { bool Changed = false; - std::vector<DominatorTree::UpdateType> Updates; - SmallVector<BasicBlock *, 8> DeadBlocks; - + std::vector<DominatorTree::UpdateType> Updates; + SmallVector<BasicBlock *, 8> DeadBlocks; + BasicBlock *RetBlock = nullptr; // Scan all the blocks in the function, looking for empty return blocks. - for (BasicBlock &BB : make_early_inc_range(F)) { - if (DTU && DTU->isBBPendingDeletion(&BB)) - continue; + for (BasicBlock &BB : make_early_inc_range(F)) { + if (DTU && DTU->isBBPendingDeletion(&BB)) + continue; // Only look at return blocks. ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); @@ -140,18 +140,18 @@ static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { if (Ret->getNumOperands() == 0 || Ret->getOperand(0) == cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) { - // All predecessors of BB should now branch to RetBlock instead. - if (DTU) { - for (auto *Predecessor : predecessors(&BB)) { - // But, iff Predecessor already branches to RetBlock, - // don't (re-)add DomTree edge, because it already exists. 
- if (!is_contained(successors(Predecessor), RetBlock)) - Updates.push_back({DominatorTree::Insert, Predecessor, RetBlock}); - Updates.push_back({DominatorTree::Delete, Predecessor, &BB}); - } - } + // All predecessors of BB should now branch to RetBlock instead. + if (DTU) { + for (auto *Predecessor : predecessors(&BB)) { + // But, iff Predecessor already branches to RetBlock, + // don't (re-)add DomTree edge, because it already exists. + if (!is_contained(successors(Predecessor), RetBlock)) + Updates.push_back({DominatorTree::Insert, Predecessor, RetBlock}); + Updates.push_back({DominatorTree::Delete, Predecessor, &BB}); + } + } BB.replaceAllUsesWith(RetBlock); - DeadBlocks.emplace_back(&BB); + DeadBlocks.emplace_back(&BB); continue; } @@ -175,55 +175,55 @@ static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { RetBlockPHI->addIncoming(Ret->getOperand(0), &BB); BB.getTerminator()->eraseFromParent(); BranchInst::Create(RetBlock, &BB); - if (DTU) - Updates.push_back({DominatorTree::Insert, &BB, RetBlock}); - } - - if (DTU) { - DTU->applyUpdates(Updates); - for (auto *BB : DeadBlocks) - DTU->deleteBB(BB); - } else { - for (auto *BB : DeadBlocks) - BB->eraseFromParent(); + if (DTU) + Updates.push_back({DominatorTree::Insert, &BB, RetBlock}); } + if (DTU) { + DTU->applyUpdates(Updates); + for (auto *BB : DeadBlocks) + DTU->deleteBB(BB); + } else { + for (auto *BB : DeadBlocks) + BB->eraseFromParent(); + } + return Changed; } /// Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, - DomTreeUpdater *DTU, + DomTreeUpdater *DTU, const SimplifyCFGOptions &Options) { bool Changed = false; bool LocalChange = true; SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges; FindFunctionBackedges(F, Edges); - SmallPtrSet<BasicBlock *, 16> UniqueLoopHeaders; + SmallPtrSet<BasicBlock *, 16> UniqueLoopHeaders; for (unsigned i = 0, e = Edges.size(); i != e; ++i) - UniqueLoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second)); - - SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(), - UniqueLoopHeaders.end()); + UniqueLoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second)); + SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(), + UniqueLoopHeaders.end()); + while (LocalChange) { LocalChange = false; // Loop over all of the basic blocks and remove them if they are unneeded. for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - BasicBlock &BB = *BBIt++; - if (DTU) { - assert( - !DTU->isBBPendingDeletion(&BB) && - "Should not end up trying to simplify blocks marked for removal."); - // Make sure that the advanced iterator does not point at the blocks - // that are marked for removal, skip over all such blocks. - while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt)) - ++BBIt; - } - if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) { + BasicBlock &BB = *BBIt++; + if (DTU) { + assert( + !DTU->isBBPendingDeletion(&BB) && + "Should not end up trying to simplify blocks marked for removal."); + // Make sure that the advanced iterator does not point at the blocks + // that are marked for removal, skip over all such blocks. 
+ while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt)) + ++BBIt; + } + if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) { LocalChange = true; ++NumSimpl; } @@ -233,15 +233,15 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, return Changed; } -static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, - DominatorTree *DT, - const SimplifyCFGOptions &Options) { - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - - bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr); - EverChanged |= mergeEmptyReturnBlocks(F, DT ? &DTU : nullptr); - EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); +static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, + DominatorTree *DT, + const SimplifyCFGOptions &Options) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr); + EverChanged |= mergeEmptyReturnBlocks(F, DT ? &DTU : nullptr); + EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); + // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -250,75 +250,75 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, // iterate between the two optimizations. We structure the code like this to // avoid rerunning iterativelySimplifyCFG if the second pass of // removeUnreachableBlocks doesn't do anything. - if (!removeUnreachableBlocks(F, DT ? &DTU : nullptr)) + if (!removeUnreachableBlocks(F, DT ? &DTU : nullptr)) return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); - EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr); + EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); + EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr); } while (EverChanged); return true; } -static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, - DominatorTree *DT, - const SimplifyCFGOptions &Options) { - assert((!RequireAndPreserveDomTree || - (DT && DT->verify(DominatorTree::VerificationLevel::Full))) && - "Original domtree is invalid?"); - - bool Changed = simplifyFunctionCFGImpl(F, TTI, DT, Options); - - assert((!RequireAndPreserveDomTree || - (DT && DT->verify(DominatorTree::VerificationLevel::Full))) && - "Failed to maintain validity of domtree!"); - - return Changed; -} - +static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, + DominatorTree *DT, + const SimplifyCFGOptions &Options) { + assert((!RequireAndPreserveDomTree || + (DT && DT->verify(DominatorTree::VerificationLevel::Full))) && + "Original domtree is invalid?"); + + bool Changed = simplifyFunctionCFGImpl(F, TTI, DT, Options); + + assert((!RequireAndPreserveDomTree || + (DT && DT->verify(DominatorTree::VerificationLevel::Full))) && + "Failed to maintain validity of domtree!"); + + return Changed; +} + // Command-line settings override compile-time settings. 
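// simplifyFunctionCFGImpl above runs its cleanups once and, only if something
// changed, keeps alternating them until neither makes progress.  A small
// standalone sketch of that fixed-point driver pattern; the two "passes" here
// are placeholders, not the real simplifications:

#include <cstdio>
#include <functional>

// Each pass reports whether it changed anything.
bool runToFixedPoint(const std::function<bool()> &passA,
                     const std::function<bool()> &passB) {
  bool everChanged = passA() | passB();   // run both at least once (no short-circuit)
  if (!everChanged)
    return false;                         // nothing changed, stop early
  bool changed;
  do {
    changed = passA();
    changed |= passB();
  } while (changed);                      // iterate until neither pass makes a change
  return true;
}

int main() {
  int work = 3;   // pretend there are three simplifiable spots
  auto shrink = [&] { return work > 0 && (--work, true); };
  auto noop = [] { return false; };
  std::printf("%d %d\n", runToFixedPoint(shrink, noop), work);   // 1 0
}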
-static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) { - if (UserBonusInstThreshold.getNumOccurrences()) - Options.BonusInstThreshold = UserBonusInstThreshold; - if (UserForwardSwitchCond.getNumOccurrences()) - Options.ForwardSwitchCondToPhi = UserForwardSwitchCond; - if (UserSwitchToLookup.getNumOccurrences()) - Options.ConvertSwitchToLookupTable = UserSwitchToLookup; - if (UserKeepLoops.getNumOccurrences()) - Options.NeedCanonicalLoop = UserKeepLoops; - if (UserHoistCommonInsts.getNumOccurrences()) - Options.HoistCommonInsts = UserHoistCommonInsts; - if (UserSinkCommonInsts.getNumOccurrences()) - Options.SinkCommonInsts = UserSinkCommonInsts; -} - -SimplifyCFGPass::SimplifyCFGPass() : Options() { - applyCommandLineOverridesToOptions(Options); -} - -SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) - : Options(Opts) { - applyCommandLineOverridesToOptions(Options); +static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) { + if (UserBonusInstThreshold.getNumOccurrences()) + Options.BonusInstThreshold = UserBonusInstThreshold; + if (UserForwardSwitchCond.getNumOccurrences()) + Options.ForwardSwitchCondToPhi = UserForwardSwitchCond; + if (UserSwitchToLookup.getNumOccurrences()) + Options.ConvertSwitchToLookupTable = UserSwitchToLookup; + if (UserKeepLoops.getNumOccurrences()) + Options.NeedCanonicalLoop = UserKeepLoops; + if (UserHoistCommonInsts.getNumOccurrences()) + Options.HoistCommonInsts = UserHoistCommonInsts; + if (UserSinkCommonInsts.getNumOccurrences()) + Options.SinkCommonInsts = UserSinkCommonInsts; } +SimplifyCFGPass::SimplifyCFGPass() : Options() { + applyCommandLineOverridesToOptions(Options); +} + +SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) + : Options(Opts) { + applyCommandLineOverridesToOptions(Options); +} + PreservedAnalyses SimplifyCFGPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult<TargetIRAnalysis>(F); Options.AC = &AM.getResult<AssumptionAnalysis>(F); - DominatorTree *DT = nullptr; - if (RequireAndPreserveDomTree) - DT = &AM.getResult<DominatorTreeAnalysis>(F); - if (F.hasFnAttribute(Attribute::OptForFuzzing)) { - Options.setSimplifyCondBranch(false).setFoldTwoEntryPHINode(false); - } else { - Options.setSimplifyCondBranch(true).setFoldTwoEntryPHINode(true); - } - if (!simplifyFunctionCFG(F, TTI, DT, Options)) + DominatorTree *DT = nullptr; + if (RequireAndPreserveDomTree) + DT = &AM.getResult<DominatorTreeAnalysis>(F); + if (F.hasFnAttribute(Attribute::OptForFuzzing)) { + Options.setSimplifyCondBranch(false).setFoldTwoEntryPHINode(false); + } else { + Options.setSimplifyCondBranch(true).setFoldTwoEntryPHINode(true); + } + if (!simplifyFunctionCFG(F, TTI, DT, Options)) return PreservedAnalyses::all(); PreservedAnalyses PA; - if (RequireAndPreserveDomTree) - PA.preserve<DominatorTreeAnalysis>(); + if (RequireAndPreserveDomTree) + PA.preserve<DominatorTreeAnalysis>(); PA.preserve<GlobalsAA>(); return PA; } @@ -329,14 +329,14 @@ struct CFGSimplifyPass : public FunctionPass { SimplifyCFGOptions Options; std::function<bool(const Function &)> PredicateFtor; - CFGSimplifyPass(SimplifyCFGOptions Options_ = SimplifyCFGOptions(), + CFGSimplifyPass(SimplifyCFGOptions Options_ = SimplifyCFGOptions(), std::function<bool(const Function &)> Ftor = nullptr) - : FunctionPass(ID), Options(Options_), PredicateFtor(std::move(Ftor)) { + : FunctionPass(ID), Options(Options_), PredicateFtor(std::move(Ftor)) { initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); // Check 
for command-line overrides of options for debug/customization. - applyCommandLineOverridesToOptions(Options); + applyCommandLineOverridesToOptions(Options); } bool runOnFunction(Function &F) override { @@ -344,9 +344,9 @@ struct CFGSimplifyPass : public FunctionPass { return false; Options.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - DominatorTree *DT = nullptr; - if (RequireAndPreserveDomTree) - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + DominatorTree *DT = nullptr; + if (RequireAndPreserveDomTree) + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); if (F.hasFnAttribute(Attribute::OptForFuzzing)) { Options.setSimplifyCondBranch(false) .setFoldTwoEntryPHINode(false); @@ -356,15 +356,15 @@ struct CFGSimplifyPass : public FunctionPass { } auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - return simplifyFunctionCFG(F, TTI, DT, Options); + return simplifyFunctionCFG(F, TTI, DT, Options); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); - if (RequireAndPreserveDomTree) - AU.addRequired<DominatorTreeWrapperPass>(); + if (RequireAndPreserveDomTree) + AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); - if (RequireAndPreserveDomTree) - AU.addPreserved<DominatorTreeWrapperPass>(); + if (RequireAndPreserveDomTree) + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); } }; @@ -375,13 +375,13 @@ INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) // Public interface to the CFGSimplification pass FunctionPass * -llvm::createCFGSimplificationPass(SimplifyCFGOptions Options, +llvm::createCFGSimplificationPass(SimplifyCFGOptions Options, std::function<bool(const Function &)> Ftor) { - return new CFGSimplifyPass(Options, std::move(Ftor)); + return new CFGSimplifyPass(Options, std::move(Ftor)); } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp index 89cfbe384b..ffff0e605a 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp @@ -99,7 +99,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo, return false; } - return true; + return true; } /// SinkInstruction - Determine whether it is safe to sink the specified machine @@ -130,37 +130,37 @@ static bool SinkInstruction(Instruction *Inst, // decide. BasicBlock *SuccToSinkTo = nullptr; - // Find the nearest common dominator of all users as the candidate. - BasicBlock *BB = Inst->getParent(); - for (Use &U : Inst->uses()) { - Instruction *UseInst = cast<Instruction>(U.getUser()); - BasicBlock *UseBlock = UseInst->getParent(); - // Don't worry about dead users. - if (!DT.isReachableFromEntry(UseBlock)) - continue; - if (PHINode *PN = dyn_cast<PHINode>(UseInst)) { - // PHI nodes use the operand in the predecessor block, not the block with - // the PHI. 
- unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo()); - UseBlock = PN->getIncomingBlock(Num); - } - if (SuccToSinkTo) - SuccToSinkTo = DT.findNearestCommonDominator(SuccToSinkTo, UseBlock); - else - SuccToSinkTo = UseBlock; - // The current basic block needs to dominate the candidate. - if (!DT.dominates(BB, SuccToSinkTo)) - return false; + // Find the nearest common dominator of all users as the candidate. + BasicBlock *BB = Inst->getParent(); + for (Use &U : Inst->uses()) { + Instruction *UseInst = cast<Instruction>(U.getUser()); + BasicBlock *UseBlock = UseInst->getParent(); + // Don't worry about dead users. + if (!DT.isReachableFromEntry(UseBlock)) + continue; + if (PHINode *PN = dyn_cast<PHINode>(UseInst)) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo()); + UseBlock = PN->getIncomingBlock(Num); + } + if (SuccToSinkTo) + SuccToSinkTo = DT.findNearestCommonDominator(SuccToSinkTo, UseBlock); + else + SuccToSinkTo = UseBlock; + // The current basic block needs to dominate the candidate. + if (!DT.dominates(BB, SuccToSinkTo)) + return false; } - if (SuccToSinkTo) { - // The nearest common dominator may be in a parent loop of BB, which may not - // be beneficial. Find an ancestor. - while (SuccToSinkTo != BB && - !IsAcceptableTarget(Inst, SuccToSinkTo, DT, LI)) - SuccToSinkTo = DT.getNode(SuccToSinkTo)->getIDom()->getBlock(); - if (SuccToSinkTo == BB) - SuccToSinkTo = nullptr; + if (SuccToSinkTo) { + // The nearest common dominator may be in a parent loop of BB, which may not + // be beneficial. Find an ancestor. + while (SuccToSinkTo != BB && + !IsAcceptableTarget(Inst, SuccToSinkTo, DT, LI)) + SuccToSinkTo = DT.getNode(SuccToSinkTo)->getIDom()->getBlock(); + if (SuccToSinkTo == BB) + SuccToSinkTo = nullptr; } // If we couldn't find a block to sink to, ignore this instruction. diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp index 9b18c945d9..b201837ea6 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -756,10 +756,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs, // For each PHI node in this block, check whether there are immediate folding // opportunities from speculation, and whether that speculation will be // valid. This determise the set of safe PHIs to speculate. - llvm::erase_if(PNs, [&](PHINode *PN) { - return !isSafeAndProfitableToSpeculateAroundPHI( - *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI); - }); + llvm::erase_if(PNs, [&](PHINode *PN) { + return !isSafeAndProfitableToSpeculateAroundPHI( + *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI); + }); // If no PHIs were profitable, skip. 
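// The Sink.cpp hunk above chooses a sink destination by folding every user's
// block through the nearest common dominator (a PHI use counts as living in
// the matching incoming predecessor block, not in the PHI's own block).  A toy
// sketch of that nearest-common-ancestor step over an explicit parent array
// standing in for the dominator tree; the block numbering is invented for
// illustration:

#include <cstdio>
#include <vector>

// parent[b] is the immediate dominator of block b; parent[root] == root.
int depthOf(const std::vector<int> &parent, int b) {
  int d = 0;
  while (parent[b] != b) { b = parent[b]; ++d; }
  return d;
}

int nearestCommonDominator(const std::vector<int> &parent, int a, int b) {
  int da = depthOf(parent, a), db = depthOf(parent, b);
  while (da > db) { a = parent[a]; --da; }              // lift the deeper node first
  while (db > da) { b = parent[b]; --db; }
  while (a != b) { a = parent[a]; b = parent[b]; }      // lift both until they meet
  return a;
}

int main() {
  // Immediate dominators: block 0 dominates 1 and 2; block 1 dominates 3 and 4.
  std::vector<int> idom = {0, 0, 0, 1, 1};
  int common = nearestCommonDominator(idom, 3, 4);      // uses in blocks 3 and 4
  common = nearestCommonDominator(idom, common, 2);     // and another use in block 2
  std::printf("%d\n", common);                          // prints 0 (the entry block)
}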
if (PNs.empty()) { LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n"); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp index c78185f2a6..4dbeb21638 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -245,13 +245,13 @@ static unsigned ComputeSpeculationCost(const Instruction *I, case Instruction::FNeg: case Instruction::ICmp: case Instruction::FCmp: - case Instruction::Trunc: - case Instruction::Freeze: - case Instruction::ExtractElement: - case Instruction::InsertElement: - case Instruction::ShuffleVector: - case Instruction::ExtractValue: - case Instruction::InsertValue: + case Instruction::Trunc: + case Instruction::Freeze: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::ExtractValue: + case Instruction::InsertValue: return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); default: @@ -281,7 +281,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( for (const Value *V : U->operand_values()) { if (const Instruction *I = dyn_cast<Instruction>(V)) { - if (NotHoisted.contains(I)) + if (NotHoisted.contains(I)) return false; } } diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 577992ccb5..f8177f1f99 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -55,7 +55,7 @@ // - When (i' - i) is constant but i and i' are not, we could still perform // SLSR. -#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" +#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallVector.h" @@ -96,39 +96,39 @@ static const unsigned UnknownAddressSpace = namespace { -class StraightLineStrengthReduceLegacyPass : public FunctionPass { - const DataLayout *DL = nullptr; - -public: - static char ID; - - StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) { - initializeStraightLineStrengthReduceLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - // We do not modify the shape of the CFG. 
- AU.setPreservesCFG(); - } - - bool doInitialization(Module &M) override { - DL = &M.getDataLayout(); - return false; - } - - bool runOnFunction(Function &F) override; -}; - -class StraightLineStrengthReduce { +class StraightLineStrengthReduceLegacyPass : public FunctionPass { + const DataLayout *DL = nullptr; + public: - StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT, - ScalarEvolution *SE, TargetTransformInfo *TTI) - : DL(DL), DT(DT), SE(SE), TTI(TTI) {} - + static char ID; + + StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) { + initializeStraightLineStrengthReduceLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + // We do not modify the shape of the CFG. + AU.setPreservesCFG(); + } + + bool doInitialization(Module &M) override { + DL = &M.getDataLayout(); + return false; + } + + bool runOnFunction(Function &F) override; +}; + +class StraightLineStrengthReduce { +public: + StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT, + ScalarEvolution *SE, TargetTransformInfo *TTI) + : DL(DL), DT(DT), SE(SE), TTI(TTI) {} + // SLSR candidate. Such a candidate must be in one of the forms described in // the header comments. struct Candidate { @@ -176,7 +176,7 @@ public: Candidate *Basis = nullptr; }; - bool runOnFunction(Function &F); + bool runOnFunction(Function &F); private: // Returns true if Basis is a basis for C, i.e., Basis dominates C and they @@ -256,18 +256,18 @@ private: } // end anonymous namespace -char StraightLineStrengthReduceLegacyPass::ID = 0; +char StraightLineStrengthReduceLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr", +INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr", "Straight line strength reduction", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr", +INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr", "Straight line strength reduction", false, false) FunctionPass *llvm::createStraightLineStrengthReducePass() { - return new StraightLineStrengthReduceLegacyPass(); + return new StraightLineStrengthReduceLegacyPass(); } bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, @@ -285,7 +285,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI) { - SmallVector<const Value *, 4> Indices(GEP->indices()); + SmallVector<const Value *, 4> Indices(GEP->indices()); return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), Indices) == TargetTransformInfo::TCC_Free; } @@ -715,17 +715,17 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( UnlinkedInstructions.push_back(C.Ins); } -bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) { +bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - return StraightLineStrengthReduce(DL, DT, SE, 
TTI).runOnFunction(F); -} - -bool StraightLineStrengthReduce::runOnFunction(Function &F) { + auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + return StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F); +} + +bool StraightLineStrengthReduce::runOnFunction(Function &F) { // Traverse the dominator tree in the depth-first order. This order makes sure // all bases of a candidate are in Candidates when we process it. for (const auto Node : depth_first(DT)) @@ -755,25 +755,25 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { UnlinkedInstructions.clear(); return Ret; } - -namespace llvm { - -PreservedAnalyses -StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) { - const DataLayout *DL = &F.getParent()->getDataLayout(); - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); - auto *TTI = &AM.getResult<TargetIRAnalysis>(F); - - if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F)) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<ScalarEvolutionAnalysis>(); - PA.preserve<TargetIRAnalysis>(); - return PA; -} - -} // namespace llvm + +namespace llvm { + +PreservedAnalyses +StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) { + const DataLayout *DL = &F.getParent()->getDataLayout(); + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); + auto *TTI = &AM.getResult<TargetIRAnalysis>(F); + + if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<ScalarEvolutionAnalysis>(); + PA.preserve<TargetIRAnalysis>(); + return PA; +} + +} // namespace llvm diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp index 3e15cad5f3..ae83f06ead 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/StructurizeCFG.h" +#include "llvm/Transforms/Scalar/StructurizeCFG.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" @@ -29,7 +29,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -57,7 +57,7 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "structurizecfg" // The name for newly created blocks. -const char FlowBlockName[] = "Flow"; +const char FlowBlockName[] = "Flow"; namespace { @@ -236,7 +236,7 @@ public: /// consist of a network of PHI nodes where the true incoming values expresses /// breaks and the false values expresses continue states. 
-class StructurizeCFG { +class StructurizeCFG { Type *Boolean; ConstantInt *BoolTrue; ConstantInt *BoolFalse; @@ -245,7 +245,7 @@ class StructurizeCFG { Function *Func; Region *ParentRegion; - LegacyDivergenceAnalysis *DA = nullptr; + LegacyDivergenceAnalysis *DA = nullptr; DominatorTree *DT; SmallVector<RegionNode *, 8> Order; @@ -310,35 +310,35 @@ class StructurizeCFG { void rebuildSSA(); public: - void init(Region *R); - bool run(Region *R, DominatorTree *DT); - bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA); -}; - -class StructurizeCFGLegacyPass : public RegionPass { - bool SkipUniformRegions; - -public: + void init(Region *R); + bool run(Region *R, DominatorTree *DT); + bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA); +}; + +class StructurizeCFGLegacyPass : public RegionPass { + bool SkipUniformRegions; + +public: static char ID; - explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) - : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { + explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) + : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { if (ForceSkipUniformRegions.getNumOccurrences()) SkipUniformRegions = ForceSkipUniformRegions.getValue(); - initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); + initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); } - bool runOnRegion(Region *R, RGPassManager &RGM) override { - StructurizeCFG SCFG; - SCFG.init(R); - if (SkipUniformRegions) { - LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>(); - if (SCFG.makeUniformRegion(R, DA)) - return false; - } - DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - return SCFG.run(R, DT); - } + bool runOnRegion(Region *R, RGPassManager &RGM) override { + StructurizeCFG SCFG; + SCFG.init(R); + if (SkipUniformRegions) { + LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>(); + if (SCFG.makeUniformRegion(R, DA)) + return false; + } + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + return SCFG.run(R, DT); + } StringRef getPassName() const override { return "Structurize control flow"; } @@ -355,16 +355,16 @@ public: } // end anonymous namespace -char StructurizeCFGLegacyPass::ID = 0; +char StructurizeCFGLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", - "Structurize the CFG", false, false) +INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", + "Structurize the CFG", false, false) INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) -INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", - "Structurize the CFG", false, false) +INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", + "Structurize the CFG", false, false) /// Build up the general order of nodes, by performing a topological sort of the /// parent region's nodes, while ensuring that there is no outer cycle node @@ -1008,59 +1008,59 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); } -void StructurizeCFG::init(Region *R) { - LLVMContext &Context = R->getEntry()->getContext(); - - Boolean = Type::getInt1Ty(Context); - BoolTrue = ConstantInt::getTrue(Context); - BoolFalse = 
ConstantInt::getFalse(Context); - BoolUndef = UndefValue::get(Boolean); - - this->DA = nullptr; -} - -bool StructurizeCFG::makeUniformRegion(Region *R, - LegacyDivergenceAnalysis *DA) { +void StructurizeCFG::init(Region *R) { + LLVMContext &Context = R->getEntry()->getContext(); + + Boolean = Type::getInt1Ty(Context); + BoolTrue = ConstantInt::getTrue(Context); + BoolFalse = ConstantInt::getFalse(Context); + BoolUndef = UndefValue::get(Boolean); + + this->DA = nullptr; +} + +bool StructurizeCFG::makeUniformRegion(Region *R, + LegacyDivergenceAnalysis *DA) { if (R->isTopLevelRegion()) return false; - this->DA = DA; - // TODO: We could probably be smarter here with how we handle sub-regions. - // We currently rely on the fact that metadata is set by earlier invocations - // of the pass on sub-regions, and that this metadata doesn't get lost -- - // but we shouldn't rely on metadata for correctness! - unsigned UniformMDKindID = - R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); - - if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { - LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R - << '\n'); - - // Mark all direct child block terminators as having been treated as - // uniform. To account for a possible future in which non-uniform - // sub-regions are treated more cleverly, indirect children are not - // marked as uniform. - MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); - for (RegionNode *E : R->elements()) { - if (E->isSubRegion()) - continue; - - if (Instruction *Term = E->getEntry()->getTerminator()) - Term->setMetadata(UniformMDKindID, MD); - } - - return true; + this->DA = DA; + // TODO: We could probably be smarter here with how we handle sub-regions. + // We currently rely on the fact that metadata is set by earlier invocations + // of the pass on sub-regions, and that this metadata doesn't get lost -- + // but we shouldn't rely on metadata for correctness! + unsigned UniformMDKindID = + R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); + + if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { + LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R + << '\n'); + + // Mark all direct child block terminators as having been treated as + // uniform. To account for a possible future in which non-uniform + // sub-regions are treated more cleverly, indirect children are not + // marked as uniform. 
+ MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); + for (RegionNode *E : R->elements()) { + if (E->isSubRegion()) + continue; + + if (Instruction *Term = E->getEntry()->getTerminator()) + Term->setMetadata(UniformMDKindID, MD); + } + + return true; } - return false; -} - -/// Run the transformation for each region found -bool StructurizeCFG::run(Region *R, DominatorTree *DT) { - if (R->isTopLevelRegion()) - return false; - - this->DT = DT; - + return false; +} + +/// Run the transformation for each region found +bool StructurizeCFG::run(Region *R, DominatorTree *DT) { + if (R->isTopLevelRegion()) + return false; + + this->DT = DT; + Func = R->getEntry()->getParent(); ParentRegion = R; @@ -1088,33 +1088,33 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { } Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { - return new StructurizeCFGLegacyPass(SkipUniformRegions); -} - -static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { - Regions.push_back(&R); - for (const auto &E : R) - addRegionIntoQueue(*E, Regions); -} - -PreservedAnalyses StructurizeCFGPass::run(Function &F, - FunctionAnalysisManager &AM) { - - bool Changed = false; - DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto &RI = AM.getResult<RegionInfoAnalysis>(F); - std::vector<Region *> Regions; - addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); - while (!Regions.empty()) { - Region *R = Regions.back(); - StructurizeCFG SCFG; - SCFG.init(R); - Changed |= SCFG.run(R, DT); - Regions.pop_back(); - } - if (!Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - return PA; + return new StructurizeCFGLegacyPass(SkipUniformRegions); } + +static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { + Regions.push_back(&R); + for (const auto &E : R) + addRegionIntoQueue(*E, Regions); +} + +PreservedAnalyses StructurizeCFGPass::run(Function &F, + FunctionAnalysisManager &AM) { + + bool Changed = false; + DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto &RI = AM.getResult<RegionInfoAnalysis>(F); + std::vector<Region *> Regions; + addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); + while (!Regions.empty()) { + Region *R = Regions.back(); + StructurizeCFG SCFG; + SCFG.init(R); + Changed |= SCFG.run(R, DT); + Regions.pop_back(); + } + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp index 9e7cccc884..50f7ac0a31 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -92,10 +92,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); /// Scan the specified function for alloca instructions. /// If it contains any dynamic allocas, returns false. static bool canTRE(Function &F) { - // FIXME: The code generator produces really bad code when an 'escaping - // alloca' is changed from being a static alloca to being a dynamic alloca. - // Until this is resolved, disable this transformation if that would ever - // happen. This bug is PR962. + // FIXME: The code generator produces really bad code when an 'escaping + // alloca' is changed from being a static alloca to being a dynamic alloca. 
+ // Until this is resolved, disable this transformation if that would ever + // happen. This bug is PR962. return llvm::all_of(instructions(F), [](Instruction &I) { auto *AI = dyn_cast<AllocaInst>(&I); return !AI || AI->isStaticAlloca(); @@ -240,11 +240,11 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, Escaped = ESCAPED; CallInst *CI = dyn_cast<CallInst>(&I); - // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is - // considered accessing memory and will be marked as a tail call if we - // don't bail out here. - if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) || - isa<PseudoProbeInst>(&I)) + // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is + // considered accessing memory and will be marked as a tail call if we + // don't bail out here. + if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) || + isa<PseudoProbeInst>(&I)) continue; bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); @@ -286,7 +286,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, } } - for (auto *SuccBB : successors(BB)) { + for (auto *SuccBB : successors(BB)) { auto &State = Visited[SuccBB]; if (State < Escaped) { State = Escaped; @@ -426,7 +426,7 @@ class TailRecursionEliminator { DomTreeUpdater &DTU) : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {} - CallInst *findTRECandidate(BasicBlock *BB, + CallInst *findTRECandidate(BasicBlock *BB, bool CannotTailCallElimCallsMarkedTail); void createTailRecurseLoopHeader(CallInst *CI); @@ -435,9 +435,9 @@ class TailRecursionEliminator { bool eliminateCall(CallInst *CI); - void cleanupAndFinalize(); + void cleanupAndFinalize(); - bool processBlock(BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail); + bool processBlock(BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail); public: static bool eliminate(Function &F, const TargetTransformInfo *TTI, @@ -447,8 +447,8 @@ public: } // namespace CallInst *TailRecursionEliminator::findTRECandidate( - BasicBlock *BB, bool CannotTailCallElimCallsMarkedTail) { - Instruction *TI = BB->getTerminator(); + BasicBlock *BB, bool CannotTailCallElimCallsMarkedTail) { + Instruction *TI = BB->getTerminator(); if (&BB->front() == TI) // Make sure there is something before the terminator. return nullptr; @@ -747,50 +747,50 @@ void TailRecursionEliminator::cleanupAndFinalize() { } } -bool TailRecursionEliminator::processBlock( - BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail) { - Instruction *TI = BB.getTerminator(); - - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - if (BI->isConditional()) - return false; - - BasicBlock *Succ = BI->getSuccessor(0); - ReturnInst *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true)); - - if (!Ret) - return false; - - CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail); - - if (!CI) - return false; - - LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ - << "INTO UNCOND BRANCH PRED: " << BB); - FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU); - ++NumRetDuped; - - // If all predecessors of Succ have been eliminated by - // FoldReturnIntoUncondBranch, delete it. It is important to empty it, - // because the ret instruction in there is still using a value which - // eliminateCall will attempt to remove. This block can only contain - // instructions that can't have uses, therefore it is safe to remove. 
- if (pred_empty(Succ)) - DTU.deleteBB(Succ); - - eliminateCall(CI); - return true; - } else if (isa<ReturnInst>(TI)) { - CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail); - - if (CI) - return eliminateCall(CI); - } - - return false; -} - +bool TailRecursionEliminator::processBlock( + BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail) { + Instruction *TI = BB.getTerminator(); + + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isConditional()) + return false; + + BasicBlock *Succ = BI->getSuccessor(0); + ReturnInst *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true)); + + if (!Ret) + return false; + + CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail); + + if (!CI) + return false; + + LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ + << "INTO UNCOND BRANCH PRED: " << BB); + FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU); + ++NumRetDuped; + + // If all predecessors of Succ have been eliminated by + // FoldReturnIntoUncondBranch, delete it. It is important to empty it, + // because the ret instruction in there is still using a value which + // eliminateCall will attempt to remove. This block can only contain + // instructions that can't have uses, therefore it is safe to remove. + if (pred_empty(Succ)) + DTU.deleteBB(Succ); + + eliminateCall(CI); + return true; + } else if (isa<ReturnInst>(TI)) { + CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail); + + if (CI) + return eliminateCall(CI); + } + + return false; +} + bool TailRecursionEliminator::eliminate(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, @@ -815,11 +815,11 @@ bool TailRecursionEliminator::eliminate(Function &F, // TRE would deallocate variable sized allocas, TRE doesn't). bool CanTRETailMarkedCall = canTRE(F); - // Change any tail recursive calls to loops. + // Change any tail recursive calls to loops. 
TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU); - for (BasicBlock &BB : F) - MadeChange |= TRE.processBlock(BB, !CanTRETailMarkedCall); + for (BasicBlock &BB : F) + MadeChange |= TRE.processBlock(BB, !CanTRETailMarkedCall); TRE.cleanupAndFinalize(); diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp index 80a7d3a43a..ec00528465 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp @@ -48,12 +48,12 @@ static void warnAboutLeftoverTransformations(Loop *L, if (hasVectorizeTransformation(L) == TM_ForcedByUser) { LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n"); - Optional<ElementCount> VectorizeWidth = - getOptionalElementCountLoopAttribute(L); + Optional<ElementCount> VectorizeWidth = + getOptionalElementCountLoopAttribute(L); Optional<int> InterleaveCount = getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); - if (!VectorizeWidth || VectorizeWidth->isVector()) + if (!VectorizeWidth || VectorizeWidth->isVector()) ORE->emit( DiagnosticInfoOptimizationFailure(DEBUG_TYPE, "FailedRequestedVectorization", diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make b/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make index 75501ae81a..00b9ef5ca1 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make @@ -12,14 +12,14 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/Analysis - contrib/libs/llvm12/lib/IR - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine - contrib/libs/llvm12/lib/Transforms/InstCombine - contrib/libs/llvm12/lib/Transforms/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/Analysis + contrib/libs/llvm12/lib/IR + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine + contrib/libs/llvm12/lib/Transforms/InstCombine + contrib/libs/llvm12/lib/Transforms/Utils ) ADDINCL( @@ -33,11 +33,11 @@ NO_UTIL() SRCS( ADCE.cpp AlignmentFromAssumptions.cpp - AnnotationRemarks.cpp + AnnotationRemarks.cpp BDCE.cpp CallSiteSplitting.cpp ConstantHoisting.cpp - ConstraintElimination.cpp + ConstraintElimination.cpp CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp @@ -60,7 +60,7 @@ SRCS( LoopDataPrefetch.cpp LoopDeletion.cpp LoopDistribute.cpp - LoopFlatten.cpp + LoopFlatten.cpp LoopFuse.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp @@ -97,7 +97,7 @@ SRCS( SCCP.cpp SROA.cpp Scalar.cpp - ScalarizeMaskedMemIntrin.cpp + ScalarizeMaskedMemIntrin.cpp Scalarizer.cpp SeparateConstOffsetFromGEP.cpp SimpleLoopUnswitch.cpp |