field | value | date
---|---|---
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Analysis |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Analysis')
84 files changed, 11167 insertions, 11167 deletions
diff --git a/contrib/libs/llvm12/lib/Analysis/AliasAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/AliasAnalysis.cpp index fae7a84332..aa1078a233 100644 --- a/contrib/libs/llvm12/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/AliasAnalysis.cpp @@ -24,7 +24,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" @@ -55,17 +55,17 @@ #include <functional> #include <iterator> -#define DEBUG_TYPE "aa" - +#define DEBUG_TYPE "aa" + using namespace llvm; -STATISTIC(NumNoAlias, "Number of NoAlias results"); -STATISTIC(NumMayAlias, "Number of MayAlias results"); -STATISTIC(NumMustAlias, "Number of MustAlias results"); - +STATISTIC(NumNoAlias, "Number of NoAlias results"); +STATISTIC(NumMayAlias, "Number of MayAlias results"); +STATISTIC(NumMustAlias, "Number of MustAlias results"); + /// Allow disabling BasicAA from the AA results. This is particularly useful /// when testing to isolate a single AA implementation. -cl::opt<bool> DisableBasicAA("disable-basic-aa", cl::Hidden, cl::init(false)); +cl::opt<bool> DisableBasicAA("disable-basic-aa", cl::Hidden, cl::init(false)); AAResults::AAResults(AAResults &&Arg) : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) { @@ -116,25 +116,25 @@ AliasResult AAResults::alias(const MemoryLocation &LocA, AliasResult AAResults::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { - AliasResult Result = MayAlias; - - Depth++; + AliasResult Result = MayAlias; + + Depth++; for (const auto &AA : AAs) { - Result = AA->alias(LocA, LocB, AAQI); + Result = AA->alias(LocA, LocB, AAQI); if (Result != MayAlias) - break; - } - Depth--; - - if (Depth == 0) { - if (Result == NoAlias) - ++NumNoAlias; - else if (Result == MustAlias) - ++NumMustAlias; - else - ++NumMayAlias; + break; } - return Result; + Depth--; + + if (Depth == 0) { + if (Result == NoAlias) + ++NumNoAlias; + else if (Result == MustAlias) + ++NumMustAlias; + else + ++NumMayAlias; + } + return Result; } bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, @@ -234,7 +234,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call, unsigned ArgIdx = std::distance(Call->arg_begin(), AI); MemoryLocation ArgLoc = MemoryLocation::getForArgument(Call, ArgIdx, TLI); - AliasResult ArgAlias = alias(ArgLoc, Loc, AAQI); + AliasResult ArgAlias = alias(ArgLoc, Loc, AAQI); if (ArgAlias != NoAlias) { ModRefInfo ArgMask = getArgModRefInfo(Call, ArgIdx); AllArgsMask = unionModRef(AllArgsMask, ArgMask); @@ -254,7 +254,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call, // If Loc is a constant memory location, the call definitely could not // modify the memory location. - if (isModSet(Result) && pointsToConstantMemory(Loc, AAQI, /*OrLocal*/ false)) + if (isModSet(Result) && pointsToConstantMemory(Loc, AAQI, /*OrLocal*/ false)) Result = clearMod(Result); return Result; @@ -331,7 +331,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call1, // ModRefC1 indicates what Call1 might do to Call2ArgLoc, and we use // above ArgMask to update dependence info. 
- ModRefInfo ModRefC1 = getModRefInfo(Call1, Call2ArgLoc, AAQI); + ModRefInfo ModRefC1 = getModRefInfo(Call1, Call2ArgLoc, AAQI); ArgMask = intersectModRef(ArgMask, ModRefC1); // Conservatively clear IsMustAlias unless only MustAlias is found. @@ -372,7 +372,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call1, // might Mod Call1ArgLoc, then we care about either a Mod or a Ref by // Call2. If Call1 might Ref, then we care only about a Mod by Call2. ModRefInfo ArgModRefC1 = getArgModRefInfo(Call1, Call1ArgIdx); - ModRefInfo ModRefC2 = getModRefInfo(Call2, Call1ArgLoc, AAQI); + ModRefInfo ModRefC2 = getModRefInfo(Call2, Call1ArgLoc, AAQI); if ((isModSet(ArgModRefC1) && isModOrRefSet(ModRefC2)) || (isRefSet(ArgModRefC1) && isModSet(ModRefC2))) R = intersectModRef(unionModRef(R, ArgModRefC1), Result); @@ -647,43 +647,43 @@ ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, return ModRefInfo::ModRef; } -ModRefInfo AAResults::getModRefInfo(const Instruction *I, - const Optional<MemoryLocation> &OptLoc, - AAQueryInfo &AAQIP) { - if (OptLoc == None) { - if (const auto *Call = dyn_cast<CallBase>(I)) { - return createModRefInfo(getModRefBehavior(Call)); - } - } - - const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation()); - - switch (I->getOpcode()) { - case Instruction::VAArg: - return getModRefInfo((const VAArgInst *)I, Loc, AAQIP); - case Instruction::Load: - return getModRefInfo((const LoadInst *)I, Loc, AAQIP); - case Instruction::Store: - return getModRefInfo((const StoreInst *)I, Loc, AAQIP); - case Instruction::Fence: - return getModRefInfo((const FenceInst *)I, Loc, AAQIP); - case Instruction::AtomicCmpXchg: - return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP); - case Instruction::AtomicRMW: - return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP); - case Instruction::Call: - return getModRefInfo((const CallInst *)I, Loc, AAQIP); - case Instruction::Invoke: - return getModRefInfo((const InvokeInst *)I, Loc, AAQIP); - case Instruction::CatchPad: - return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP); - case Instruction::CatchRet: - return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP); - default: - return ModRefInfo::NoModRef; - } -} - +ModRefInfo AAResults::getModRefInfo(const Instruction *I, + const Optional<MemoryLocation> &OptLoc, + AAQueryInfo &AAQIP) { + if (OptLoc == None) { + if (const auto *Call = dyn_cast<CallBase>(I)) { + return createModRefInfo(getModRefBehavior(Call)); + } + } + + const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation()); + + switch (I->getOpcode()) { + case Instruction::VAArg: + return getModRefInfo((const VAArgInst *)I, Loc, AAQIP); + case Instruction::Load: + return getModRefInfo((const LoadInst *)I, Loc, AAQIP); + case Instruction::Store: + return getModRefInfo((const StoreInst *)I, Loc, AAQIP); + case Instruction::Fence: + return getModRefInfo((const FenceInst *)I, Loc, AAQIP); + case Instruction::AtomicCmpXchg: + return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP); + case Instruction::AtomicRMW: + return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP); + case Instruction::Call: + return getModRefInfo((const CallInst *)I, Loc, AAQIP); + case Instruction::Invoke: + return getModRefInfo((const InvokeInst *)I, Loc, AAQIP); + case Instruction::CatchPad: + return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP); + case Instruction::CatchRet: + return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP); + default: + return ModRefInfo::NoModRef; + } +} + /// Return information about 
whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I /// in a BasicBlock. @@ -697,7 +697,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, if (!DT) return ModRefInfo::ModRef; - const Value *Object = getUnderlyingObject(MemLoc.Ptr); + const Value *Object = getUnderlyingObject(MemLoc.Ptr); if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) || isa<Constant>(Object)) return ModRefInfo::ModRef; @@ -725,7 +725,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, !Call->isByValArgument(ArgNo))) continue; - AliasResult AR = alias(*CI, Object); + AliasResult AR = alias(*CI, Object); // If this is a no-capture pointer argument, see if we can tell that it // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't @@ -883,8 +883,8 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequiredTransitive<BasicAAWrapperPass>(); - AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); + AU.addRequiredTransitive<BasicAAWrapperPass>(); + AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); // We also need to mark all the alias analysis passes we will potentially // probe in runOnFunction as used here to ensure the legacy pass manager @@ -900,13 +900,13 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addUsedIfAvailable<ExternalAAWrapperPass>(); } -AAManager::Result AAManager::run(Function &F, FunctionAnalysisManager &AM) { - Result R(AM.getResult<TargetLibraryAnalysis>(F)); - for (auto &Getter : ResultGetters) - (*Getter)(F, AM, R); - return R; -} - +AAManager::Result AAManager::run(Function &F, FunctionAnalysisManager &AM) { + Result R(AM.getResult<TargetLibraryAnalysis>(F)); + for (auto &Getter : ResultGetters) + (*Getter)(F, AM, R); + return R; +} + AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR) { AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)); @@ -943,9 +943,9 @@ bool llvm::isNoAliasCall(const Value *V) { return false; } -static bool isNoAliasOrByValArgument(const Value *V) { +static bool isNoAliasOrByValArgument(const Value *V) { if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasNoAliasAttr() || A->hasByValAttr(); + return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } @@ -956,13 +956,13 @@ bool llvm::isIdentifiedObject(const Value *V) { return true; if (isNoAliasCall(V)) return true; - if (isNoAliasOrByValArgument(V)) - return true; + if (isNoAliasOrByValArgument(V)) + return true; return false; } bool llvm::isIdentifiedFunctionLocal(const Value *V) { - return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V); + return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V); } void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) { diff --git a/contrib/libs/llvm12/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/libs/llvm12/lib/Analysis/AliasAnalysisEvaluator.cpp index bbfa82bcca..89d2641d72 100644 --- a/contrib/libs/llvm12/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/libs/llvm12/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -140,13 +140,13 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { // iterate over the worklist, and run the full (n^2)/2 disambiguations for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); I1 
!= E; ++I1) { - auto I1Size = LocationSize::afterPointer(); + auto I1Size = LocationSize::afterPointer(); Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); if (I1ElTy->isSized()) I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy)); for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { - auto I2Size = LocationSize::afterPointer(); + auto I2Size = LocationSize::afterPointer(); Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType(); if (I2ElTy->isSized()) I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy)); @@ -231,7 +231,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { // Mod/ref alias analysis: compare all pairs of calls and values for (CallBase *Call : Calls) { for (auto Pointer : Pointers) { - auto Size = LocationSize::afterPointer(); + auto Size = LocationSize::afterPointer(); Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType(); if (ElTy->isSized()) Size = LocationSize::precise(DL.getTypeStoreSize(ElTy)); diff --git a/contrib/libs/llvm12/lib/Analysis/AliasSetTracker.cpp b/contrib/libs/llvm12/lib/Analysis/AliasSetTracker.cpp index 486b4d99df..118928f1d7 100644 --- a/contrib/libs/llvm12/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/libs/llvm12/lib/Analysis/AliasSetTracker.cpp @@ -23,7 +23,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -83,7 +83,7 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { addRef(); } } else if (ASHadUnknownInsts) { - llvm::append_range(UnknownInsts, AS.UnknownInsts); + llvm::append_range(UnknownInsts, AS.UnknownInsts); AS.UnknownInsts.clear(); } @@ -438,9 +438,9 @@ void AliasSetTracker::addUnknown(Instruction *Inst) { break; // FIXME: Add lifetime/invariant intrinsics (See: PR30807). 
case Intrinsic::assume: - case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::sideeffect: - case Intrinsic::pseudoprobe: + case Intrinsic::pseudoprobe: return; } } @@ -672,10 +672,10 @@ void AliasSet::print(raw_ostream &OS) const { for (iterator I = begin(), E = end(); I != E; ++I) { if (I != begin()) OS << ", "; I.getPointer()->printAsOperand(OS << "("); - if (I.getSize() == LocationSize::afterPointer()) - OS << ", unknown after)"; - else if (I.getSize() == LocationSize::beforeOrAfterPointer()) - OS << ", unknown before-or-after)"; + if (I.getSize() == LocationSize::afterPointer()) + OS << ", unknown after)"; + else if (I.getSize() == LocationSize::beforeOrAfterPointer()) + OS << ", unknown before-or-after)"; else OS << ", " << I.getSize() << ")"; } @@ -753,11 +753,11 @@ namespace { bool runOnFunction(Function &F) override { auto &AAWP = getAnalysis<AAResultsWrapperPass>(); - AliasSetTracker Tracker(AAWP.getAAResults()); + AliasSetTracker Tracker(AAWP.getAAResults()); errs() << "Alias sets for function '" << F.getName() << "':\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - Tracker.add(&*I); - Tracker.print(errs()); + Tracker.add(&*I); + Tracker.print(errs()); return false; } }; @@ -771,16 +771,16 @@ INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets", INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets", "Alias Set Printer", false, true) - -AliasSetsPrinterPass::AliasSetsPrinterPass(raw_ostream &OS) : OS(OS) {} - -PreservedAnalyses AliasSetsPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &AA = AM.getResult<AAManager>(F); - AliasSetTracker Tracker(AA); - OS << "Alias sets for function '" << F.getName() << "':\n"; - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - Tracker.add(&*I); - Tracker.print(OS); - return PreservedAnalyses::all(); -} + +AliasSetsPrinterPass::AliasSetsPrinterPass(raw_ostream &OS) : OS(OS) {} + +PreservedAnalyses AliasSetsPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &AA = AM.getResult<AAManager>(F); + AliasSetTracker Tracker(AA); + OS << "Alias sets for function '" << F.getName() << "':\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + Tracker.add(&*I); + Tracker.print(OS); + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Analysis/Analysis.cpp b/contrib/libs/llvm12/lib/Analysis/Analysis.cpp index db51670615..848e52bd1b 100644 --- a/contrib/libs/llvm12/lib/Analysis/Analysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/Analysis.cpp @@ -50,20 +50,20 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeAAResultsWrapperPassPass(Registry); initializeGlobalsAAWrapperPassPass(Registry); initializeIVUsersWrapperPassPass(Registry); - initializeInstCountLegacyPassPass(Registry); + initializeInstCountLegacyPassPass(Registry); initializeIntervalPartitionPass(Registry); - initializeIRSimilarityIdentifierWrapperPassPass(Registry); + initializeIRSimilarityIdentifierWrapperPassPass(Registry); initializeLazyBranchProbabilityInfoPassPass(Registry); initializeLazyBlockFrequencyInfoPassPass(Registry); initializeLazyValueInfoWrapperPassPass(Registry); initializeLazyValueInfoPrinterPass(Registry); initializeLegacyDivergenceAnalysisPass(Registry); - initializeLintLegacyPassPass(Registry); + initializeLintLegacyPassPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); 
initializeMemDerefPrinterPass(Registry); initializeMemoryDependenceWrapperPassPass(Registry); - initializeModuleDebugInfoLegacyPrinterPass(Registry); + initializeModuleDebugInfoLegacyPrinterPass(Registry); initializeModuleSummaryIndexWrapperPassPass(Registry); initializeMustExecutePrinterPass(Registry); initializeMustBeExecutedContextPrinterPass(Registry); diff --git a/contrib/libs/llvm12/lib/Analysis/AssumeBundleQueries.cpp b/contrib/libs/llvm12/lib/Analysis/AssumeBundleQueries.cpp index 0084e2f13f..eab23ee69a 100644 --- a/contrib/libs/llvm12/lib/Analysis/AssumeBundleQueries.cpp +++ b/contrib/libs/llvm12/lib/Analysis/AssumeBundleQueries.cpp @@ -108,17 +108,17 @@ llvm::getKnowledgeFromBundle(CallInst &Assume, Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey()); if (bundleHasArgument(BOI, ABA_WasOn)) Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn); - auto GetArgOr1 = [&](unsigned Idx) -> unsigned { - if (auto *ConstInt = dyn_cast<ConstantInt>( - getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx))) - return ConstInt->getZExtValue(); - return 1; - }; + auto GetArgOr1 = [&](unsigned Idx) -> unsigned { + if (auto *ConstInt = dyn_cast<ConstantInt>( + getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx))) + return ConstInt->getZExtValue(); + return 1; + }; if (BOI.End - BOI.Begin > ABA_Argument) - Result.ArgValue = GetArgOr1(0); - if (Result.AttrKind == Attribute::Alignment) - if (BOI.End - BOI.Begin > ABA_Argument + 1) - Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1)); + Result.ArgValue = GetArgOr1(0); + if (Result.AttrKind == Attribute::Alignment) + if (BOI.End - BOI.Begin > ABA_Argument + 1) + Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1)); return Result; } @@ -179,15 +179,15 @@ llvm::getKnowledgeForValue(const Value *V, if (!II || Elem.Index == AssumptionCache::ExprResultIdx) continue; if (RetainedKnowledge RK = getKnowledgeFromBundle( - *II, II->bundle_op_info_begin()[Elem.Index])) { - if (V != RK.WasOn) - continue; + *II, II->bundle_op_info_begin()[Elem.Index])) { + if (V != RK.WasOn) + continue; if (is_contained(AttrKinds, RK.AttrKind) && Filter(RK, II, &II->bundle_op_info_begin()[Elem.Index])) { NumUsefullAssumeQueries++; return RK; } - } + } } return RetainedKnowledge::none(); } diff --git a/contrib/libs/llvm12/lib/Analysis/AssumptionCache.cpp b/contrib/libs/llvm12/lib/Analysis/AssumptionCache.cpp index 70053fdf8d..9cd9f1df83 100644 --- a/contrib/libs/llvm12/lib/Analysis/AssumptionCache.cpp +++ b/contrib/libs/llvm12/lib/Analysis/AssumptionCache.cpp @@ -107,7 +107,7 @@ findAffectedValues(CallInst *CI, AddAffected(A); AddAffected(B); // (A << C) or (A >>_s C) or (A >>_u C) where C is some constant. - } else if (match(V, m_Shift(m_Value(A), m_ConstantInt()))) { + } else if (match(V, m_Shift(m_Value(A), m_ConstantInt()))) { AddAffected(A); } }; @@ -115,14 +115,14 @@ findAffectedValues(CallInst *CI, AddAffectedFromEq(A); AddAffectedFromEq(B); } - - Value *X; - // Handle (A + C1) u< C2, which is the canonical form of A > C3 && A < C4, - // and recognized by LVI at least. - if (Pred == ICmpInst::ICMP_ULT && - match(A, m_Add(m_Value(X), m_ConstantInt())) && - match(B, m_ConstantInt())) - AddAffected(X); + + Value *X; + // Handle (A + C1) u< C2, which is the canonical form of A > C3 && A < C4, + // and recognized by LVI at least. 
+ if (Pred == ICmpInst::ICMP_ULT && + match(A, m_Add(m_Value(X), m_ConstantInt())) && + match(B, m_ConstantInt())) + AddAffected(X); } } @@ -163,11 +163,11 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) { AffectedValues.erase(AVI); } - erase_value(AssumeHandles, CI); + erase_value(AssumeHandles, CI); } void AssumptionCache::AffectedValueCallbackVH::deleted() { - AC->AffectedValues.erase(getValPtr()); + AC->AffectedValues.erase(getValPtr()); // 'this' now dangles! } @@ -178,7 +178,7 @@ void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) { return; for (auto &A : AVI->second) - if (!llvm::is_contained(NAVV, A)) + if (!llvm::is_contained(NAVV, A)) NAVV.push_back(A); AffectedValues.erase(OV); } diff --git a/contrib/libs/llvm12/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/BasicAliasAnalysis.cpp index 97d0cb63ef..18b0263ff6 100644 --- a/contrib/libs/llvm12/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/BasicAliasAnalysis.cpp @@ -14,7 +14,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -67,7 +67,7 @@ using namespace llvm; /// Enable analysis of recursive PHI nodes. static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden, - cl::init(true)); + cl::init(true)); /// By default, even on 32-bit architectures we use 64-bit integers for /// calculations. This will allow us to more-aggressively decompose indexing @@ -92,7 +92,7 @@ STATISTIC(SearchTimes, "Number of times a GEP is decomposed"); const unsigned MaxNumPhiBBsValueReachabilityCheck = 20; // The max limit of the search depth in DecomposeGEPExpression() and -// getUnderlyingObject(), both functions need to use the same search +// getUnderlyingObject(), both functions need to use the same search // depth otherwise the algorithm in aliasGEP will assert. static const unsigned MaxLookupSearchDepth = 6; @@ -412,22 +412,22 @@ static unsigned getMaxPointerSize(const DataLayout &DL) { /// specified amount, but which may have other unrepresented high bits. As /// such, the gep cannot necessarily be reconstructed from its decomposed form. /// -/// This function is capable of analyzing everything that getUnderlyingObject -/// can look through. To be able to do that getUnderlyingObject and -/// DecomposeGEPExpression must use the same search depth -/// (MaxLookupSearchDepth). -BasicAAResult::DecomposedGEP -BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, - AssumptionCache *AC, DominatorTree *DT) { +/// This function is capable of analyzing everything that getUnderlyingObject +/// can look through. To be able to do that getUnderlyingObject and +/// DecomposeGEPExpression must use the same search depth +/// (MaxLookupSearchDepth). +BasicAAResult::DecomposedGEP +BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, + AssumptionCache *AC, DominatorTree *DT) { // Limit recursion depth to limit compile time in crazy cases. 
unsigned MaxLookup = MaxLookupSearchDepth; SearchTimes++; - const Instruction *CxtI = dyn_cast<Instruction>(V); + const Instruction *CxtI = dyn_cast<Instruction>(V); unsigned MaxPointerSize = getMaxPointerSize(DL); - DecomposedGEP Decomposed; - Decomposed.Offset = APInt(MaxPointerSize, 0); - Decomposed.HasCompileTimeConstantScale = true; + DecomposedGEP Decomposed; + Decomposed.Offset = APInt(MaxPointerSize, 0); + Decomposed.HasCompileTimeConstantScale = true; do { // See if this is a bitcast or GEP. const Operator *Op = dyn_cast<Operator>(V); @@ -440,7 +440,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, } } Decomposed.Base = V; - return Decomposed; + return Decomposed; } if (Op->getOpcode() == Instruction::BitCast || @@ -474,13 +474,13 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, } Decomposed.Base = V; - return Decomposed; + return Decomposed; } // Don't attempt to analyze GEPs over unsized objects. if (!GEPOp->getSourceElementType()->isSized()) { Decomposed.Base = V; - return Decomposed; + return Decomposed; } // Don't attempt to analyze GEPs if index scale is not a compile-time @@ -488,7 +488,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, if (isa<ScalableVectorType>(GEPOp->getSourceElementType())) { Decomposed.Base = V; Decomposed.HasCompileTimeConstantScale = false; - return Decomposed; + return Decomposed; } unsigned AS = GEPOp->getPointerAddressSpace(); @@ -507,7 +507,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, if (FieldNo == 0) continue; - Decomposed.Offset += DL.getStructLayout(STy)->getElementOffset(FieldNo); + Decomposed.Offset += DL.getStructLayout(STy)->getElementOffset(FieldNo); continue; } @@ -515,9 +515,9 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { if (CIdx->isZero()) continue; - Decomposed.Offset += - DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() * - CIdx->getValue().sextOrTrunc(MaxPointerSize); + Decomposed.Offset += + DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() * + CIdx->getValue().sextOrTrunc(MaxPointerSize); continue; } @@ -550,10 +550,10 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, // FIXME: C1*Scale and the other operations in the decomposed // (C1*Scale)*V+C2*Scale can also overflow. We should check for this // possibility. 
- bool Overflow; - APInt ScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize) - .smul_ov(Scale, Overflow); - if (Overflow) { + bool Overflow; + APInt ScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize) + .smul_ov(Scale, Overflow); + if (Overflow) { Index = OrigIndex; IndexScale = 1; IndexOffset = 0; @@ -562,7 +562,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, if (PointerSize > Width) SExtBits += PointerSize - Width; } else { - Decomposed.Offset += ScaledOffset; + Decomposed.Offset += ScaledOffset; Scale *= IndexScale.sextOrTrunc(MaxPointerSize); } @@ -585,14 +585,14 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, Scale = adjustToPointerSize(Scale, PointerSize); if (!!Scale) { - VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale, CxtI}; + VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale, CxtI}; Decomposed.VarIndices.push_back(Entry); } } // Take care of wrap-arounds - if (GepHasConstantOffset) - Decomposed.Offset = adjustToPointerSize(Decomposed.Offset, PointerSize); + if (GepHasConstantOffset) + Decomposed.Offset = adjustToPointerSize(Decomposed.Offset, PointerSize); // Analyze the base pointer next. V = GEPOp->getOperand(0); @@ -601,7 +601,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, // If the chain of expressions is too deep, just return early. Decomposed.Base = V; SearchLimitReached++; - return Decomposed; + return Decomposed; } /// Returns whether the given pointer value points to memory that is local to @@ -615,7 +615,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, SmallVector<const Value *, 16> Worklist; Worklist.push_back(Loc.Ptr); do { - const Value *V = getUnderlyingObject(Worklist.pop_back_val()); + const Value *V = getUnderlyingObject(Worklist.pop_back_val()); if (!Visited.insert(V).second) { Visited.clear(); return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); @@ -652,7 +652,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, Visited.clear(); return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); } - append_range(Worklist, PN->incoming_values()); + append_range(Worklist, PN->incoming_values()); continue; } @@ -797,8 +797,8 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA, AAQueryInfo &AAQI) { assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && "BasicAliasAnalysis doesn't support interprocedural queries."); - return aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, LocB.Size, - LocB.AATags, AAQI); + return aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, LocB.Size, + LocB.AATags, AAQI); } /// Checks to see if the specified callsite can clobber the specified memory @@ -813,7 +813,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, assert(notDifferentParent(Call, Loc.Ptr) && "AliasAnalysis query involving multiple functions!"); - const Value *Object = getUnderlyingObject(Loc.Ptr); + const Value *Object = getUnderlyingObject(Loc.Ptr); // Calls marked 'tail' cannot read or write allocas from the current frame // because the current frame might be destroyed by the time they run. However, @@ -862,9 +862,9 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, // If this is a no-capture pointer argument, see if we can tell that it // is impossible to alias the pointer we're checking. 
- AliasResult AR = getBestAAResults().alias( - MemoryLocation::getBeforeOrAfter(*CI), - MemoryLocation::getBeforeOrAfter(Object), AAQI); + AliasResult AR = getBestAAResults().alias( + MemoryLocation::getBeforeOrAfter(*CI), + MemoryLocation::getBeforeOrAfter(Object), AAQI); if (AR != MustAlias) IsMustAlias = false; // Operand doesn't alias 'Object', continue looking for other aliases @@ -910,19 +910,19 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, if (isMallocOrCallocLikeFn(Call, &TLI)) { // Be conservative if the accessed pointer may alias the allocation - // fallback to the generic handling below. - if (getBestAAResults().alias(MemoryLocation::getBeforeOrAfter(Call), - Loc, AAQI) == NoAlias) + if (getBestAAResults().alias(MemoryLocation::getBeforeOrAfter(Call), + Loc, AAQI) == NoAlias) return ModRefInfo::NoModRef; } - // The semantics of memcpy intrinsics either exactly overlap or do not - // overlap, i.e., source and destination of any given memcpy are either - // no-alias or must-alias. + // The semantics of memcpy intrinsics either exactly overlap or do not + // overlap, i.e., source and destination of any given memcpy are either + // no-alias or must-alias. if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) { - AliasResult SrcAA = - getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI); - AliasResult DestAA = - getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI); + AliasResult SrcAA = + getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI); + AliasResult DestAA = + getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI); // It's also possible for Loc to alias both src and dest, or neither. ModRefInfo rv = ModRefInfo::NoModRef; if (SrcAA != NoAlias) @@ -947,9 +947,9 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, // the guard invokes the "deopt" continuation. if (isIntrinsicCall(Call, Intrinsic::experimental_guard)) return ModRefInfo::Ref; - // The same applies to deoptimize which is essentially a guard(false). - if (isIntrinsicCall(Call, Intrinsic::experimental_deoptimize)) - return ModRefInfo::Ref; + // The same applies to deoptimize which is essentially a guard(false). + if (isIntrinsicCall(Call, Intrinsic::experimental_deoptimize)) + return ModRefInfo::Ref; // Like assumes, invariant.start intrinsics were also marked as arbitrarily // writing so that proper control dependencies are maintained but they never @@ -1051,7 +1051,7 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, LocationSize MaybeObjectAccessSize) { // If the object access size is unknown, or the GEP isn't inbounds, bail. - if (!MaybeObjectAccessSize.hasValue() || !GEPOp->isInBounds()) + if (!MaybeObjectAccessSize.hasValue() || !GEPOp->isInBounds()) return false; const uint64_t ObjectAccessSize = MaybeObjectAccessSize.getValue(); @@ -1071,21 +1071,21 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, if (!DecompGEP.VarIndices.empty()) return false; - return DecompGEP.Offset.sge(DecompObject.Offset + (int64_t)ObjectAccessSize); + return DecompGEP.Offset.sge(DecompObject.Offset + (int64_t)ObjectAccessSize); } /// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against /// another pointer. /// /// We know that V1 is a GEP, but we don't know anything about V2. 
-/// UnderlyingV1 is getUnderlyingObject(GEP1), UnderlyingV2 is the same for +/// UnderlyingV1 is getUnderlyingObject(GEP1), UnderlyingV2 is the same for /// V2. AliasResult BasicAAResult::aliasGEP( const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) { - DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT); - DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT); + DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT); + DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT); // Don't attempt to analyze the decomposed GEP if index scale is not a // compile-time constant. @@ -1095,12 +1095,12 @@ AliasResult BasicAAResult::aliasGEP( assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 && "DecomposeGEPExpression returned a result different from " - "getUnderlyingObject"); + "getUnderlyingObject"); // If the GEP's offset relative to its base is such that the base would // fall below the start of the object underlying V2, then the GEP and V2 // cannot alias. - if (isGEPBaseAtNegativeOffset(GEP1, DecompGEP1, DecompGEP2, V2Size)) + if (isGEPBaseAtNegativeOffset(GEP1, DecompGEP1, DecompGEP2, V2Size)) return NoAlias; // If we have two gep instructions with must-alias or not-alias'ing base // pointers, figure out if the indexes to the GEP tell us anything about the @@ -1108,22 +1108,22 @@ AliasResult BasicAAResult::aliasGEP( if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { // Check for the GEP base being at a negative offset, this time in the other // direction. - if (isGEPBaseAtNegativeOffset(GEP2, DecompGEP2, DecompGEP1, V1Size)) + if (isGEPBaseAtNegativeOffset(GEP2, DecompGEP2, DecompGEP1, V1Size)) return NoAlias; // Do the base pointers alias? - AliasResult BaseAlias = getBestAAResults().alias( - MemoryLocation::getBeforeOrAfter(UnderlyingV1), - MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI); - - // For GEPs with identical offsets, we can preserve the size and AAInfo - // when performing the alias check on the underlying objects. - if (BaseAlias == MayAlias && DecompGEP1.Offset == DecompGEP2.Offset && - DecompGEP1.VarIndices == DecompGEP2.VarIndices) { - AliasResult PreciseBaseAlias = getBestAAResults().alias( - MemoryLocation(UnderlyingV1, V1Size, V1AAInfo), - MemoryLocation(UnderlyingV2, V2Size, V2AAInfo), AAQI); - if (PreciseBaseAlias == NoAlias) - return NoAlias; + AliasResult BaseAlias = getBestAAResults().alias( + MemoryLocation::getBeforeOrAfter(UnderlyingV1), + MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI); + + // For GEPs with identical offsets, we can preserve the size and AAInfo + // when performing the alias check on the underlying objects. + if (BaseAlias == MayAlias && DecompGEP1.Offset == DecompGEP2.Offset && + DecompGEP1.VarIndices == DecompGEP2.VarIndices) { + AliasResult PreciseBaseAlias = getBestAAResults().alias( + MemoryLocation(UnderlyingV1, V1Size, V1AAInfo), + MemoryLocation(UnderlyingV2, V2Size, V2AAInfo), AAQI); + if (PreciseBaseAlias == NoAlias) + return NoAlias; } // If we get a No or May, then return it immediately, no amount of analysis @@ -1135,7 +1135,7 @@ AliasResult BasicAAResult::aliasGEP( // Subtract the GEP2 pointer from the GEP1 pointer to find out their // symbolic difference. 
- DecompGEP1.Offset -= DecompGEP2.Offset; + DecompGEP1.Offset -= DecompGEP2.Offset; GetIndexDifference(DecompGEP1.VarIndices, DecompGEP2.VarIndices); } else { @@ -1144,12 +1144,12 @@ AliasResult BasicAAResult::aliasGEP( // pointer, we know they cannot alias. // If both accesses are unknown size, we can't do anything useful here. - if (!V1Size.hasValue() && !V2Size.hasValue()) + if (!V1Size.hasValue() && !V2Size.hasValue()) return MayAlias; - AliasResult R = getBestAAResults().alias( - MemoryLocation::getBeforeOrAfter(UnderlyingV1), - MemoryLocation(V2, V2Size, V2AAInfo), AAQI); + AliasResult R = getBestAAResults().alias( + MemoryLocation::getBeforeOrAfter(UnderlyingV1), + MemoryLocation(V2, V2Size, V2AAInfo), AAQI); if (R != MustAlias) { // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values @@ -1167,17 +1167,17 @@ AliasResult BasicAAResult::aliasGEP( // // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 // must aliases the GEP, the end result is a must alias also. - if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty()) + if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty()) return MustAlias; // If there is a constant difference between the pointers, but the difference // is less than the size of the associated memory object, then we know // that the objects are partially overlapping. If the difference is // greater, we know they do not overlap. - if (DecompGEP1.Offset != 0 && DecompGEP1.VarIndices.empty()) { - if (DecompGEP1.Offset.sge(0)) { - if (V2Size.hasValue()) { - if (DecompGEP1.Offset.ult(V2Size.getValue())) + if (DecompGEP1.Offset != 0 && DecompGEP1.VarIndices.empty()) { + if (DecompGEP1.Offset.sge(0)) { + if (V2Size.hasValue()) { + if (DecompGEP1.Offset.ult(V2Size.getValue())) return PartialAlias; return NoAlias; } @@ -1188,8 +1188,8 @@ AliasResult BasicAAResult::aliasGEP( // ---------------->| // |-->V1Size |-------> V2Size // GEP1 V2 - if (V1Size.hasValue()) { - if ((-DecompGEP1.Offset).ult(V1Size.getValue())) + if (V1Size.hasValue()) { + if ((-DecompGEP1.Offset).ult(V1Size.getValue())) return PartialAlias; return NoAlias; } @@ -1197,24 +1197,24 @@ AliasResult BasicAAResult::aliasGEP( } if (!DecompGEP1.VarIndices.empty()) { - APInt GCD; - bool AllNonNegative = DecompGEP1.Offset.isNonNegative(); - bool AllNonPositive = DecompGEP1.Offset.isNonPositive(); + APInt GCD; + bool AllNonNegative = DecompGEP1.Offset.isNonNegative(); + bool AllNonPositive = DecompGEP1.Offset.isNonPositive(); for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) { - const APInt &Scale = DecompGEP1.VarIndices[i].Scale; - if (i == 0) - GCD = Scale.abs(); - else - GCD = APIntOps::GreatestCommonDivisor(GCD, Scale.abs()); + const APInt &Scale = DecompGEP1.VarIndices[i].Scale; + if (i == 0) + GCD = Scale.abs(); + else + GCD = APIntOps::GreatestCommonDivisor(GCD, Scale.abs()); - if (AllNonNegative || AllNonPositive) { + if (AllNonNegative || AllNonPositive) { // If the Value could change between cycles, then any reasoning about // the Value this cycle may not hold in the next cycle. 
We'll just // give up if we can't determine conditions that hold for every cycle: const Value *V = DecompGEP1.VarIndices[i].V; - const Instruction *CxtI = DecompGEP1.VarIndices[i].CxtI; + const Instruction *CxtI = DecompGEP1.VarIndices[i].CxtI; - KnownBits Known = computeKnownBits(V, DL, 0, &AC, CxtI, DT); + KnownBits Known = computeKnownBits(V, DL, 0, &AC, CxtI, DT); bool SignKnownZero = Known.isNonNegative(); bool SignKnownOne = Known.isNegative(); @@ -1224,77 +1224,77 @@ AliasResult BasicAAResult::aliasGEP( SignKnownZero |= IsZExt; SignKnownOne &= !IsZExt; - AllNonNegative &= (SignKnownZero && Scale.isNonNegative()) || - (SignKnownOne && Scale.isNonPositive()); - AllNonPositive &= (SignKnownZero && Scale.isNonPositive()) || - (SignKnownOne && Scale.isNonNegative()); + AllNonNegative &= (SignKnownZero && Scale.isNonNegative()) || + (SignKnownOne && Scale.isNonPositive()); + AllNonPositive &= (SignKnownZero && Scale.isNonPositive()) || + (SignKnownOne && Scale.isNonNegative()); } } - // We now have accesses at two offsets from the same base: - // 1. (...)*GCD + DecompGEP1.Offset with size V1Size - // 2. 0 with size V2Size - // Using arithmetic modulo GCD, the accesses are at - // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits - // into the range [V2Size..GCD), then we know they cannot overlap. - APInt ModOffset = DecompGEP1.Offset.srem(GCD); - if (ModOffset.isNegative()) - ModOffset += GCD; // We want mod, not rem. - if (V1Size.hasValue() && V2Size.hasValue() && - ModOffset.uge(V2Size.getValue()) && - (GCD - ModOffset).uge(V1Size.getValue())) + // We now have accesses at two offsets from the same base: + // 1. (...)*GCD + DecompGEP1.Offset with size V1Size + // 2. 0 with size V2Size + // Using arithmetic modulo GCD, the accesses are at + // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits + // into the range [V2Size..GCD), then we know they cannot overlap. + APInt ModOffset = DecompGEP1.Offset.srem(GCD); + if (ModOffset.isNegative()) + ModOffset += GCD; // We want mod, not rem. + if (V1Size.hasValue() && V2Size.hasValue() && + ModOffset.uge(V2Size.getValue()) && + (GCD - ModOffset).uge(V1Size.getValue())) return NoAlias; - // If we know all the variables are non-negative, then the total offset is - // also non-negative and >= DecompGEP1.Offset. We have the following layout: - // [0, V2Size) ... [TotalOffset, TotalOffer+V1Size] - // If DecompGEP1.Offset >= V2Size, the accesses don't alias. - if (AllNonNegative && V2Size.hasValue() && - DecompGEP1.Offset.uge(V2Size.getValue())) - return NoAlias; - // Similarly, if the variables are non-positive, then the total offset is - // also non-positive and <= DecompGEP1.Offset. We have the following layout: - // [TotalOffset, TotalOffset+V1Size) ... [0, V2Size) - // If -DecompGEP1.Offset >= V1Size, the accesses don't alias. - if (AllNonPositive && V1Size.hasValue() && - (-DecompGEP1.Offset).uge(V1Size.getValue())) + // If we know all the variables are non-negative, then the total offset is + // also non-negative and >= DecompGEP1.Offset. We have the following layout: + // [0, V2Size) ... [TotalOffset, TotalOffer+V1Size] + // If DecompGEP1.Offset >= V2Size, the accesses don't alias. + if (AllNonNegative && V2Size.hasValue() && + DecompGEP1.Offset.uge(V2Size.getValue())) return NoAlias; - - if (V1Size.hasValue() && V2Size.hasValue()) { - // Try to determine whether abs(VarIndex) > 0. - Optional<APInt> MinAbsVarIndex; - if (DecompGEP1.VarIndices.size() == 1) { - // VarIndex = Scale*V. 
If V != 0 then abs(VarIndex) >= abs(Scale). - const VariableGEPIndex &Var = DecompGEP1.VarIndices[0]; - if (isKnownNonZero(Var.V, DL, 0, &AC, Var.CxtI, DT)) - MinAbsVarIndex = Var.Scale.abs(); - } else if (DecompGEP1.VarIndices.size() == 2) { - // VarIndex = Scale*V0 + (-Scale)*V1. - // If V0 != V1 then abs(VarIndex) >= abs(Scale). - // Check that VisitedPhiBBs is empty, to avoid reasoning about - // inequality of values across loop iterations. - const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0]; - const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1]; - if (Var0.Scale == -Var1.Scale && Var0.ZExtBits == Var1.ZExtBits && - Var0.SExtBits == Var1.SExtBits && VisitedPhiBBs.empty() && - isKnownNonEqual(Var0.V, Var1.V, DL, &AC, /* CxtI */ nullptr, DT)) - MinAbsVarIndex = Var0.Scale.abs(); - } - - if (MinAbsVarIndex) { - // The constant offset will have added at least +/-MinAbsVarIndex to it. - APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex; - APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex; - // Check that an access at OffsetLo or lower, and an access at OffsetHi - // or higher both do not alias. - if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) && - OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue())) - return NoAlias; - } - } - + // Similarly, if the variables are non-positive, then the total offset is + // also non-positive and <= DecompGEP1.Offset. We have the following layout: + // [TotalOffset, TotalOffset+V1Size) ... [0, V2Size) + // If -DecompGEP1.Offset >= V1Size, the accesses don't alias. + if (AllNonPositive && V1Size.hasValue() && + (-DecompGEP1.Offset).uge(V1Size.getValue())) + return NoAlias; + + if (V1Size.hasValue() && V2Size.hasValue()) { + // Try to determine whether abs(VarIndex) > 0. + Optional<APInt> MinAbsVarIndex; + if (DecompGEP1.VarIndices.size() == 1) { + // VarIndex = Scale*V. If V != 0 then abs(VarIndex) >= abs(Scale). + const VariableGEPIndex &Var = DecompGEP1.VarIndices[0]; + if (isKnownNonZero(Var.V, DL, 0, &AC, Var.CxtI, DT)) + MinAbsVarIndex = Var.Scale.abs(); + } else if (DecompGEP1.VarIndices.size() == 2) { + // VarIndex = Scale*V0 + (-Scale)*V1. + // If V0 != V1 then abs(VarIndex) >= abs(Scale). + // Check that VisitedPhiBBs is empty, to avoid reasoning about + // inequality of values across loop iterations. + const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0]; + const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1]; + if (Var0.Scale == -Var1.Scale && Var0.ZExtBits == Var1.ZExtBits && + Var0.SExtBits == Var1.SExtBits && VisitedPhiBBs.empty() && + isKnownNonEqual(Var0.V, Var1.V, DL, &AC, /* CxtI */ nullptr, DT)) + MinAbsVarIndex = Var0.Scale.abs(); + } + + if (MinAbsVarIndex) { + // The constant offset will have added at least +/-MinAbsVarIndex to it. + APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex; + APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex; + // Check that an access at OffsetLo or lower, and an access at OffsetHi + // or higher both do not alias. 
+ if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) && + OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue())) + return NoAlias; + } + } + if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size, - DecompGEP1.Offset, &AC, DT)) + DecompGEP1.Offset, &AC, DT)) return NoAlias; } @@ -1322,33 +1322,33 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize, const AAMDNodes &SIAAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - AAQueryInfo &AAQI) { + AAQueryInfo &AAQI) { // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) if (SI->getCondition() == SI2->getCondition()) { - AliasResult Alias = getBestAAResults().alias( - MemoryLocation(SI->getTrueValue(), SISize, SIAAInfo), - MemoryLocation(SI2->getTrueValue(), V2Size, V2AAInfo), AAQI); + AliasResult Alias = getBestAAResults().alias( + MemoryLocation(SI->getTrueValue(), SISize, SIAAInfo), + MemoryLocation(SI2->getTrueValue(), V2Size, V2AAInfo), AAQI); if (Alias == MayAlias) return MayAlias; - AliasResult ThisAlias = getBestAAResults().alias( - MemoryLocation(SI->getFalseValue(), SISize, SIAAInfo), - MemoryLocation(SI2->getFalseValue(), V2Size, V2AAInfo), AAQI); + AliasResult ThisAlias = getBestAAResults().alias( + MemoryLocation(SI->getFalseValue(), SISize, SIAAInfo), + MemoryLocation(SI2->getFalseValue(), V2Size, V2AAInfo), AAQI); return MergeAliasResults(ThisAlias, Alias); } // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. - AliasResult Alias = getBestAAResults().alias( - MemoryLocation(V2, V2Size, V2AAInfo), - MemoryLocation(SI->getTrueValue(), SISize, SIAAInfo), AAQI); + AliasResult Alias = getBestAAResults().alias( + MemoryLocation(V2, V2Size, V2AAInfo), + MemoryLocation(SI->getTrueValue(), SISize, SIAAInfo), AAQI); if (Alias == MayAlias) return MayAlias; - AliasResult ThisAlias = getBestAAResults().alias( - MemoryLocation(V2, V2Size, V2AAInfo), - MemoryLocation(SI->getFalseValue(), SISize, SIAAInfo), AAQI); + AliasResult ThisAlias = getBestAAResults().alias( + MemoryLocation(V2, V2Size, V2AAInfo), + MemoryLocation(SI->getFalseValue(), SISize, SIAAInfo), AAQI); return MergeAliasResults(ThisAlias, Alias); } @@ -1358,41 +1358,41 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, const AAMDNodes &PNAAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - AAQueryInfo &AAQI) { + AAQueryInfo &AAQI) { // If the values are PHIs in the same block, we can do a more precise // as well as efficient check: just check for aliases between the values // on corresponding edges. 
if (const PHINode *PN2 = dyn_cast<PHINode>(V2)) if (PN2->getParent() == PN->getParent()) { - Optional<AliasResult> Alias; + Optional<AliasResult> Alias; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - AliasResult ThisAlias = getBestAAResults().alias( - MemoryLocation(PN->getIncomingValue(i), PNSize, PNAAInfo), - MemoryLocation( - PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), V2Size, - V2AAInfo), - AAQI); - if (Alias) - *Alias = MergeAliasResults(*Alias, ThisAlias); - else - Alias = ThisAlias; - if (*Alias == MayAlias) + AliasResult ThisAlias = getBestAAResults().alias( + MemoryLocation(PN->getIncomingValue(i), PNSize, PNAAInfo), + MemoryLocation( + PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), V2Size, + V2AAInfo), + AAQI); + if (Alias) + *Alias = MergeAliasResults(*Alias, ThisAlias); + else + Alias = ThisAlias; + if (*Alias == MayAlias) break; } - return *Alias; + return *Alias; } SmallVector<Value *, 4> V1Srcs; - // If a phi operand recurses back to the phi, we can still determine NoAlias - // if we don't alias the underlying objects of the other phi operands, as we - // know that the recursive phi needs to be based on them in some way. + // If a phi operand recurses back to the phi, we can still determine NoAlias + // if we don't alias the underlying objects of the other phi operands, as we + // know that the recursive phi needs to be based on them in some way. bool isRecursive = false; auto CheckForRecPhi = [&](Value *PV) { if (!EnableRecPhiAnalysis) return false; - if (getUnderlyingObject(PV) == PN) { - isRecursive = true; - return true; + if (getUnderlyingObject(PV) == PN) { + isRecursive = true; + return true; } return false; }; @@ -1438,31 +1438,31 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, if (V1Srcs.empty()) return MayAlias; - // If this PHI node is recursive, indicate that the pointer may be moved - // across iterations. We can only prove NoAlias if different underlying - // objects are involved. + // If this PHI node is recursive, indicate that the pointer may be moved + // across iterations. We can only prove NoAlias if different underlying + // objects are involved. if (isRecursive) - PNSize = LocationSize::beforeOrAfterPointer(); - - // In the recursive alias queries below, we may compare values from two - // different loop iterations. Keep track of visited phi blocks, which will - // be used when determining value equivalence. - bool BlockInserted = VisitedPhiBBs.insert(PN->getParent()).second; - auto _ = make_scope_exit([&]() { - if (BlockInserted) - VisitedPhiBBs.erase(PN->getParent()); - }); - - // If we inserted a block into VisitedPhiBBs, alias analysis results that - // have been cached earlier may no longer be valid. Perform recursive queries - // with a new AAQueryInfo. - AAQueryInfo NewAAQI; - AAQueryInfo *UseAAQI = BlockInserted ? &NewAAQI : &AAQI; - - AliasResult Alias = getBestAAResults().alias( - MemoryLocation(V2, V2Size, V2AAInfo), - MemoryLocation(V1Srcs[0], PNSize, PNAAInfo), *UseAAQI); - + PNSize = LocationSize::beforeOrAfterPointer(); + + // In the recursive alias queries below, we may compare values from two + // different loop iterations. Keep track of visited phi blocks, which will + // be used when determining value equivalence. 
+ bool BlockInserted = VisitedPhiBBs.insert(PN->getParent()).second; + auto _ = make_scope_exit([&]() { + if (BlockInserted) + VisitedPhiBBs.erase(PN->getParent()); + }); + + // If we inserted a block into VisitedPhiBBs, alias analysis results that + // have been cached earlier may no longer be valid. Perform recursive queries + // with a new AAQueryInfo. + AAQueryInfo NewAAQI; + AAQueryInfo *UseAAQI = BlockInserted ? &NewAAQI : &AAQI; + + AliasResult Alias = getBestAAResults().alias( + MemoryLocation(V2, V2Size, V2AAInfo), + MemoryLocation(V1Srcs[0], PNSize, PNAAInfo), *UseAAQI); + // Early exit if the check of the first PHI source against V2 is MayAlias. // Other results are not possible. if (Alias == MayAlias) @@ -1477,9 +1477,9 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { Value *V = V1Srcs[i]; - AliasResult ThisAlias = getBestAAResults().alias( - MemoryLocation(V2, V2Size, V2AAInfo), - MemoryLocation(V, PNSize, PNAAInfo), *UseAAQI); + AliasResult ThisAlias = getBestAAResults().alias( + MemoryLocation(V2, V2Size, V2AAInfo), + MemoryLocation(V, PNSize, PNAAInfo), *UseAAQI); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; @@ -1491,10 +1491,10 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, /// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as /// array references. AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, - const AAMDNodes &V1AAInfo, - const Value *V2, LocationSize V2Size, - const AAMDNodes &V2AAInfo, - AAQueryInfo &AAQI) { + const AAMDNodes &V1AAInfo, + const Value *V2, LocationSize V2Size, + const AAMDNodes &V2AAInfo, + AAQueryInfo &AAQI) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. if (V1Size.isZero() || V2Size.isZero()) @@ -1522,8 +1522,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. - const Value *O1 = getUnderlyingObject(V1, MaxLookupSearchDepth); - const Value *O2 = getUnderlyingObject(V2, MaxLookupSearchDepth); + const Value *O1 = getUnderlyingObject(V1, MaxLookupSearchDepth); + const Value *O2 = getUnderlyingObject(V2, MaxLookupSearchDepth); // Null values in the default address space don't point to any object, so they // don't alias any other pointer. @@ -1578,120 +1578,120 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, TLI, NullIsValidLocation))) return NoAlias; - // If one the accesses may be before the accessed pointer, canonicalize this - // by using unknown after-pointer sizes for both accesses. This is - // equivalent, because regardless of which pointer is lower, one of them - // will always came after the other, as long as the underlying objects aren't - // disjoint. We do this so that the rest of BasicAA does not have to deal - // with accesses before the base pointer, and to improve cache utilization by - // merging equivalent states. - if (V1Size.mayBeBeforePointer() || V2Size.mayBeBeforePointer()) { - V1Size = LocationSize::afterPointer(); - V2Size = LocationSize::afterPointer(); - } - + // If one the accesses may be before the accessed pointer, canonicalize this + // by using unknown after-pointer sizes for both accesses. 
This is + // equivalent, because regardless of which pointer is lower, one of them + // will always came after the other, as long as the underlying objects aren't + // disjoint. We do this so that the rest of BasicAA does not have to deal + // with accesses before the base pointer, and to improve cache utilization by + // merging equivalent states. + if (V1Size.mayBeBeforePointer() || V2Size.mayBeBeforePointer()) { + V1Size = LocationSize::afterPointer(); + V2Size = LocationSize::afterPointer(); + } + // Check the cache before climbing up use-def chains. This also terminates // otherwise infinitely recursive queries. AAQueryInfo::LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo), MemoryLocation(V2, V2Size, V2AAInfo)); if (V1 > V2) std::swap(Locs.first, Locs.second); - const auto &Pair = AAQI.AliasCache.try_emplace( - Locs, AAQueryInfo::CacheEntry{NoAlias, 0}); - if (!Pair.second) { - auto &Entry = Pair.first->second; - if (!Entry.isDefinitive()) { - // Remember that we used an assumption. - ++Entry.NumAssumptionUses; - ++AAQI.NumAssumptionUses; - } - return Entry.Result; - } - - int OrigNumAssumptionUses = AAQI.NumAssumptionUses; - unsigned OrigNumAssumptionBasedResults = AAQI.AssumptionBasedResults.size(); - AliasResult Result = aliasCheckRecursive(V1, V1Size, V1AAInfo, V2, V2Size, - V2AAInfo, AAQI, O1, O2); - - auto It = AAQI.AliasCache.find(Locs); - assert(It != AAQI.AliasCache.end() && "Must be in cache"); - auto &Entry = It->second; - - // Check whether a NoAlias assumption has been used, but disproven. - bool AssumptionDisproven = Entry.NumAssumptionUses > 0 && Result != NoAlias; - if (AssumptionDisproven) - Result = MayAlias; - - // This is a definitive result now, when considered as a root query. - AAQI.NumAssumptionUses -= Entry.NumAssumptionUses; - Entry.Result = Result; - Entry.NumAssumptionUses = -1; - - // If the assumption has been disproven, remove any results that may have - // been based on this assumption. Do this after the Entry updates above to - // avoid iterator invalidation. - if (AssumptionDisproven) - while (AAQI.AssumptionBasedResults.size() > OrigNumAssumptionBasedResults) - AAQI.AliasCache.erase(AAQI.AssumptionBasedResults.pop_back_val()); - - // The result may still be based on assumptions higher up in the chain. - // Remember it, so it can be purged from the cache later. - if (OrigNumAssumptionUses != AAQI.NumAssumptionUses && Result != MayAlias) - AAQI.AssumptionBasedResults.push_back(Locs); - return Result; -} - -AliasResult BasicAAResult::aliasCheckRecursive( - const Value *V1, LocationSize V1Size, const AAMDNodes &V1AAInfo, - const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - AAQueryInfo &AAQI, const Value *O1, const Value *O2) { + const auto &Pair = AAQI.AliasCache.try_emplace( + Locs, AAQueryInfo::CacheEntry{NoAlias, 0}); + if (!Pair.second) { + auto &Entry = Pair.first->second; + if (!Entry.isDefinitive()) { + // Remember that we used an assumption. + ++Entry.NumAssumptionUses; + ++AAQI.NumAssumptionUses; + } + return Entry.Result; + } + + int OrigNumAssumptionUses = AAQI.NumAssumptionUses; + unsigned OrigNumAssumptionBasedResults = AAQI.AssumptionBasedResults.size(); + AliasResult Result = aliasCheckRecursive(V1, V1Size, V1AAInfo, V2, V2Size, + V2AAInfo, AAQI, O1, O2); + + auto It = AAQI.AliasCache.find(Locs); + assert(It != AAQI.AliasCache.end() && "Must be in cache"); + auto &Entry = It->second; + + // Check whether a NoAlias assumption has been used, but disproven. 
+ bool AssumptionDisproven = Entry.NumAssumptionUses > 0 && Result != NoAlias; + if (AssumptionDisproven) + Result = MayAlias; + + // This is a definitive result now, when considered as a root query. + AAQI.NumAssumptionUses -= Entry.NumAssumptionUses; + Entry.Result = Result; + Entry.NumAssumptionUses = -1; + + // If the assumption has been disproven, remove any results that may have + // been based on this assumption. Do this after the Entry updates above to + // avoid iterator invalidation. + if (AssumptionDisproven) + while (AAQI.AssumptionBasedResults.size() > OrigNumAssumptionBasedResults) + AAQI.AliasCache.erase(AAQI.AssumptionBasedResults.pop_back_val()); + + // The result may still be based on assumptions higher up in the chain. + // Remember it, so it can be purged from the cache later. + if (OrigNumAssumptionUses != AAQI.NumAssumptionUses && Result != MayAlias) + AAQI.AssumptionBasedResults.push_back(Locs); + return Result; +} + +AliasResult BasicAAResult::aliasCheckRecursive( + const Value *V1, LocationSize V1Size, const AAMDNodes &V1AAInfo, + const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, + AAQueryInfo &AAQI, const Value *O1, const Value *O2) { if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2, AAQI); - if (Result != MayAlias) - return Result; - } else if (const GEPOperator *GV2 = dyn_cast<GEPOperator>(V2)) { - AliasResult Result = - aliasGEP(GV2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, O2, O1, AAQI); - if (Result != MayAlias) + if (Result != MayAlias) return Result; + } else if (const GEPOperator *GV2 = dyn_cast<GEPOperator>(V2)) { + AliasResult Result = + aliasGEP(GV2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, O2, O1, AAQI); + if (Result != MayAlias) + return Result; } if (const PHINode *PN = dyn_cast<PHINode>(V1)) { AliasResult Result = - aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, AAQI); - if (Result != MayAlias) - return Result; - } else if (const PHINode *PN = dyn_cast<PHINode>(V2)) { - AliasResult Result = - aliasPHI(PN, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, AAQI); - if (Result != MayAlias) - return Result; + aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, AAQI); + if (Result != MayAlias) + return Result; + } else if (const PHINode *PN = dyn_cast<PHINode>(V2)) { + AliasResult Result = + aliasPHI(PN, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, AAQI); + if (Result != MayAlias) + return Result; } if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { AliasResult Result = - aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, AAQI); - if (Result != MayAlias) - return Result; - } else if (const SelectInst *S2 = dyn_cast<SelectInst>(V2)) { - AliasResult Result = - aliasSelect(S2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, AAQI); - if (Result != MayAlias) - return Result; + aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, AAQI); + if (Result != MayAlias) + return Result; + } else if (const SelectInst *S2 = dyn_cast<SelectInst>(V2)) { + AliasResult Result = + aliasSelect(S2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, AAQI); + if (Result != MayAlias) + return Result; } // If both pointers are pointing into the same object and one of them // accesses the entire object, then the accesses must overlap in some way. 
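// A minimal standalone sketch (plain C++, not LLVM code) of the rule stated in
// the comment above: when both pointers refer to the same underlying object and
// one of the two accesses covers the entire object, the accesses cannot be
// disjoint. The function and variable names below are illustrative only.
#include <cstring>

void wholeObjectOverlapExample() {
  char Obj[16];
  std::memset(Obj, 0, sizeof(Obj)); // 16-byte access spanning [0, 16) of Obj
  char *P = Obj + 8;                // a second pointer into the same object
  std::memset(P, 1, 4);             // 4-byte access spanning [8, 12)
  // The two ranges necessarily intersect, so an alias query for
  // (Obj, 16 bytes) vs (P, 4 bytes) can be refined from MayAlias to
  // PartialAlias, which is exactly what the O1 == O2 check in this hunk does.
}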
- if (O1 == O2) { - bool NullIsValidLocation = NullPointerIsDefined(&F); + if (O1 == O2) { + bool NullIsValidLocation = NullPointerIsDefined(&F); if (V1Size.isPrecise() && V2Size.isPrecise() && (isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) || - isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) - return PartialAlias; - } + isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) + return PartialAlias; + } - return MayAlias; + return MayAlias; } /// Check whether two Values can be considered equivalent. @@ -1760,7 +1760,7 @@ void BasicAAResult::GetIndexDifference( // If we didn't consume this entry, add it to the end of the Dest list. if (!!Scale) { - VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale, Src[i].CxtI}; + VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale, Src[i].CxtI}; Dest.push_back(Entry); } } @@ -1770,8 +1770,8 @@ bool BasicAAResult::constantOffsetHeuristic( const SmallVectorImpl<VariableGEPIndex> &VarIndices, LocationSize MaybeV1Size, LocationSize MaybeV2Size, const APInt &BaseOffset, AssumptionCache *AC, DominatorTree *DT) { - if (VarIndices.size() != 2 || !MaybeV1Size.hasValue() || - !MaybeV2Size.hasValue()) + if (VarIndices.size() != 2 || !MaybeV1Size.hasValue() || + !MaybeV2Size.hasValue()) return false; const uint64_t V1Size = MaybeV1Size.getValue(); @@ -1831,12 +1831,12 @@ bool BasicAAResult::constantOffsetHeuristic( AnalysisKey BasicAA::Key; BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { - auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); - auto &AC = AM.getResult<AssumptionAnalysis>(F); - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *LI = AM.getCachedResult<LoopAnalysis>(F); - auto *PV = AM.getCachedResult<PhiValuesAnalysis>(F); - return BasicAAResult(F.getParent()->getDataLayout(), F, TLI, AC, DT, LI, PV); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *LI = AM.getCachedResult<LoopAnalysis>(F); + auto *PV = AM.getCachedResult<PhiValuesAnalysis>(F); + return BasicAAResult(F.getParent()->getDataLayout(), F, TLI, AC, DT, LI, PV); } BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { @@ -1878,9 +1878,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequiredTransitive<AssumptionCacheTracker>(); - AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); + AU.addRequiredTransitive<AssumptionCacheTracker>(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); AU.addUsedIfAvailable<PhiValuesWrapperPass>(); } diff --git a/contrib/libs/llvm12/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/libs/llvm12/lib/Analysis/BranchProbabilityInfo.cpp index 884ba484ae..e8f5791d37 100644 --- a/contrib/libs/llvm12/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/libs/llvm12/lib/Analysis/BranchProbabilityInfo.cpp @@ -61,7 +61,7 @@ INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) @@ -120,185 +120,185 @@ static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; /// exceptional case, so the result is unlikely. static const uint32_t FPH_UNO_WEIGHT = 1; -/// Set of dedicated "absolute" execution weights for a block. These weights are -/// meaningful relative to each other and their derivatives only. -enum class BlockExecWeight : std::uint32_t { - /// Special weight used for cases with exact zero probability. - ZERO = 0x0, - /// Minimal possible non zero weight. - LOWEST_NON_ZERO = 0x1, - /// Weight to an 'unreachable' block. - UNREACHABLE = ZERO, - /// Weight to a block containing non returning call. - NORETURN = LOWEST_NON_ZERO, - /// Weight to 'unwind' block of an invoke instruction. - UNWIND = LOWEST_NON_ZERO, - /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked - /// with attribute 'cold'. - COLD = 0xffff, - /// Default weight is used in cases when there is no dedicated execution - /// weight set. It is not propagated through the domination line either. - DEFAULT = 0xfffff -}; - -BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) { - // Record SCC numbers of blocks in the CFG to identify irreducible loops. - // FIXME: We could only calculate this if the CFG is known to be irreducible - // (perhaps cache this info in LoopInfo if we can easily calculate it there?). - int SccNum = 0; - for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd(); - ++It, ++SccNum) { - // Ignore single-block SCCs since they either aren't loops or LoopInfo will - // catch them. - const std::vector<const BasicBlock *> &Scc = *It; - if (Scc.size() == 1) - continue; - - LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":"); - for (const auto *BB : Scc) { - LLVM_DEBUG(dbgs() << " " << BB->getName()); - SccNums[BB] = SccNum; - calculateSccBlockType(BB, SccNum); - } - LLVM_DEBUG(dbgs() << "\n"); - } -} - -int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock *BB) const { - auto SccIt = SccNums.find(BB); - if (SccIt == SccNums.end()) - return -1; - return SccIt->second; +/// Set of dedicated "absolute" execution weights for a block. These weights are +/// meaningful relative to each other and their derivatives only. +enum class BlockExecWeight : std::uint32_t { + /// Special weight used for cases with exact zero probability. + ZERO = 0x0, + /// Minimal possible non zero weight. + LOWEST_NON_ZERO = 0x1, + /// Weight to an 'unreachable' block. + UNREACHABLE = ZERO, + /// Weight to a block containing non returning call. + NORETURN = LOWEST_NON_ZERO, + /// Weight to 'unwind' block of an invoke instruction. + UNWIND = LOWEST_NON_ZERO, + /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked + /// with attribute 'cold'. + COLD = 0xffff, + /// Default weight is used in cases when there is no dedicated execution + /// weight set. It is not propagated through the domination line either. + DEFAULT = 0xfffff +}; + +BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) { + // Record SCC numbers of blocks in the CFG to identify irreducible loops. + // FIXME: We could only calculate this if the CFG is known to be irreducible + // (perhaps cache this info in LoopInfo if we can easily calculate it there?). + int SccNum = 0; + for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd(); + ++It, ++SccNum) { + // Ignore single-block SCCs since they either aren't loops or LoopInfo will + // catch them. 
+ const std::vector<const BasicBlock *> &Scc = *It; + if (Scc.size() == 1) + continue; + + LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":"); + for (const auto *BB : Scc) { + LLVM_DEBUG(dbgs() << " " << BB->getName()); + SccNums[BB] = SccNum; + calculateSccBlockType(BB, SccNum); + } + LLVM_DEBUG(dbgs() << "\n"); + } +} + +int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock *BB) const { + auto SccIt = SccNums.find(BB); + if (SccIt == SccNums.end()) + return -1; + return SccIt->second; } -void BranchProbabilityInfo::SccInfo::getSccEnterBlocks( - int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const { - - for (auto MapIt : SccBlocks[SccNum]) { - const auto *BB = MapIt.first; - if (isSCCHeader(BB, SccNum)) - for (const auto *Pred : predecessors(BB)) - if (getSCCNum(Pred) != SccNum) - Enters.push_back(const_cast<BasicBlock *>(BB)); +void BranchProbabilityInfo::SccInfo::getSccEnterBlocks( + int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const { + + for (auto MapIt : SccBlocks[SccNum]) { + const auto *BB = MapIt.first; + if (isSCCHeader(BB, SccNum)) + for (const auto *Pred : predecessors(BB)) + if (getSCCNum(Pred) != SccNum) + Enters.push_back(const_cast<BasicBlock *>(BB)); } -} - -void BranchProbabilityInfo::SccInfo::getSccExitBlocks( - int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const { - for (auto MapIt : SccBlocks[SccNum]) { - const auto *BB = MapIt.first; - if (isSCCExitingBlock(BB, SccNum)) - for (const auto *Succ : successors(BB)) - if (getSCCNum(Succ) != SccNum) - Exits.push_back(const_cast<BasicBlock *>(BB)); +} + +void BranchProbabilityInfo::SccInfo::getSccExitBlocks( + int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const { + for (auto MapIt : SccBlocks[SccNum]) { + const auto *BB = MapIt.first; + if (isSCCExitingBlock(BB, SccNum)) + for (const auto *Succ : successors(BB)) + if (getSCCNum(Succ) != SccNum) + Exits.push_back(const_cast<BasicBlock *>(BB)); } } -uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB, - int SccNum) const { - assert(getSCCNum(BB) == SccNum); +uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB, + int SccNum) const { + assert(getSCCNum(BB) == SccNum); - assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC"); - const auto &SccBlockTypes = SccBlocks[SccNum]; - - auto It = SccBlockTypes.find(BB); - if (It != SccBlockTypes.end()) { - return It->second; - } - return Inner; -} + assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC"); + const auto &SccBlockTypes = SccBlocks[SccNum]; -void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB, - int SccNum) { - assert(getSCCNum(BB) == SccNum); - uint32_t BlockType = Inner; - - if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) { - // Consider any block that is an entry point to the SCC as - // a header. - return getSCCNum(Pred) != SccNum; - })) - BlockType |= Header; - - if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) { - return getSCCNum(Succ) != SccNum; - })) - BlockType |= Exiting; - - // Lazily compute the set of headers for a given SCC and cache the results - // in the SccHeaderMap. 
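// A small sketch (plain C++, not the SccInfo class itself) of the block
// classification computed above: within one non-trivial SCC, a block is a
// Header if some predecessor lies outside the SCC and Exiting if some
// successor does. The SimpleGraph representation below is a hypothetical
// stand-in for the CFG and the SCC numbering.
#include <vector>

enum SccBlockKind : unsigned { InnerKind = 0, HeaderKind = 1u << 0, ExitingKind = 1u << 1 };

struct SimpleGraph {
  std::vector<std::vector<int>> Preds, Succs; // adjacency by block index
  std::vector<int> SccNum;                    // -1 for blocks outside any SCC
};

unsigned classifySccBlock(const SimpleGraph &G, int BB) {
  unsigned Kind = InnerKind;
  const int Num = G.SccNum[BB];
  for (int Pred : G.Preds[BB])
    if (G.SccNum[Pred] != Num) // reached from outside the SCC
      Kind |= HeaderKind;
  for (int Succ : G.Succs[BB])
    if (G.SccNum[Succ] != Num) // leaves the SCC
      Kind |= ExitingKind;
  return Kind;
}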
- if (SccBlocks.size() <= static_cast<unsigned>(SccNum)) - SccBlocks.resize(SccNum + 1); - auto &SccBlockTypes = SccBlocks[SccNum]; - - if (BlockType != Inner) { - bool IsInserted; - std::tie(std::ignore, IsInserted) = - SccBlockTypes.insert(std::make_pair(BB, BlockType)); - assert(IsInserted && "Duplicated block in SCC"); + auto It = SccBlockTypes.find(BB); + if (It != SccBlockTypes.end()) { + return It->second; } + return Inner; } -BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB, - const LoopInfo &LI, - const SccInfo &SccI) - : BB(BB) { - LD.first = LI.getLoopFor(BB); - if (!LD.first) { - LD.second = SccI.getSCCNum(BB); - } -} - -bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const { - const auto &SrcBlock = Edge.first; - const auto &DstBlock = Edge.second; - return (DstBlock.getLoop() && - !DstBlock.getLoop()->contains(SrcBlock.getLoop())) || - // Assume that SCCs can't be nested. - (DstBlock.getSccNum() != -1 && - SrcBlock.getSccNum() != DstBlock.getSccNum()); -} - -bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const { - return isLoopEnteringEdge({Edge.second, Edge.first}); -} - -bool BranchProbabilityInfo::isLoopEnteringExitingEdge( - const LoopEdge &Edge) const { - return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); -} - -bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const { - const auto &SrcBlock = Edge.first; - const auto &DstBlock = Edge.second; - return SrcBlock.belongsToSameLoop(DstBlock) && - ((DstBlock.getLoop() && - DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || - (DstBlock.getSccNum() != -1 && - SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum()))); -} - -void BranchProbabilityInfo::getLoopEnterBlocks( - const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const { - if (LB.getLoop()) { - auto *Header = LB.getLoop()->getHeader(); - Enters.append(pred_begin(Header), pred_end(Header)); - } else { - assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); - SccI->getSccEnterBlocks(LB.getSccNum(), Enters); - } -} - -void BranchProbabilityInfo::getLoopExitBlocks( - const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const { - if (LB.getLoop()) { - LB.getLoop()->getExitBlocks(Exits); - } else { - assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); - SccI->getSccExitBlocks(LB.getSccNum(), Exits); +void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB, + int SccNum) { + assert(getSCCNum(BB) == SccNum); + uint32_t BlockType = Inner; + + if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) { + // Consider any block that is an entry point to the SCC as + // a header. + return getSCCNum(Pred) != SccNum; + })) + BlockType |= Header; + + if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) { + return getSCCNum(Succ) != SccNum; + })) + BlockType |= Exiting; + + // Lazily compute the set of headers for a given SCC and cache the results + // in the SccHeaderMap. 
+ if (SccBlocks.size() <= static_cast<unsigned>(SccNum)) + SccBlocks.resize(SccNum + 1); + auto &SccBlockTypes = SccBlocks[SccNum]; + + if (BlockType != Inner) { + bool IsInserted; + std::tie(std::ignore, IsInserted) = + SccBlockTypes.insert(std::make_pair(BB, BlockType)); + assert(IsInserted && "Duplicated block in SCC"); } +} + +BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB, + const LoopInfo &LI, + const SccInfo &SccI) + : BB(BB) { + LD.first = LI.getLoopFor(BB); + if (!LD.first) { + LD.second = SccI.getSCCNum(BB); + } +} + +bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const { + const auto &SrcBlock = Edge.first; + const auto &DstBlock = Edge.second; + return (DstBlock.getLoop() && + !DstBlock.getLoop()->contains(SrcBlock.getLoop())) || + // Assume that SCCs can't be nested. + (DstBlock.getSccNum() != -1 && + SrcBlock.getSccNum() != DstBlock.getSccNum()); +} + +bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const { + return isLoopEnteringEdge({Edge.second, Edge.first}); } +bool BranchProbabilityInfo::isLoopEnteringExitingEdge( + const LoopEdge &Edge) const { + return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); +} + +bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const { + const auto &SrcBlock = Edge.first; + const auto &DstBlock = Edge.second; + return SrcBlock.belongsToSameLoop(DstBlock) && + ((DstBlock.getLoop() && + DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || + (DstBlock.getSccNum() != -1 && + SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum()))); +} + +void BranchProbabilityInfo::getLoopEnterBlocks( + const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const { + if (LB.getLoop()) { + auto *Header = LB.getLoop()->getHeader(); + Enters.append(pred_begin(Header), pred_end(Header)); + } else { + assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); + SccI->getSccEnterBlocks(LB.getSccNum(), Enters); + } +} + +void BranchProbabilityInfo::getLoopExitBlocks( + const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const { + if (LB.getLoop()) { + LB.getLoop()->getExitBlocks(Exits); + } else { + assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); + SccI->getSccExitBlocks(LB.getSccNum(), Exits); + } +} + // Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. Examine metadata against unreachable // heuristic. The probability of the edge coming to unreachable block is @@ -339,12 +339,12 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); - const LoopBlock SrcLoopBB = getLoopBlock(BB); - const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1)); - auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); - if (EstimatedWeight && - EstimatedWeight.getValue() <= - static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) + const LoopBlock SrcLoopBB = getLoopBlock(BB); + const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1)); + auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); + if (EstimatedWeight && + EstimatedWeight.getValue() <= + static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) UnreachableIdxs.push_back(I - 1); else ReachableIdxs.push_back(I - 1); @@ -568,7 +568,7 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, // we can constant-evaluate the compare to see if it makes the branch be // taken or not. 
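// Sketch of how the raw !prof branch_weights read in the calcMetadataWeights
// hunk above turn into edge probabilities. The helper below is a hypothetical
// simplification: LLVM's BranchProbability keeps a fixed 32-bit
// numerator/denominator rather than a double.
#include <cstdint>
#include <vector>

std::vector<double> weightsToProbabilities(const std::vector<uint32_t> &Weights) {
  uint64_t Sum = 0;
  for (uint32_t W : Weights)
    Sum += W;                               // WeightSum in the hunk above
  std::vector<double> Probs;
  for (uint32_t W : Weights)
    Probs.push_back(Sum ? double(W) / double(Sum)
                        : 1.0 / double(Weights.size())); // no data: uniform
  return Probs; // e.g. {2000, 1} -> {0.9995..., 0.00049...}
}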
Constant *CmpLHSConst = dyn_cast<Constant>(V); - if (!CmpLHSConst || !llvm::is_contained(successors(BB), B)) + if (!CmpLHSConst || !llvm::is_contained(successors(BB), B)) continue; // First collapse InstChain for (Instruction *I : llvm::reverse(InstChain)) { @@ -592,323 +592,323 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, } } -Optional<uint32_t> -BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { - auto WeightIt = EstimatedBlockWeight.find(BB); - if (WeightIt == EstimatedBlockWeight.end()) - return None; - return WeightIt->second; -} - -Optional<uint32_t> -BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { - auto WeightIt = EstimatedLoopWeight.find(L); - if (WeightIt == EstimatedLoopWeight.end()) - return None; - return WeightIt->second; -} - -Optional<uint32_t> -BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const { - // For edges entering a loop take weight of a loop rather than an individual - // block in the loop. - return isLoopEnteringEdge(Edge) - ? getEstimatedLoopWeight(Edge.second.getLoopData()) - : getEstimatedBlockWeight(Edge.second.getBlock()); -} - -template <class IterT> -Optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight( - const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const { - SmallVector<uint32_t, 4> Weights; - Optional<uint32_t> MaxWeight; - for (const BasicBlock *DstBB : Successors) { - const LoopBlock DstLoopBB = getLoopBlock(DstBB); - auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); - - if (!Weight) - return None; - - if (!MaxWeight || MaxWeight.getValue() < Weight.getValue()) - MaxWeight = Weight; - } - - return MaxWeight; -} - -// Updates \p LoopBB's weight and returns true. If \p LoopBB has already -// an associated weight it is unchanged and false is returned. -// -// Please note by the algorithm the weight is not expected to change once set -// thus 'false' status is used to track visited blocks. -bool BranchProbabilityInfo::updateEstimatedBlockWeight( - LoopBlock &LoopBB, uint32_t BBWeight, - SmallVectorImpl<BasicBlock *> &BlockWorkList, - SmallVectorImpl<LoopBlock> &LoopWorkList) { - BasicBlock *BB = LoopBB.getBlock(); - - // In general, weight is assigned to a block when it has final value and - // can't/shouldn't be changed. However, there are cases when a block - // inherently has several (possibly "contradicting") weights. For example, - // "unwind" block may also contain "cold" call. In that case the first - // set weight is favored and all consequent weights are ignored. - if (!EstimatedBlockWeight.insert({BB, BBWeight}).second) - return false; - - for (BasicBlock *PredBlock : predecessors(BB)) { - LoopBlock PredLoop = getLoopBlock(PredBlock); - // Add affected block/loop to a working list. 
- if (isLoopExitingEdge({PredLoop, LoopBB})) { - if (!EstimatedLoopWeight.count(PredLoop.getLoopData())) - LoopWorkList.push_back(PredLoop); - } else if (!EstimatedBlockWeight.count(PredBlock)) - BlockWorkList.push_back(PredBlock); +Optional<uint32_t> +BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { + auto WeightIt = EstimatedBlockWeight.find(BB); + if (WeightIt == EstimatedBlockWeight.end()) + return None; + return WeightIt->second; +} + +Optional<uint32_t> +BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { + auto WeightIt = EstimatedLoopWeight.find(L); + if (WeightIt == EstimatedLoopWeight.end()) + return None; + return WeightIt->second; +} + +Optional<uint32_t> +BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const { + // For edges entering a loop take weight of a loop rather than an individual + // block in the loop. + return isLoopEnteringEdge(Edge) + ? getEstimatedLoopWeight(Edge.second.getLoopData()) + : getEstimatedBlockWeight(Edge.second.getBlock()); +} + +template <class IterT> +Optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight( + const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const { + SmallVector<uint32_t, 4> Weights; + Optional<uint32_t> MaxWeight; + for (const BasicBlock *DstBB : Successors) { + const LoopBlock DstLoopBB = getLoopBlock(DstBB); + auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); + + if (!Weight) + return None; + + if (!MaxWeight || MaxWeight.getValue() < Weight.getValue()) + MaxWeight = Weight; } - return true; -} - -// Starting from \p BB traverse through dominator blocks and assign \p BBWeight -// to all such blocks that are post dominated by \BB. In other words to all -// blocks that the one is executed if and only if another one is executed. -// Importantly, we skip loops here for two reasons. First weights of blocks in -// a loop should be scaled by trip count (yet possibly unknown). Second there is -// no any value in doing that because that doesn't give any additional -// information regarding distribution of probabilities inside the loop. -// Exception is loop 'enter' and 'exit' edges that are handled in a special way -// at calcEstimatedHeuristics. -// -// In addition, \p WorkList is populated with basic blocks if at leas one -// successor has updated estimated weight. -void BranchProbabilityInfo::propagateEstimatedBlockWeight( - const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT, - uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList, - SmallVectorImpl<LoopBlock> &LoopWorkList) { - const BasicBlock *BB = LoopBB.getBlock(); - const auto *DTStartNode = DT->getNode(BB); - const auto *PDTStartNode = PDT->getNode(BB); - - // TODO: Consider propagating weight down the domination line as well. - for (const auto *DTNode = DTStartNode; DTNode != nullptr; - DTNode = DTNode->getIDom()) { - auto *DomBB = DTNode->getBlock(); - // Consider blocks which lie on one 'line'. - if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB))) - // If BB doesn't post dominate DomBB it will not post dominate dominators - // of DomBB as well. - break; - LoopBlock DomLoopBB = getLoopBlock(DomBB); - const LoopEdge Edge{DomLoopBB, LoopBB}; - // Don't propagate weight to blocks belonging to different loops. 
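// Sketch of the worklist step in the updateEstimatedBlockWeight hunk above:
// the first weight assigned to a block wins, and predecessors that still have
// no weight are queued so their weights can later be derived from their
// successors. The loop-exiting case, which queues the surrounding loop
// instead, is omitted; all types below are simplified stand-ins.
#include <cstdint>
#include <unordered_map>
#include <vector>

struct WeightState {
  std::unordered_map<int, uint32_t> BlockWeight; // block id -> estimated weight
  std::vector<int> BlockWorkList;
};

// Returns false if the block already had a weight (doubles as a visited mark).
bool setWeightAndQueuePreds(WeightState &S, int BB, uint32_t Weight,
                            const std::vector<int> &Preds) {
  if (!S.BlockWeight.emplace(BB, Weight).second)
    return false;                      // keep the first weight, ignore the rest
  for (int Pred : Preds)
    if (!S.BlockWeight.count(Pred))
      S.BlockWorkList.push_back(Pred); // revisit once this successor has a weight
  return true;
}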
- if (!isLoopEnteringExitingEdge(Edge)) { - if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList, - LoopWorkList)) - // If DomBB has weight set then all it's predecessors are already - // processed (since we propagate weight up to the top of IR each time). - break; - } else if (isLoopExitingEdge(Edge)) { - LoopWorkList.push_back(DomLoopBB); + return MaxWeight; +} + +// Updates \p LoopBB's weight and returns true. If \p LoopBB has already +// an associated weight it is unchanged and false is returned. +// +// Please note by the algorithm the weight is not expected to change once set +// thus 'false' status is used to track visited blocks. +bool BranchProbabilityInfo::updateEstimatedBlockWeight( + LoopBlock &LoopBB, uint32_t BBWeight, + SmallVectorImpl<BasicBlock *> &BlockWorkList, + SmallVectorImpl<LoopBlock> &LoopWorkList) { + BasicBlock *BB = LoopBB.getBlock(); + + // In general, weight is assigned to a block when it has final value and + // can't/shouldn't be changed. However, there are cases when a block + // inherently has several (possibly "contradicting") weights. For example, + // "unwind" block may also contain "cold" call. In that case the first + // set weight is favored and all consequent weights are ignored. + if (!EstimatedBlockWeight.insert({BB, BBWeight}).second) + return false; + + for (BasicBlock *PredBlock : predecessors(BB)) { + LoopBlock PredLoop = getLoopBlock(PredBlock); + // Add affected block/loop to a working list. + if (isLoopExitingEdge({PredLoop, LoopBB})) { + if (!EstimatedLoopWeight.count(PredLoop.getLoopData())) + LoopWorkList.push_back(PredLoop); + } else if (!EstimatedBlockWeight.count(PredBlock)) + BlockWorkList.push_back(PredBlock); + } + return true; +} + +// Starting from \p BB traverse through dominator blocks and assign \p BBWeight +// to all such blocks that are post dominated by \BB. In other words to all +// blocks that the one is executed if and only if another one is executed. +// Importantly, we skip loops here for two reasons. First weights of blocks in +// a loop should be scaled by trip count (yet possibly unknown). Second there is +// no any value in doing that because that doesn't give any additional +// information regarding distribution of probabilities inside the loop. +// Exception is loop 'enter' and 'exit' edges that are handled in a special way +// at calcEstimatedHeuristics. +// +// In addition, \p WorkList is populated with basic blocks if at leas one +// successor has updated estimated weight. +void BranchProbabilityInfo::propagateEstimatedBlockWeight( + const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT, + uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList, + SmallVectorImpl<LoopBlock> &LoopWorkList) { + const BasicBlock *BB = LoopBB.getBlock(); + const auto *DTStartNode = DT->getNode(BB); + const auto *PDTStartNode = PDT->getNode(BB); + + // TODO: Consider propagating weight down the domination line as well. + for (const auto *DTNode = DTStartNode; DTNode != nullptr; + DTNode = DTNode->getIDom()) { + auto *DomBB = DTNode->getBlock(); + // Consider blocks which lie on one 'line'. + if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB))) + // If BB doesn't post dominate DomBB it will not post dominate dominators + // of DomBB as well. + break; + + LoopBlock DomLoopBB = getLoopBlock(DomBB); + const LoopEdge Edge{DomLoopBB, LoopBB}; + // Don't propagate weight to blocks belonging to different loops. 
+ if (!isLoopEnteringExitingEdge(Edge)) { + if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList, + LoopWorkList)) + // If DomBB has weight set then all it's predecessors are already + // processed (since we propagate weight up to the top of IR each time). + break; + } else if (isLoopExitingEdge(Edge)) { + LoopWorkList.push_back(DomLoopBB); } } -} - -Optional<uint32_t> BranchProbabilityInfo::getInitialEstimatedBlockWeight( - const BasicBlock *BB) { - // Returns true if \p BB has call marked with "NoReturn" attribute. - auto hasNoReturn = [&](const BasicBlock *BB) { - for (const auto &I : reverse(*BB)) - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->hasFnAttr(Attribute::NoReturn)) - return true; - +} + +Optional<uint32_t> BranchProbabilityInfo::getInitialEstimatedBlockWeight( + const BasicBlock *BB) { + // Returns true if \p BB has call marked with "NoReturn" attribute. + auto hasNoReturn = [&](const BasicBlock *BB) { + for (const auto &I : reverse(*BB)) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->hasFnAttr(Attribute::NoReturn)) + return true; + return false; - }; - - // Important note regarding the order of checks. They are ordered by weight - // from lowest to highest. Doing that allows to avoid "unstable" results - // when several conditions heuristics can be applied simultaneously. - if (isa<UnreachableInst>(BB->getTerminator()) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable - // since it is expected to practically never execute. - // TODO: Should we actually treat as never returning call? - BB->getTerminatingDeoptimizeCall()) - return hasNoReturn(BB) - ? static_cast<uint32_t>(BlockExecWeight::NORETURN) - : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE); - - // Check if the block is 'unwind' handler of some invoke instruction. - for (const auto *Pred : predecessors(BB)) - if (Pred) - if (const auto *II = dyn_cast<InvokeInst>(Pred->getTerminator())) - if (II->getUnwindDest() == BB) - return static_cast<uint32_t>(BlockExecWeight::UNWIND); - - // Check if the block contains 'cold' call. - for (const auto &I : *BB) - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->hasFnAttr(Attribute::Cold)) - return static_cast<uint32_t>(BlockExecWeight::COLD); - - return None; -} - -// Does RPO traversal over all blocks in \p F and assigns weights to -// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its -// best to propagate the weight to up/down the IR. -void BranchProbabilityInfo::computeEestimateBlockWeight( - const Function &F, DominatorTree *DT, PostDominatorTree *PDT) { - SmallVector<BasicBlock *, 8> BlockWorkList; - SmallVector<LoopBlock, 8> LoopWorkList; - - // By doing RPO we make sure that all predecessors already have weights - // calculated before visiting theirs successors. - ReversePostOrderTraversal<const Function *> RPOT(&F); - for (const auto *BB : RPOT) - if (auto BBWeight = getInitialEstimatedBlockWeight(BB)) - // If we were able to find estimated weight for the block set it to this - // block and propagate up the IR. - propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, - BBWeight.getValue(), BlockWorkList, - LoopWorkList); - - // BlockWorklist/LoopWorkList contains blocks/loops with at least one - // successor/exit having estimated weight. Try to propagate weight to such - // blocks/loops from successors/exits. - // Process loops and blocks. Order is not important. 
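// Sketch of the ordering in the getInitialEstimatedBlockWeight hunk above,
// with plain flags standing in for the IR inspection. The numeric values
// mirror the BlockExecWeight constants defined earlier in this diff
// (UNREACHABLE = 0, NORETURN = UNWIND = 1, COLD = 0xffff).
#include <cstdint>

struct BlockTraits {
  bool EndsInUnreachableOrDeopt = false;
  bool HasNoReturnCall = false;
  bool IsInvokeUnwindDest = false;
  bool HasColdCall = false;
};

// HasWeight is set to false when no dedicated weight applies and the caller
// should fall back to the DEFAULT weight.
uint32_t initialBlockWeight(const BlockTraits &T, bool &HasWeight) {
  HasWeight = true;
  if (T.EndsInUnreachableOrDeopt)
    return T.HasNoReturnCall ? 0x1u /* NORETURN */ : 0x0u /* UNREACHABLE */;
  if (T.IsInvokeUnwindDest)
    return 0x1u;    // UNWIND
  if (T.HasColdCall)
    return 0xffffu; // COLD
  HasWeight = false;
  return 0;
}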
- do { - while (!LoopWorkList.empty()) { - const LoopBlock LoopBB = LoopWorkList.pop_back_val(); - - if (EstimatedLoopWeight.count(LoopBB.getLoopData())) - continue; - - SmallVector<BasicBlock *, 4> Exits; - getLoopExitBlocks(LoopBB, Exits); - auto LoopWeight = getMaxEstimatedEdgeWeight( - LoopBB, make_range(Exits.begin(), Exits.end())); - - if (LoopWeight) { - // If we never exit the loop then we can enter it once at maximum. - if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) - LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); - - EstimatedLoopWeight.insert( - {LoopBB.getLoopData(), LoopWeight.getValue()}); - // Add all blocks entering the loop into working list. - getLoopEnterBlocks(LoopBB, BlockWorkList); - } - } - - while (!BlockWorkList.empty()) { - // We can reach here only if BlockWorkList is not empty. - const BasicBlock *BB = BlockWorkList.pop_back_val(); - if (EstimatedBlockWeight.count(BB)) - continue; - - // We take maximum over all weights of successors. In other words we take - // weight of "hot" path. In theory we can probably find a better function - // which gives higher accuracy results (comparing to "maximum") but I - // can't - // think of any right now. And I doubt it will make any difference in - // practice. - const LoopBlock LoopBB = getLoopBlock(BB); - auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB)); - - if (MaxWeight) - propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(), - BlockWorkList, LoopWorkList); - } - } while (!BlockWorkList.empty() || !LoopWorkList.empty()); -} - -// Calculate edge probabilities based on block's estimated weight. -// Note that gathered weights were not scaled for loops. Thus edges entering -// and exiting loops requires special processing. -bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { - assert(BB->getTerminator()->getNumSuccessors() > 1 && - "expected more than one successor!"); - - const LoopBlock LoopBB = getLoopBlock(BB); - - SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks; - uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; - if (LoopBB.getLoop()) - computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks); - - // Changed to 'true' if at least one successor has estimated weight. - bool FoundEstimatedWeight = false; - SmallVector<uint32_t, 4> SuccWeights; - uint64_t TotalWeight = 0; - // Go over all successors of BB and put their weights into SuccWeights. - for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - const BasicBlock *SuccBB = *I; - Optional<uint32_t> Weight; - const LoopBlock SuccLoopBB = getLoopBlock(SuccBB); - const LoopEdge Edge{LoopBB, SuccLoopBB}; - - Weight = getEstimatedEdgeWeight(Edge); - - if (isLoopExitingEdge(Edge) && - // Avoid adjustment of ZERO weight since it should remain unchanged. - Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { - // Scale down loop exiting weight by trip count. - Weight = std::max( - static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), - Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / - TC); - } - bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB); - if (IsUnlikelyEdge && - // Avoid adjustment of ZERO weight since it should remain unchanged. - Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { - // 'Unlikely' blocks have twice lower weight. 
- Weight = std::max( - static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), - Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / - 2); - } - - if (Weight) - FoundEstimatedWeight = true; - - auto WeightVal = - Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)); - TotalWeight += WeightVal; - SuccWeights.push_back(WeightVal); + }; + + // Important note regarding the order of checks. They are ordered by weight + // from lowest to highest. Doing that allows to avoid "unstable" results + // when several conditions heuristics can be applied simultaneously. + if (isa<UnreachableInst>(BB->getTerminator()) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since it is expected to practically never execute. + // TODO: Should we actually treat as never returning call? + BB->getTerminatingDeoptimizeCall()) + return hasNoReturn(BB) + ? static_cast<uint32_t>(BlockExecWeight::NORETURN) + : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE); + + // Check if the block is 'unwind' handler of some invoke instruction. + for (const auto *Pred : predecessors(BB)) + if (Pred) + if (const auto *II = dyn_cast<InvokeInst>(Pred->getTerminator())) + if (II->getUnwindDest() == BB) + return static_cast<uint32_t>(BlockExecWeight::UNWIND); + + // Check if the block contains 'cold' call. + for (const auto &I : *BB) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + return static_cast<uint32_t>(BlockExecWeight::COLD); + + return None; +} + +// Does RPO traversal over all blocks in \p F and assigns weights to +// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its +// best to propagate the weight to up/down the IR. +void BranchProbabilityInfo::computeEestimateBlockWeight( + const Function &F, DominatorTree *DT, PostDominatorTree *PDT) { + SmallVector<BasicBlock *, 8> BlockWorkList; + SmallVector<LoopBlock, 8> LoopWorkList; + + // By doing RPO we make sure that all predecessors already have weights + // calculated before visiting theirs successors. + ReversePostOrderTraversal<const Function *> RPOT(&F); + for (const auto *BB : RPOT) + if (auto BBWeight = getInitialEstimatedBlockWeight(BB)) + // If we were able to find estimated weight for the block set it to this + // block and propagate up the IR. + propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, + BBWeight.getValue(), BlockWorkList, + LoopWorkList); + + // BlockWorklist/LoopWorkList contains blocks/loops with at least one + // successor/exit having estimated weight. Try to propagate weight to such + // blocks/loops from successors/exits. + // Process loops and blocks. Order is not important. + do { + while (!LoopWorkList.empty()) { + const LoopBlock LoopBB = LoopWorkList.pop_back_val(); + + if (EstimatedLoopWeight.count(LoopBB.getLoopData())) + continue; + + SmallVector<BasicBlock *, 4> Exits; + getLoopExitBlocks(LoopBB, Exits); + auto LoopWeight = getMaxEstimatedEdgeWeight( + LoopBB, make_range(Exits.begin(), Exits.end())); + + if (LoopWeight) { + // If we never exit the loop then we can enter it once at maximum. + if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) + LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); + + EstimatedLoopWeight.insert( + {LoopBB.getLoopData(), LoopWeight.getValue()}); + // Add all blocks entering the loop into working list. 
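// Arithmetic sketch of the two adjustments made in the calcEstimatedHeuristics
// hunk above: a loop-exiting edge is divided by an assumed trip count
// (LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT in the real code) and an "unlikely"
// edge is halved, never dropping below the lowest non-zero weight; exact ZERO
// weights are left untouched. The trip count below is an assumed placeholder.
#include <algorithm>
#include <cstdint>

uint32_t adjustEdgeWeight(uint32_t Weight, bool IsLoopExiting, bool IsUnlikely,
                          uint32_t AssumedTripCount = 32) {
  const uint32_t LowestNonZero = 1;
  if (Weight == 0)
    return 0;                                    // ZERO stays ZERO
  if (IsLoopExiting)
    Weight = std::max(LowestNonZero, Weight / AssumedTripCount);
  if (IsUnlikely)
    Weight = std::max(LowestNonZero, Weight / 2);
  return Weight;
}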
+ getLoopEnterBlocks(LoopBB, BlockWorkList); + } + } + + while (!BlockWorkList.empty()) { + // We can reach here only if BlockWorkList is not empty. + const BasicBlock *BB = BlockWorkList.pop_back_val(); + if (EstimatedBlockWeight.count(BB)) + continue; + + // We take maximum over all weights of successors. In other words we take + // weight of "hot" path. In theory we can probably find a better function + // which gives higher accuracy results (comparing to "maximum") but I + // can't + // think of any right now. And I doubt it will make any difference in + // practice. + const LoopBlock LoopBB = getLoopBlock(BB); + auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB)); + + if (MaxWeight) + propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(), + BlockWorkList, LoopWorkList); + } + } while (!BlockWorkList.empty() || !LoopWorkList.empty()); +} + +// Calculate edge probabilities based on block's estimated weight. +// Note that gathered weights were not scaled for loops. Thus edges entering +// and exiting loops requires special processing. +bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { + assert(BB->getTerminator()->getNumSuccessors() > 1 && + "expected more than one successor!"); + + const LoopBlock LoopBB = getLoopBlock(BB); + + SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks; + uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; + if (LoopBB.getLoop()) + computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks); + + // Changed to 'true' if at least one successor has estimated weight. + bool FoundEstimatedWeight = false; + SmallVector<uint32_t, 4> SuccWeights; + uint64_t TotalWeight = 0; + // Go over all successors of BB and put their weights into SuccWeights. + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + const BasicBlock *SuccBB = *I; + Optional<uint32_t> Weight; + const LoopBlock SuccLoopBB = getLoopBlock(SuccBB); + const LoopEdge Edge{LoopBB, SuccLoopBB}; + + Weight = getEstimatedEdgeWeight(Edge); + + if (isLoopExitingEdge(Edge) && + // Avoid adjustment of ZERO weight since it should remain unchanged. + Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { + // Scale down loop exiting weight by trip count. + Weight = std::max( + static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), + Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / + TC); + } + bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB); + if (IsUnlikelyEdge && + // Avoid adjustment of ZERO weight since it should remain unchanged. + Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { + // 'Unlikely' blocks have twice lower weight. + Weight = std::max( + static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), + Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / + 2); + } + + if (Weight) + FoundEstimatedWeight = true; + + auto WeightVal = + Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)); + TotalWeight += WeightVal; + SuccWeights.push_back(WeightVal); } - // If non of blocks have estimated weight bail out. - // If TotalWeight is 0 that means weight of each successor is 0 as well and - // equally likely. Bail out early to not deal with devision by zero. - if (!FoundEstimatedWeight || TotalWeight == 0) - return false; - - assert(SuccWeights.size() == succ_size(BB) && "Missed successor?"); - const unsigned SuccCount = SuccWeights.size(); - - // If the sum of weights does not fit in 32 bits, scale every weight down - // accordingly. 
- if (TotalWeight > UINT32_MAX) { - uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; - TotalWeight = 0; - for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { - SuccWeights[Idx] /= ScalingFactor; - if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO)) - SuccWeights[Idx] = - static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); - TotalWeight += SuccWeights[Idx]; - } - assert(TotalWeight <= UINT32_MAX && "Total weight overflows"); + // If non of blocks have estimated weight bail out. + // If TotalWeight is 0 that means weight of each successor is 0 as well and + // equally likely. Bail out early to not deal with devision by zero. + if (!FoundEstimatedWeight || TotalWeight == 0) + return false; + + assert(SuccWeights.size() == succ_size(BB) && "Missed successor?"); + const unsigned SuccCount = SuccWeights.size(); + + // If the sum of weights does not fit in 32 bits, scale every weight down + // accordingly. + if (TotalWeight > UINT32_MAX) { + uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; + TotalWeight = 0; + for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { + SuccWeights[Idx] /= ScalingFactor; + if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO)) + SuccWeights[Idx] = + static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); + TotalWeight += SuccWeights[Idx]; + } + assert(TotalWeight <= UINT32_MAX && "Total weight overflows"); } - // Finally set probabilities to edges according to estimated block weights. - SmallVector<BranchProbability, 4> EdgeProbabilities( - SuccCount, BranchProbability::getUnknown()); - - for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { - EdgeProbabilities[Idx] = - BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); + // Finally set probabilities to edges according to estimated block weights. + SmallVector<BranchProbability, 4> EdgeProbabilities( + SuccCount, BranchProbability::getUnknown()); + + for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { + EdgeProbabilities[Idx] = + BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); } setEdgeProbability(BB, EdgeProbabilities); return true; @@ -940,7 +940,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, // we don't have information about probabilities. if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) if (LHS->getOpcode() == Instruction::And) - if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1))) + if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1))) if (AndRHS->getValue().isPowerOf2()) return false; @@ -956,8 +956,8 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, Func == LibFunc_strcmp || Func == LibFunc_strncasecmp || Func == LibFunc_strncmp || - Func == LibFunc_memcmp || - Func == LibFunc_bcmp) { + Func == LibFunc_memcmp || + Func == LibFunc_bcmp) { // strcmp and similar functions return zero, negative, or positive, if the // first string is equal, less, or greater than the second. 
We consider it // likely that the strings are not equal, so a comparison with zero is @@ -1114,7 +1114,7 @@ BranchProbabilityInfo::getHotSucc(const BasicBlock *BB) const { auto MaxProb = BranchProbability::getZero(); const BasicBlock *MaxSucc = nullptr; - for (const auto *Succ : successors(BB)) { + for (const auto *Succ : successors(BB)) { auto Prob = getEdgeProbability(BB, Succ); if (Prob > MaxProb) { MaxProb = Prob; @@ -1137,10 +1137,10 @@ BranchProbability BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { auto I = Probs.find(std::make_pair(Src, IndexInSuccessors)); - assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) == - (Probs.end() == I) && - "Probability for I-th successor must always be defined along with the " - "probability for the first successor"); + assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) == + (Probs.end() == I) && + "Probability for I-th successor must always be defined along with the " + "probability for the first successor"); if (I != Probs.end()) return I->second; @@ -1159,32 +1159,32 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, BranchProbability BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { - if (!Probs.count(std::make_pair(Src, 0))) - return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src)); - + if (!Probs.count(std::make_pair(Src, 0))) + return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src)); + auto Prob = BranchProbability::getZero(); for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) - if (*I == Dst) - Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second; + if (*I == Dst) + Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second; - return Prob; + return Prob; } /// Set the edge probability for all edges at once. void BranchProbabilityInfo::setEdgeProbability( const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) { assert(Src->getTerminator()->getNumSuccessors() == Probs.size()); - eraseBlock(Src); // Erase stale data if any. + eraseBlock(Src); // Erase stale data if any. if (Probs.size() == 0) return; // Nothing to set. - Handles.insert(BasicBlockCallbackVH(Src, this)); + Handles.insert(BasicBlockCallbackVH(Src, this)); uint64_t TotalNumerator = 0; for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) { - this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx]; - LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx - << " successor probability to " << Probs[SuccIdx] - << "\n"); + this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx]; + LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx + << " successor probability to " << Probs[SuccIdx] + << "\n"); TotalNumerator += Probs[SuccIdx].getNumerator(); } @@ -1197,25 +1197,25 @@ void BranchProbabilityInfo::setEdgeProbability( assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size()); } -void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src, - BasicBlock *Dst) { - eraseBlock(Dst); // Erase stale data if any. - unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors(); - assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors()); - if (NumSuccessors == 0) - return; // Nothing to set. - if (this->Probs.find(std::make_pair(Src, 0)) == this->Probs.end()) - return; // No probability is set for edges from Src. Keep the same for Dst. 
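// Sketch of the fallback in the getEdgeProbability hunk above: when no
// probabilities have been stored for Src, every outgoing edge is treated as
// equally likely, and parallel edges to the same successor each contribute
// one share. Plain ints stand in for basic blocks here.
#include <algorithm>
#include <utility>
#include <vector>

// Returns the probability of Src -> Dst as the fraction Num/Den.
std::pair<unsigned, unsigned>
uniformEdgeProbability(const std::vector<int> &Successors, int Dst) {
  unsigned Num = std::count(Successors.begin(), Successors.end(), Dst);
  unsigned Den = Successors.size();
  return {Num, Den}; // e.g. successors {B, B, C} and Dst == B  ->  2/3
}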
- - Handles.insert(BasicBlockCallbackVH(Dst, this)); - for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) { - auto Prob = this->Probs[std::make_pair(Src, SuccIdx)]; - this->Probs[std::make_pair(Dst, SuccIdx)] = Prob; - LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx - << " successor probability to " << Prob << "\n"); - } -} - +void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src, + BasicBlock *Dst) { + eraseBlock(Dst); // Erase stale data if any. + unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors(); + assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors()); + if (NumSuccessors == 0) + return; // Nothing to set. + if (this->Probs.find(std::make_pair(Src, 0)) == this->Probs.end()) + return; // No probability is set for edges from Src. Keep the same for Dst. + + Handles.insert(BasicBlockCallbackVH(Dst, this)); + for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) { + auto Prob = this->Probs[std::make_pair(Src, SuccIdx)]; + this->Probs[std::make_pair(Dst, SuccIdx)] = Prob; + LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx + << " successor probability to " << Prob << "\n"); + } +} + raw_ostream & BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, @@ -1229,55 +1229,55 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, } void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { - LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n"); - - // Note that we cannot use successors of BB because the terminator of BB may - // have changed when eraseBlock is called as a BasicBlockCallbackVH callback. - // Instead we remove prob data for the block by iterating successors by their - // indices from 0 till the last which exists. There could not be prob data for - // a pair (BB, N) if there is no data for (BB, N-1) because the data is always - // set for all successors from 0 to M at once by the method - // setEdgeProbability(). - Handles.erase(BasicBlockCallbackVH(BB, this)); - for (unsigned I = 0;; ++I) { - auto MapI = Probs.find(std::make_pair(BB, I)); - if (MapI == Probs.end()) { - assert(Probs.count(std::make_pair(BB, I + 1)) == 0 && - "Must be no more successors"); - return; - } - Probs.erase(MapI); + LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n"); + + // Note that we cannot use successors of BB because the terminator of BB may + // have changed when eraseBlock is called as a BasicBlockCallbackVH callback. + // Instead we remove prob data for the block by iterating successors by their + // indices from 0 till the last which exists. There could not be prob data for + // a pair (BB, N) if there is no data for (BB, N-1) because the data is always + // set for all successors from 0 to M at once by the method + // setEdgeProbability(). + Handles.erase(BasicBlockCallbackVH(BB, this)); + for (unsigned I = 0;; ++I) { + auto MapI = Probs.find(std::make_pair(BB, I)); + if (MapI == Probs.end()) { + assert(Probs.count(std::make_pair(BB, I + 1)) == 0 && + "Must be no more successors"); + return; + } + Probs.erase(MapI); } } -void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, +void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, const TargetLibraryInfo *TLI, - DominatorTree *DT, + DominatorTree *DT, PostDominatorTree *PDT) { LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. 
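// Sketch of the invariant the eraseBlock hunk above relies on: probabilities
// are keyed by (block, successor index) and always populated densely starting
// from index 0, so erasing can walk indices until the first missing entry.
// Plain ints stand in for BasicBlock pointers.
#include <map>
#include <utility>

void eraseBlockProbs(std::map<std::pair<int, unsigned>, double> &Probs, int BB) {
  for (unsigned I = 0;; ++I) {
    auto It = Probs.find({BB, I});
    if (It == Probs.end())
      return;        // dense population means nothing exists past this index
    Probs.erase(It);
  }
}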
- LI = &LoopI; + LI = &LoopI; - SccI = std::make_unique<SccInfo>(F); + SccI = std::make_unique<SccInfo>(F); - assert(EstimatedBlockWeight.empty()); - assert(EstimatedLoopWeight.empty()); + assert(EstimatedBlockWeight.empty()); + assert(EstimatedLoopWeight.empty()); - std::unique_ptr<DominatorTree> DTPtr; + std::unique_ptr<DominatorTree> DTPtr; std::unique_ptr<PostDominatorTree> PDTPtr; - if (!DT) { - DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F)); - DT = DTPtr.get(); - } - + if (!DT) { + DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F)); + DT = DTPtr.get(); + } + if (!PDT) { PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); PDT = PDTPtr.get(); } - computeEestimateBlockWeight(F, DT, PDT); + computeEestimateBlockWeight(F, DT, PDT); // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. @@ -1289,7 +1289,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, continue; if (calcMetadataWeights(BB)) continue; - if (calcEstimatedHeuristics(BB)) + if (calcEstimatedHeuristics(BB)) continue; if (calcPointerHeuristics(BB)) continue; @@ -1299,9 +1299,9 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, continue; } - EstimatedLoopWeight.clear(); - EstimatedBlockWeight.clear(); - SccI.reset(); + EstimatedLoopWeight.clear(); + EstimatedBlockWeight.clear(); + SccI.reset(); if (PrintBranchProb && (PrintBranchProbFuncName.empty() || @@ -1318,7 +1318,7 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage( AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<PostDominatorTreeWrapperPass>(); AU.setPreservesAll(); } @@ -1327,10 +1327,10 @@ bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); PostDominatorTree &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); - BPI.calculate(F, LI, &TLI, &DT, &PDT); + BPI.calculate(F, LI, &TLI, &DT, &PDT); return false; } @@ -1347,7 +1347,7 @@ BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BranchProbabilityInfo BPI; BPI.calculate(F, AM.getResult<LoopAnalysis>(F), &AM.getResult<TargetLibraryAnalysis>(F), - &AM.getResult<DominatorTreeAnalysis>(F), + &AM.getResult<DominatorTreeAnalysis>(F), &AM.getResult<PostDominatorTreeAnalysis>(F)); return BPI; } diff --git a/contrib/libs/llvm12/lib/Analysis/CFG.cpp b/contrib/libs/llvm12/lib/Analysis/CFG.cpp index 33602ed716..9ddc7e9d3d 100644 --- a/contrib/libs/llvm12/lib/Analysis/CFG.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CFG.cpp @@ -14,18 +14,18 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; -// The max number of basic blocks explored during reachability analysis between -// two basic blocks. This is kept reasonably small to limit compile time when -// repeatedly used by clients of this analysis (such as captureTracking). 
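// Sketch of the bounded reachability walk described above: a plain worklist
// traversal that gives up and conservatively answers "reachable" once the
// block budget is spent. The default budget of 32 matches the cl::init value
// in this hunk; the adjacency-list graph is a hypothetical stand-in.
#include <vector>

bool potentiallyReachable(const std::vector<std::vector<int>> &Succs, int From,
                          int To, unsigned Limit = 32) {
  std::vector<int> Worklist{From};
  std::vector<bool> Visited(Succs.size(), false);
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (Visited[BB])
      continue;
    Visited[BB] = true;
    if (BB == To)
      return true;
    if (Limit-- == 0)
      return true;                 // budget exhausted: stay conservative
    for (int Succ : Succs[BB])
      Worklist.push_back(Succ);
  }
  return false;                    // exhaustively proven unreachable
}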
-static cl::opt<unsigned> DefaultMaxBBsToExplore( - "dom-tree-reachability-max-bbs-to-explore", cl::Hidden, - cl::desc("Max number of BBs to explore for reachability analysis"), - cl::init(32)); - +// The max number of basic blocks explored during reachability analysis between +// two basic blocks. This is kept reasonably small to limit compile time when +// repeatedly used by clients of this analysis (such as captureTracking). +static cl::opt<unsigned> DefaultMaxBBsToExplore( + "dom-tree-reachability-max-bbs-to-explore", cl::Hidden, + cl::desc("Max number of BBs to explore for reachability analysis"), + cl::init(32)); + /// FindFunctionBackedges - Analyze the specified function to find all of the /// loop backedges in the function and return them. This is a relatively cheap /// (compared to computing dominators and loop info) analysis. @@ -103,7 +103,7 @@ bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest, assert(TI->isTerminator() && "Must be a terminator to have successors!"); if (TI->getNumSuccessors() == 1) return false; - assert(is_contained(predecessors(Dest), TI->getParent()) && + assert(is_contained(predecessors(Dest), TI->getParent()) && "No edge between TI's block and Dest."); const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); @@ -161,7 +161,7 @@ bool llvm::isPotentiallyReachableFromMany( const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr; - unsigned Limit = DefaultMaxBBsToExplore; + unsigned Limit = DefaultMaxBBsToExplore; SmallPtrSet<const BasicBlock*, 32> Visited; do { BasicBlock *BB = Worklist.pop_back_val(); diff --git a/contrib/libs/llvm12/lib/Analysis/CFGPrinter.cpp b/contrib/libs/llvm12/lib/Analysis/CFGPrinter.cpp index 33b5a46032..735a345f98 100644 --- a/contrib/libs/llvm12/lib/Analysis/CFGPrinter.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CFGPrinter.cpp @@ -272,17 +272,17 @@ FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass() { void DOTGraphTraits<DOTFuncInfo *>::computeHiddenNodes(const Function *F) { auto evaluateBB = [&](const BasicBlock *Node) { - if (succ_empty(Node)) { + if (succ_empty(Node)) { const Instruction *TI = Node->getTerminator(); isHiddenBasicBlock[Node] = (HideUnreachablePaths && isa<UnreachableInst>(TI)) || (HideDeoptimizePaths && Node->getTerminatingDeoptimizeCall()); return; } - isHiddenBasicBlock[Node] = - llvm::all_of(successors(Node), [this](const BasicBlock *BB) { - return isHiddenBasicBlock[BB]; - }); + isHiddenBasicBlock[Node] = + llvm::all_of(successors(Node), [this](const BasicBlock *BB) { + return isHiddenBasicBlock[BB]; + }); }; /// The post order traversal iteration is done to know the status of /// isHiddenBasicBlock for all the successors on the current BB. @@ -290,8 +290,8 @@ void DOTGraphTraits<DOTFuncInfo *>::computeHiddenNodes(const Function *F) { evaluateBB); } -bool DOTGraphTraits<DOTFuncInfo *>::isNodeHidden(const BasicBlock *Node, - const DOTFuncInfo *CFGInfo) { +bool DOTGraphTraits<DOTFuncInfo *>::isNodeHidden(const BasicBlock *Node, + const DOTFuncInfo *CFGInfo) { // If both restricting flags are false, all nodes are displayed. 
if (!HideUnreachablePaths && !HideDeoptimizePaths) return false; diff --git a/contrib/libs/llvm12/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/CFLAndersAliasAnalysis.cpp index 2be23a56cc..9cc8c52c30 100644 --- a/contrib/libs/llvm12/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -559,7 +559,7 @@ bool CFLAndersAAResult::FunctionInfo::mayAlias( if (RangePair.first != RangePair.second) { // Be conservative about unknown sizes - if (!MaybeLHSSize.hasValue() || !MaybeRHSSize.hasValue()) + if (!MaybeLHSSize.hasValue() || !MaybeRHSSize.hasValue()) return true; const uint64_t LHSSize = MaybeLHSSize.getValue(); diff --git a/contrib/libs/llvm12/lib/Analysis/CGSCCPassManager.cpp b/contrib/libs/llvm12/lib/Analysis/CGSCCPassManager.cpp index 3230e9036b..6f1c0be4c0 100644 --- a/contrib/libs/llvm12/lib/Analysis/CGSCCPassManager.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CGSCCPassManager.cpp @@ -20,12 +20,12 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PassManagerImpl.h" -#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> @@ -39,11 +39,11 @@ using namespace llvm; // template typedefs. namespace llvm { -static cl::opt<bool> AbortOnMaxDevirtIterationsReached( - "abort-on-max-devirt-iterations-reached", - cl::desc("Abort when the max iterations for devirtualization CGSCC repeat " - "pass is reached")); - +static cl::opt<bool> AbortOnMaxDevirtIterationsReached( + "abort-on-max-devirt-iterations-reached", + cl::desc("Abort when the max iterations for devirtualization CGSCC repeat " + "pass is reached")); + // Explicit instantiations for the core proxy templates. template class AllAnalysesOn<LazyCallGraph::SCC>; template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; @@ -93,9 +93,9 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, } if (UR.InvalidatedSCCs.count(C)) - PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); + PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); else - PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); + PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); // Update the SCC if necessary. C = UR.UpdatedC ? UR.UpdatedC : C; @@ -148,452 +148,452 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, return PA; } -PreservedAnalyses -ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { - // Setup the CGSCC analysis manager from its proxy. - CGSCCAnalysisManager &CGAM = - AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); - - // Get the call graph for this module. - LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); - - // Get Function analysis manager from its proxy. - FunctionAnalysisManager &FAM = - AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager(); - - // We keep worklists to allow us to push more work onto the pass manager as - // the passes are run. 
- SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist; - SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist; - - // Keep sets for invalidated SCCs and RefSCCs that should be skipped when - // iterating off the worklists. - SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; - SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; - - SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> - InlinedInternalEdges; - - CGSCCUpdateResult UR = { - RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, - nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges, - {}}; - - // Request PassInstrumentation from analysis manager, will use it to run - // instrumenting callbacks for the passes later. - PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); - - PreservedAnalyses PA = PreservedAnalyses::all(); - CG.buildRefSCCs(); - for (auto RCI = CG.postorder_ref_scc_begin(), - RCE = CG.postorder_ref_scc_end(); - RCI != RCE;) { - assert(RCWorklist.empty() && - "Should always start with an empty RefSCC worklist"); - // The postorder_ref_sccs range we are walking is lazily constructed, so - // we only push the first one onto the worklist. The worklist allows us - // to capture *new* RefSCCs created during transformations. - // - // We really want to form RefSCCs lazily because that makes them cheaper - // to update as the program is simplified and allows us to have greater - // cache locality as forming a RefSCC touches all the parts of all the - // functions within that RefSCC. - // - // We also eagerly increment the iterator to the next position because - // the CGSCC passes below may delete the current RefSCC. - RCWorklist.insert(&*RCI++); - - do { - LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); - if (InvalidRefSCCSet.count(RC)) { - LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n"); - continue; - } - - assert(CWorklist.empty() && - "Should always start with an empty SCC worklist"); - - LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC - << "\n"); - - // The top of the worklist may *also* be the same SCC we just ran over - // (and invalidated for). Keep track of that last SCC we processed due - // to SCC update to avoid redundant processing when an SCC is both just - // updated itself and at the top of the worklist. - LazyCallGraph::SCC *LastUpdatedC = nullptr; - - // Push the initial SCCs in reverse post-order as we'll pop off the - // back and so see this in post-order. - for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) - CWorklist.insert(&C); - - do { - LazyCallGraph::SCC *C = CWorklist.pop_back_val(); - // Due to call graph mutations, we may have invalid SCCs or SCCs from - // other RefSCCs in the worklist. The invalid ones are dead and the - // other RefSCCs should be queued above, so we just need to skip both - // scenarios here. - if (InvalidSCCSet.count(C)) { - LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n"); - continue; - } - if (LastUpdatedC == C) { - LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n"); - continue; - } - if (&C->getOuterRefSCC() != RC) { - LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other " - "RefSCC...\n"); - continue; - } - - // Ensure we can proxy analysis updates from the CGSCC analysis manager - // into the the Function analysis manager by getting a proxy here. - // This also needs to update the FunctionAnalysisManager, as this may be - // the first time we see this SCC. 
- CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( - FAM); - - // Each time we visit a new SCC pulled off the worklist, - // a transformation of a child SCC may have also modified this parent - // and invalidated analyses. So we invalidate using the update record's - // cross-SCC preserved set. This preserved set is intersected by any - // CGSCC pass that handles invalidation (primarily pass managers) prior - // to marking its SCC as preserved. That lets us track everything that - // might need invalidation across SCCs without excessive invalidations - // on a single SCC. - // - // This essentially allows SCC passes to freely invalidate analyses - // of any ancestor SCC. If this becomes detrimental to successfully - // caching analyses, we could force each SCC pass to manually - // invalidate the analyses for any SCCs other than themselves which - // are mutated. However, that seems to lose the robustness of the - // pass-manager driven invalidation scheme. - CGAM.invalidate(*C, UR.CrossSCCPA); - - do { - // Check that we didn't miss any update scenario. - assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!"); - assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - assert(&C->getOuterRefSCC() == RC && - "Processing an SCC in a different RefSCC!"); - - LastUpdatedC = UR.UpdatedC; - UR.UpdatedRC = nullptr; - UR.UpdatedC = nullptr; - - // Check the PassInstrumentation's BeforePass callbacks before - // running the pass, skip its execution completely if asked to - // (callback returns false). - if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C)) - continue; - - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass->name()); - PassPA = Pass->run(*C, CGAM, CG, UR); - } - - if (UR.InvalidatedSCCs.count(C)) - PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); - else - PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); - - // Update the SCC and RefSCC if necessary. - C = UR.UpdatedC ? UR.UpdatedC : C; - RC = UR.UpdatedRC ? UR.UpdatedRC : RC; - - if (UR.UpdatedC) { - // If we're updating the SCC, also update the FAM inside the proxy's - // result. - CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( - FAM); - } - - // If the CGSCC pass wasn't able to provide a valid updated SCC, - // the current SCC may simply need to be skipped if invalid. - if (UR.InvalidatedSCCs.count(C)) { - LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); - break; - } - // Check that we didn't miss any update scenario. - assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - - // We handle invalidating the CGSCC analysis manager's information - // for the (potentially updated) SCC here. Note that any other SCCs - // whose structure has changed should have been invalidated by - // whatever was updating the call graph. This SCC gets invalidated - // late as it contains the nodes that were actively being - // processed. - CGAM.invalidate(*C, PassPA); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - // Also intersect with the cross-SCC preserved set to capture any - // cross-SCC invalidation. - UR.CrossSCCPA.intersect(PassPA); - PA.intersect(std::move(PassPA)); - - // The pass may have restructured the call graph and refined the - // current SCC and/or RefSCC. We need to update our current SCC and - // RefSCC pointers to follow these. 
Also, when the current SCC is - // refined, re-run the SCC pass over the newly refined SCC in order - // to observe the most precise SCC model available. This inherently - // cannot cycle excessively as it only happens when we split SCCs - // apart, at most converging on a DAG of single nodes. - // FIXME: If we ever start having RefSCC passes, we'll want to - // iterate there too. - if (UR.UpdatedC) - LLVM_DEBUG(dbgs() - << "Re-running SCC passes after a refinement of the " - "current SCC: " - << *UR.UpdatedC << "\n"); - - // Note that both `C` and `RC` may at this point refer to deleted, - // invalid SCC and RefSCCs respectively. But we will short circuit - // the processing when we check them in the loop above. - } while (UR.UpdatedC); - } while (!CWorklist.empty()); - - // We only need to keep internal inlined edge information within - // a RefSCC, clear it to save on space and let the next time we visit - // any of these functions have a fresh start. - InlinedInternalEdges.clear(); - } while (!RCWorklist.empty()); - } - - // By definition we preserve the call garph, all SCC analyses, and the - // analysis proxies by handling them above and in any nested pass managers. - PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); - PA.preserve<LazyCallGraphAnalysis>(); - PA.preserve<CGSCCAnalysisManagerModuleProxy>(); - PA.preserve<FunctionAnalysisManagerModuleProxy>(); - return PA; -} - -PreservedAnalyses DevirtSCCRepeatedPass::run(LazyCallGraph::SCC &InitialC, - CGSCCAnalysisManager &AM, - LazyCallGraph &CG, - CGSCCUpdateResult &UR) { - PreservedAnalyses PA = PreservedAnalyses::all(); - PassInstrumentation PI = - AM.getResult<PassInstrumentationAnalysis>(InitialC, CG); - - // The SCC may be refined while we are running passes over it, so set up - // a pointer that we can update. - LazyCallGraph::SCC *C = &InitialC; - - // Struct to track the counts of direct and indirect calls in each function - // of the SCC. - struct CallCount { - int Direct; - int Indirect; - }; - - // Put value handles on all of the indirect calls and return the number of - // direct calls for each function in the SCC. - auto ScanSCC = [](LazyCallGraph::SCC &C, - SmallMapVector<Value *, WeakTrackingVH, 16> &CallHandles) { - assert(CallHandles.empty() && "Must start with a clear set of handles."); - - SmallDenseMap<Function *, CallCount> CallCounts; - CallCount CountLocal = {0, 0}; - for (LazyCallGraph::Node &N : C) { - CallCount &Count = - CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) - .first->second; - for (Instruction &I : instructions(N.getFunction())) - if (auto *CB = dyn_cast<CallBase>(&I)) { - if (CB->getCalledFunction()) { - ++Count.Direct; - } else { - ++Count.Indirect; - CallHandles.insert({CB, WeakTrackingVH(CB)}); - } - } - } - - return CallCounts; - }; - - UR.IndirectVHs.clear(); - // Populate the initial call handles and get the initial call counts. - auto CallCounts = ScanSCC(*C, UR.IndirectVHs); - - for (int Iteration = 0;; ++Iteration) { - if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C)) - continue; - - PreservedAnalyses PassPA = Pass->run(*C, AM, CG, UR); - - if (UR.InvalidatedSCCs.count(C)) - PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); - else - PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); - - // If the SCC structure has changed, bail immediately and let the outer - // CGSCC layer handle any iteration to reflect the refined structure. 
- if (UR.UpdatedC && UR.UpdatedC != C) { - PA.intersect(std::move(PassPA)); - break; - } - - // Check that we didn't miss any update scenario. - assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); - assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - - // Check whether any of the handles were devirtualized. - bool Devirt = llvm::any_of(UR.IndirectVHs, [](auto &P) -> bool { - if (P.second) { - if (CallBase *CB = dyn_cast<CallBase>(P.second)) { - if (CB->getCalledFunction()) { - LLVM_DEBUG(dbgs() << "Found devirtualized call: " << *CB << "\n"); - return true; - } - } - } - return false; - }); - - // Rescan to build up a new set of handles and count how many direct - // calls remain. If we decide to iterate, this also sets up the input to - // the next iteration. - UR.IndirectVHs.clear(); - auto NewCallCounts = ScanSCC(*C, UR.IndirectVHs); - - // If we haven't found an explicit devirtualization already see if we - // have decreased the number of indirect calls and increased the number - // of direct calls for any function in the SCC. This can be fooled by all - // manner of transformations such as DCE and other things, but seems to - // work well in practice. - if (!Devirt) - // Iterate over the keys in NewCallCounts, if Function also exists in - // CallCounts, make the check below. - for (auto &Pair : NewCallCounts) { - auto &CallCountNew = Pair.second; - auto CountIt = CallCounts.find(Pair.first); - if (CountIt != CallCounts.end()) { - const auto &CallCountOld = CountIt->second; - if (CallCountOld.Indirect > CallCountNew.Indirect && - CallCountOld.Direct < CallCountNew.Direct) { - Devirt = true; - break; - } - } - } - - if (!Devirt) { - PA.intersect(std::move(PassPA)); - break; - } - - // Otherwise, if we've already hit our max, we're done. - if (Iteration >= MaxIterations) { - if (AbortOnMaxDevirtIterationsReached) - report_fatal_error("Max devirtualization iterations reached"); - LLVM_DEBUG( - dbgs() << "Found another devirtualization after hitting the max " - "number of repetitions (" - << MaxIterations << ") on SCC: " << *C << "\n"); - PA.intersect(std::move(PassPA)); - break; - } - - LLVM_DEBUG( - dbgs() << "Repeating an SCC pass after finding a devirtualization in: " - << *C << "\n"); - - // Move over the new call counts in preparation for iterating. - CallCounts = std::move(NewCallCounts); - - // Update the analysis manager with each run and intersect the total set - // of preserved analyses so we're ready to iterate. - AM.invalidate(*C, PassPA); - - PA.intersect(std::move(PassPA)); - } - - // Note that we don't add any preserved entries here unlike a more normal - // "pass manager" because we only handle invalidation *between* iterations, - // not after the last iteration. - return PA; -} - -PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C, - CGSCCAnalysisManager &AM, - LazyCallGraph &CG, - CGSCCUpdateResult &UR) { - // Setup the function analysis manager from its proxy. - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); - - SmallVector<LazyCallGraph::Node *, 4> Nodes; - for (LazyCallGraph::Node &N : C) - Nodes.push_back(&N); - - // The SCC may get split while we are optimizing functions due to deleting - // edges. If this happens, the current SCC can shift, so keep track of - // a pointer we can overwrite. 
- LazyCallGraph::SCC *CurrentC = &C; - - LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C << "\n"); - - PreservedAnalyses PA = PreservedAnalyses::all(); - for (LazyCallGraph::Node *N : Nodes) { - // Skip nodes from other SCCs. These may have been split out during - // processing. We'll eventually visit those SCCs and pick up the nodes - // there. - if (CG.lookupSCC(*N) != CurrentC) - continue; - - Function &F = N->getFunction(); - - PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); - if (!PI.runBeforePass<Function>(*Pass, F)) - continue; - - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass->name()); - PassPA = Pass->run(F, FAM); - } - - PI.runAfterPass<Function>(*Pass, F, PassPA); - - // We know that the function pass couldn't have invalidated any other - // function's analyses (that's the contract of a function pass), so - // directly handle the function analysis manager's invalidation here. - FAM.invalidate(F, PassPA); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - PA.intersect(std::move(PassPA)); - - // If the call graph hasn't been preserved, update it based on this - // function pass. This may also update the current SCC to point to - // a smaller, more refined SCC. - auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); - if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { - CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, - AM, UR, FAM); - assert(CG.lookupSCC(*N) == CurrentC && - "Current SCC not updated to the SCC containing the current node!"); - } - } - - // By definition we preserve the proxy. And we preserve all analyses on - // Functions. This precludes *any* invalidation of function analyses by the - // proxy, but that's OK because we've taken care to invalidate analyses in - // the function analysis manager incrementally above. - PA.preserveSet<AllAnalysesOn<Function>>(); - PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); - - // We've also ensured that we updated the call graph along the way. - PA.preserve<LazyCallGraphAnalysis>(); - - return PA; -} - +PreservedAnalyses +ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { + // Setup the CGSCC analysis manager from its proxy. + CGSCCAnalysisManager &CGAM = + AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); + + // Get the call graph for this module. + LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); + + // Get Function analysis manager from its proxy. + FunctionAnalysisManager &FAM = + AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager(); + + // We keep worklists to allow us to push more work onto the pass manager as + // the passes are run. + SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist; + SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist; + + // Keep sets for invalidated SCCs and RefSCCs that should be skipped when + // iterating off the worklists. + SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; + SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; + + SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> + InlinedInternalEdges; + + CGSCCUpdateResult UR = { + RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, + nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges, + {}}; + + // Request PassInstrumentation from analysis manager, will use it to run + // instrumenting callbacks for the passes later. 
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); + + PreservedAnalyses PA = PreservedAnalyses::all(); + CG.buildRefSCCs(); + for (auto RCI = CG.postorder_ref_scc_begin(), + RCE = CG.postorder_ref_scc_end(); + RCI != RCE;) { + assert(RCWorklist.empty() && + "Should always start with an empty RefSCC worklist"); + // The postorder_ref_sccs range we are walking is lazily constructed, so + // we only push the first one onto the worklist. The worklist allows us + // to capture *new* RefSCCs created during transformations. + // + // We really want to form RefSCCs lazily because that makes them cheaper + // to update as the program is simplified and allows us to have greater + // cache locality as forming a RefSCC touches all the parts of all the + // functions within that RefSCC. + // + // We also eagerly increment the iterator to the next position because + // the CGSCC passes below may delete the current RefSCC. + RCWorklist.insert(&*RCI++); + + do { + LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); + if (InvalidRefSCCSet.count(RC)) { + LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n"); + continue; + } + + assert(CWorklist.empty() && + "Should always start with an empty SCC worklist"); + + LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC + << "\n"); + + // The top of the worklist may *also* be the same SCC we just ran over + // (and invalidated for). Keep track of that last SCC we processed due + // to SCC update to avoid redundant processing when an SCC is both just + // updated itself and at the top of the worklist. + LazyCallGraph::SCC *LastUpdatedC = nullptr; + + // Push the initial SCCs in reverse post-order as we'll pop off the + // back and so see this in post-order. + for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) + CWorklist.insert(&C); + + do { + LazyCallGraph::SCC *C = CWorklist.pop_back_val(); + // Due to call graph mutations, we may have invalid SCCs or SCCs from + // other RefSCCs in the worklist. The invalid ones are dead and the + // other RefSCCs should be queued above, so we just need to skip both + // scenarios here. + if (InvalidSCCSet.count(C)) { + LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n"); + continue; + } + if (LastUpdatedC == C) { + LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n"); + continue; + } + if (&C->getOuterRefSCC() != RC) { + LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other " + "RefSCC...\n"); + continue; + } + + // Ensure we can proxy analysis updates from the CGSCC analysis manager + // into the the Function analysis manager by getting a proxy here. + // This also needs to update the FunctionAnalysisManager, as this may be + // the first time we see this SCC. + CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( + FAM); + + // Each time we visit a new SCC pulled off the worklist, + // a transformation of a child SCC may have also modified this parent + // and invalidated analyses. So we invalidate using the update record's + // cross-SCC preserved set. This preserved set is intersected by any + // CGSCC pass that handles invalidation (primarily pass managers) prior + // to marking its SCC as preserved. That lets us track everything that + // might need invalidation across SCCs without excessive invalidations + // on a single SCC. + // + // This essentially allows SCC passes to freely invalidate analyses + // of any ancestor SCC. 
If this becomes detrimental to successfully + // caching analyses, we could force each SCC pass to manually + // invalidate the analyses for any SCCs other than themselves which + // are mutated. However, that seems to lose the robustness of the + // pass-manager driven invalidation scheme. + CGAM.invalidate(*C, UR.CrossSCCPA); + + do { + // Check that we didn't miss any update scenario. + assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + assert(&C->getOuterRefSCC() == RC && + "Processing an SCC in a different RefSCC!"); + + LastUpdatedC = UR.UpdatedC; + UR.UpdatedRC = nullptr; + UR.UpdatedC = nullptr; + + // Check the PassInstrumentation's BeforePass callbacks before + // running the pass, skip its execution completely if asked to + // (callback returns false). + if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C)) + continue; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass->name()); + PassPA = Pass->run(*C, CGAM, CG, UR); + } + + if (UR.InvalidatedSCCs.count(C)) + PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); + else + PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); + + // Update the SCC and RefSCC if necessary. + C = UR.UpdatedC ? UR.UpdatedC : C; + RC = UR.UpdatedRC ? UR.UpdatedRC : RC; + + if (UR.UpdatedC) { + // If we're updating the SCC, also update the FAM inside the proxy's + // result. + CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( + FAM); + } + + // If the CGSCC pass wasn't able to provide a valid updated SCC, + // the current SCC may simply need to be skipped if invalid. + if (UR.InvalidatedSCCs.count(C)) { + LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); + break; + } + // Check that we didn't miss any update scenario. + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + + // We handle invalidating the CGSCC analysis manager's information + // for the (potentially updated) SCC here. Note that any other SCCs + // whose structure has changed should have been invalidated by + // whatever was updating the call graph. This SCC gets invalidated + // late as it contains the nodes that were actively being + // processed. + CGAM.invalidate(*C, PassPA); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + // Also intersect with the cross-SCC preserved set to capture any + // cross-SCC invalidation. + UR.CrossSCCPA.intersect(PassPA); + PA.intersect(std::move(PassPA)); + + // The pass may have restructured the call graph and refined the + // current SCC and/or RefSCC. We need to update our current SCC and + // RefSCC pointers to follow these. Also, when the current SCC is + // refined, re-run the SCC pass over the newly refined SCC in order + // to observe the most precise SCC model available. This inherently + // cannot cycle excessively as it only happens when we split SCCs + // apart, at most converging on a DAG of single nodes. + // FIXME: If we ever start having RefSCC passes, we'll want to + // iterate there too. + if (UR.UpdatedC) + LLVM_DEBUG(dbgs() + << "Re-running SCC passes after a refinement of the " + "current SCC: " + << *UR.UpdatedC << "\n"); + + // Note that both `C` and `RC` may at this point refer to deleted, + // invalid SCC and RefSCCs respectively. But we will short circuit + // the processing when we check them in the loop above. 
+ } while (UR.UpdatedC); + } while (!CWorklist.empty()); + + // We only need to keep internal inlined edge information within + // a RefSCC, clear it to save on space and let the next time we visit + // any of these functions have a fresh start. + InlinedInternalEdges.clear(); + } while (!RCWorklist.empty()); + } + + // By definition we preserve the call garph, all SCC analyses, and the + // analysis proxies by handling them above and in any nested pass managers. + PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); + PA.preserve<LazyCallGraphAnalysis>(); + PA.preserve<CGSCCAnalysisManagerModuleProxy>(); + PA.preserve<FunctionAnalysisManagerModuleProxy>(); + return PA; +} + +PreservedAnalyses DevirtSCCRepeatedPass::run(LazyCallGraph::SCC &InitialC, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + PreservedAnalyses PA = PreservedAnalyses::all(); + PassInstrumentation PI = + AM.getResult<PassInstrumentationAnalysis>(InitialC, CG); + + // The SCC may be refined while we are running passes over it, so set up + // a pointer that we can update. + LazyCallGraph::SCC *C = &InitialC; + + // Struct to track the counts of direct and indirect calls in each function + // of the SCC. + struct CallCount { + int Direct; + int Indirect; + }; + + // Put value handles on all of the indirect calls and return the number of + // direct calls for each function in the SCC. + auto ScanSCC = [](LazyCallGraph::SCC &C, + SmallMapVector<Value *, WeakTrackingVH, 16> &CallHandles) { + assert(CallHandles.empty() && "Must start with a clear set of handles."); + + SmallDenseMap<Function *, CallCount> CallCounts; + CallCount CountLocal = {0, 0}; + for (LazyCallGraph::Node &N : C) { + CallCount &Count = + CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) + .first->second; + for (Instruction &I : instructions(N.getFunction())) + if (auto *CB = dyn_cast<CallBase>(&I)) { + if (CB->getCalledFunction()) { + ++Count.Direct; + } else { + ++Count.Indirect; + CallHandles.insert({CB, WeakTrackingVH(CB)}); + } + } + } + + return CallCounts; + }; + + UR.IndirectVHs.clear(); + // Populate the initial call handles and get the initial call counts. + auto CallCounts = ScanSCC(*C, UR.IndirectVHs); + + for (int Iteration = 0;; ++Iteration) { + if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C)) + continue; + + PreservedAnalyses PassPA = Pass->run(*C, AM, CG, UR); + + if (UR.InvalidatedSCCs.count(C)) + PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); + else + PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); + + // If the SCC structure has changed, bail immediately and let the outer + // CGSCC layer handle any iteration to reflect the refined structure. + if (UR.UpdatedC && UR.UpdatedC != C) { + PA.intersect(std::move(PassPA)); + break; + } + + // Check that we didn't miss any update scenario. + assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + + // Check whether any of the handles were devirtualized. + bool Devirt = llvm::any_of(UR.IndirectVHs, [](auto &P) -> bool { + if (P.second) { + if (CallBase *CB = dyn_cast<CallBase>(P.second)) { + if (CB->getCalledFunction()) { + LLVM_DEBUG(dbgs() << "Found devirtualized call: " << *CB << "\n"); + return true; + } + } + } + return false; + }); + + // Rescan to build up a new set of handles and count how many direct + // calls remain. If we decide to iterate, this also sets up the input to + // the next iteration. 
+ UR.IndirectVHs.clear(); + auto NewCallCounts = ScanSCC(*C, UR.IndirectVHs); + + // If we haven't found an explicit devirtualization already see if we + // have decreased the number of indirect calls and increased the number + // of direct calls for any function in the SCC. This can be fooled by all + // manner of transformations such as DCE and other things, but seems to + // work well in practice. + if (!Devirt) + // Iterate over the keys in NewCallCounts, if Function also exists in + // CallCounts, make the check below. + for (auto &Pair : NewCallCounts) { + auto &CallCountNew = Pair.second; + auto CountIt = CallCounts.find(Pair.first); + if (CountIt != CallCounts.end()) { + const auto &CallCountOld = CountIt->second; + if (CallCountOld.Indirect > CallCountNew.Indirect && + CallCountOld.Direct < CallCountNew.Direct) { + Devirt = true; + break; + } + } + } + + if (!Devirt) { + PA.intersect(std::move(PassPA)); + break; + } + + // Otherwise, if we've already hit our max, we're done. + if (Iteration >= MaxIterations) { + if (AbortOnMaxDevirtIterationsReached) + report_fatal_error("Max devirtualization iterations reached"); + LLVM_DEBUG( + dbgs() << "Found another devirtualization after hitting the max " + "number of repetitions (" + << MaxIterations << ") on SCC: " << *C << "\n"); + PA.intersect(std::move(PassPA)); + break; + } + + LLVM_DEBUG( + dbgs() << "Repeating an SCC pass after finding a devirtualization in: " + << *C << "\n"); + + // Move over the new call counts in preparation for iterating. + CallCounts = std::move(NewCallCounts); + + // Update the analysis manager with each run and intersect the total set + // of preserved analyses so we're ready to iterate. + AM.invalidate(*C, PassPA); + + PA.intersect(std::move(PassPA)); + } + + // Note that we don't add any preserved entries here unlike a more normal + // "pass manager" because we only handle invalidation *between* iterations, + // not after the last iteration. + return PA; +} + +PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + // Setup the function analysis manager from its proxy. + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + + SmallVector<LazyCallGraph::Node *, 4> Nodes; + for (LazyCallGraph::Node &N : C) + Nodes.push_back(&N); + + // The SCC may get split while we are optimizing functions due to deleting + // edges. If this happens, the current SCC can shift, so keep track of + // a pointer we can overwrite. + LazyCallGraph::SCC *CurrentC = &C; + + LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C << "\n"); + + PreservedAnalyses PA = PreservedAnalyses::all(); + for (LazyCallGraph::Node *N : Nodes) { + // Skip nodes from other SCCs. These may have been split out during + // processing. We'll eventually visit those SCCs and pick up the nodes + // there. + if (CG.lookupSCC(*N) != CurrentC) + continue; + + Function &F = N->getFunction(); + + PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); + if (!PI.runBeforePass<Function>(*Pass, F)) + continue; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass->name()); + PassPA = Pass->run(F, FAM); + } + + PI.runAfterPass<Function>(*Pass, F, PassPA); + + // We know that the function pass couldn't have invalidated any other + // function's analyses (that's the contract of a function pass), so + // directly handle the function analysis manager's invalidation here. 
+ FAM.invalidate(F, PassPA); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + PA.intersect(std::move(PassPA)); + + // If the call graph hasn't been preserved, update it based on this + // function pass. This may also update the current SCC to point to + // a smaller, more refined SCC. + auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { + CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, + AM, UR, FAM); + assert(CG.lookupSCC(*N) == CurrentC && + "Current SCC not updated to the SCC containing the current node!"); + } + } + + // By definition we preserve the proxy. And we preserve all analyses on + // Functions. This precludes *any* invalidation of function analyses by the + // proxy, but that's OK because we've taken care to invalidate analyses in + // the function analysis manager incrementally above. + PA.preserveSet<AllAnalysesOn<Function>>(); + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + + // We've also ensured that we updated the call graph along the way. + PA.preserve<LazyCallGraphAnalysis>(); + + return PA; +} + bool CGSCCAnalysisManagerModuleProxy::Result::invalidate( Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv) { @@ -833,7 +833,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) { using SCC = LazyCallGraph::SCC; - if (NewSCCRange.empty()) + if (NewSCCRange.empty()) return C; // Add the current SCC to the worklist as its shape has changed. @@ -917,61 +917,61 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass( // First walk the function and handle all called functions. We do this first // because if there is a single call edge, whether there are ref edges is // irrelevant. - for (Instruction &I : instructions(F)) { - if (auto *CB = dyn_cast<CallBase>(&I)) { - if (Function *Callee = CB->getCalledFunction()) { + for (Instruction &I : instructions(F)) { + if (auto *CB = dyn_cast<CallBase>(&I)) { + if (Function *Callee = CB->getCalledFunction()) { if (Visited.insert(Callee).second && !Callee->isDeclaration()) { - Node *CalleeN = G.lookup(*Callee); - assert(CalleeN && - "Visited function should already have an associated node"); - Edge *E = N->lookup(*CalleeN); + Node *CalleeN = G.lookup(*Callee); + assert(CalleeN && + "Visited function should already have an associated node"); + Edge *E = N->lookup(*CalleeN); assert((E || !FunctionPass) && "No function transformations should introduce *new* " "call edges! Any new calls should be modeled as " "promoted existing ref edges!"); - bool Inserted = RetainedEdges.insert(CalleeN).second; + bool Inserted = RetainedEdges.insert(CalleeN).second; (void)Inserted; assert(Inserted && "We should never visit a function twice."); if (!E) - NewCallEdges.insert(CalleeN); + NewCallEdges.insert(CalleeN); else if (!E->isCall()) - PromotedRefTargets.insert(CalleeN); + PromotedRefTargets.insert(CalleeN); } - } else { - // We can miss devirtualization if an indirect call is created then - // promoted before updateCGAndAnalysisManagerForPass runs. 
- auto *Entry = UR.IndirectVHs.find(CB); - if (Entry == UR.IndirectVHs.end()) - UR.IndirectVHs.insert({CB, WeakTrackingVH(CB)}); - else if (!Entry->second) - Entry->second = WeakTrackingVH(CB); - } - } - } + } else { + // We can miss devirtualization if an indirect call is created then + // promoted before updateCGAndAnalysisManagerForPass runs. + auto *Entry = UR.IndirectVHs.find(CB); + if (Entry == UR.IndirectVHs.end()) + UR.IndirectVHs.insert({CB, WeakTrackingVH(CB)}); + else if (!Entry->second) + Entry->second = WeakTrackingVH(CB); + } + } + } // Now walk all references. for (Instruction &I : instructions(F)) for (Value *Op : I.operand_values()) - if (auto *OpC = dyn_cast<Constant>(Op)) - if (Visited.insert(OpC).second) - Worklist.push_back(OpC); + if (auto *OpC = dyn_cast<Constant>(Op)) + if (Visited.insert(OpC).second) + Worklist.push_back(OpC); auto VisitRef = [&](Function &Referee) { - Node *RefereeN = G.lookup(Referee); - assert(RefereeN && - "Visited function should already have an associated node"); - Edge *E = N->lookup(*RefereeN); + Node *RefereeN = G.lookup(Referee); + assert(RefereeN && + "Visited function should already have an associated node"); + Edge *E = N->lookup(*RefereeN); assert((E || !FunctionPass) && "No function transformations should introduce *new* ref " "edges! Any new ref edges would require IPO which " "function passes aren't allowed to do!"); - bool Inserted = RetainedEdges.insert(RefereeN).second; + bool Inserted = RetainedEdges.insert(RefereeN).second; (void)Inserted; assert(Inserted && "We should never visit a function twice."); if (!E) - NewRefEdges.insert(RefereeN); + NewRefEdges.insert(RefereeN); else if (E->isCall()) - DemotedCallTargets.insert(RefereeN); + DemotedCallTargets.insert(RefereeN); }; LazyCallGraph::visitReferences(Worklist, Visited, VisitRef); @@ -994,17 +994,17 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass( // TODO: This only allows trivial edges to be added for now. assert((RC == &TargetRC || RC->isAncestorOf(TargetRC)) && "New call edge is not trivial!"); - // Add a trivial ref edge to be promoted later on alongside - // PromotedRefTargets. - RC->insertTrivialRefEdge(N, *CallTarget); + // Add a trivial ref edge to be promoted later on alongside + // PromotedRefTargets. + RC->insertTrivialRefEdge(N, *CallTarget); } // Include synthetic reference edges to known, defined lib functions. - for (auto *LibFn : G.getLibFunctions()) + for (auto *LibFn : G.getLibFunctions()) // While the list of lib functions doesn't have repeats, don't re-visit // anything handled above. - if (!Visited.count(LibFn)) - VisitRef(*LibFn); + if (!Visited.count(LibFn)) + VisitRef(*LibFn); // First remove all of the edges that are no longer present in this function. // The first step makes these edges uniformly ref edges and accumulates them @@ -1031,20 +1031,20 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass( DeadTargets.push_back(&E.getNode()); } // Remove the easy cases quickly and actually pull them out of our list. - llvm::erase_if(DeadTargets, [&](Node *TargetN) { - SCC &TargetC = *G.lookupSCC(*TargetN); - RefSCC &TargetRC = TargetC.getOuterRefSCC(); + llvm::erase_if(DeadTargets, [&](Node *TargetN) { + SCC &TargetC = *G.lookupSCC(*TargetN); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); - // We can't trivially remove internal targets, so skip - // those. - if (&TargetRC == RC) - return false; + // We can't trivially remove internal targets, so skip + // those. 
+ if (&TargetRC == RC) + return false; - RC->removeOutgoingEdge(N, *TargetN); - LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '" << N << "' to '" - << TargetN << "'\n"); - return true; - }); + RC->removeOutgoingEdge(N, *TargetN); + LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '" << N << "' to '" + << TargetN << "'\n"); + return true; + }); // Now do a batch removal of the internal ref edges left. auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets); @@ -1108,11 +1108,11 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass( C, AM, UR); } - // We added a ref edge earlier for new call edges, promote those to call edges - // alongside PromotedRefTargets. - for (Node *E : NewCallEdges) - PromotedRefTargets.insert(E); - + // We added a ref edge earlier for new call edges, promote those to call edges + // alongside PromotedRefTargets. + for (Node *E : NewCallEdges) + PromotedRefTargets.insert(E); + // Now promote ref edges into call edges. for (Node *CallTarget : PromotedRefTargets) { SCC &TargetC = *G.lookupSCC(*CallTarget); diff --git a/contrib/libs/llvm12/lib/Analysis/CallGraph.cpp b/contrib/libs/llvm12/lib/Analysis/CallGraph.cpp index 9b212e564a..931a3ff3ac 100644 --- a/contrib/libs/llvm12/lib/Analysis/CallGraph.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CallGraph.cpp @@ -267,37 +267,37 @@ void CallGraphNode::replaceCallEdge(CallBase &Call, CallBase &NewCall, I->second = NewNode; NewNode->AddRef(); - // Refresh callback references. Do not resize CalledFunctions if the - // number of callbacks is the same for new and old call sites. - SmallVector<CallGraphNode *, 4u> OldCBs; - SmallVector<CallGraphNode *, 4u> NewCBs; - forEachCallbackFunction(Call, [this, &OldCBs](Function *CB) { - OldCBs.push_back(CG->getOrInsertFunction(CB)); + // Refresh callback references. Do not resize CalledFunctions if the + // number of callbacks is the same for new and old call sites. 
+ SmallVector<CallGraphNode *, 4u> OldCBs; + SmallVector<CallGraphNode *, 4u> NewCBs; + forEachCallbackFunction(Call, [this, &OldCBs](Function *CB) { + OldCBs.push_back(CG->getOrInsertFunction(CB)); }); - forEachCallbackFunction(NewCall, [this, &NewCBs](Function *CB) { - NewCBs.push_back(CG->getOrInsertFunction(CB)); + forEachCallbackFunction(NewCall, [this, &NewCBs](Function *CB) { + NewCBs.push_back(CG->getOrInsertFunction(CB)); }); - if (OldCBs.size() == NewCBs.size()) { - for (unsigned N = 0; N < OldCBs.size(); ++N) { - CallGraphNode *OldNode = OldCBs[N]; - CallGraphNode *NewNode = NewCBs[N]; - for (auto J = CalledFunctions.begin();; ++J) { - assert(J != CalledFunctions.end() && - "Cannot find callsite to update!"); - if (!J->first && J->second == OldNode) { - J->second = NewNode; - OldNode->DropRef(); - NewNode->AddRef(); - break; - } - } - } - } else { - for (auto *CGN : OldCBs) - removeOneAbstractEdgeTo(CGN); - for (auto *CGN : NewCBs) - addCalledFunction(nullptr, CGN); - } + if (OldCBs.size() == NewCBs.size()) { + for (unsigned N = 0; N < OldCBs.size(); ++N) { + CallGraphNode *OldNode = OldCBs[N]; + CallGraphNode *NewNode = NewCBs[N]; + for (auto J = CalledFunctions.begin();; ++J) { + assert(J != CalledFunctions.end() && + "Cannot find callsite to update!"); + if (!J->first && J->second == OldNode) { + J->second = NewNode; + OldNode->DropRef(); + NewNode->AddRef(); + break; + } + } + } + } else { + for (auto *CGN : OldCBs) + removeOneAbstractEdgeTo(CGN); + for (auto *CGN : NewCBs) + addCalledFunction(nullptr, CGN); + } return; } } diff --git a/contrib/libs/llvm12/lib/Analysis/CallGraphSCCPass.cpp b/contrib/libs/llvm12/lib/Analysis/CallGraphSCCPass.cpp index 38057d44e2..85339724ed 100644 --- a/contrib/libs/llvm12/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CallGraphSCCPass.cpp @@ -27,8 +27,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" -#include "llvm/IR/PrintPasses.h" -#include "llvm/IR/StructuralHash.h" +#include "llvm/IR/PrintPasses.h" +#include "llvm/IR/StructuralHash.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -43,8 +43,8 @@ using namespace llvm; #define DEBUG_TYPE "cgscc-passmgr" -cl::opt<unsigned> MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, - cl::init(4)); +cl::opt<unsigned> MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, + cl::init(4)); STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC"); @@ -467,30 +467,30 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, initializeAnalysisImpl(P); -#ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(CG.getModule()); -#endif - +#ifdef EXPENSIVE_CHECKS + uint64_t RefHash = StructuralHash(CG.getModule()); +#endif + // Actually run this pass on the current SCC. 
- bool LocalChanged = - RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate, DevirtualizedCall); - - Changed |= LocalChanged; - -#ifdef EXPENSIVE_CHECKS - if (!LocalChanged && (RefHash != StructuralHash(CG.getModule()))) { - llvm::errs() << "Pass modifies its input and doesn't report it: " - << P->getPassName() << "\n"; - llvm_unreachable("Pass modifies its input and doesn't report it"); - } -#endif - if (LocalChanged) + bool LocalChanged = + RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate, DevirtualizedCall); + + Changed |= LocalChanged; + +#ifdef EXPENSIVE_CHECKS + if (!LocalChanged && (RefHash != StructuralHash(CG.getModule()))) { + llvm::errs() << "Pass modifies its input and doesn't report it: " + << P->getPassName() << "\n"; + llvm_unreachable("Pass modifies its input and doesn't report it"); + } +#endif + if (LocalChanged) dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); dumpPreservedSet(P); verifyPreservedAnalysis(P); - if (LocalChanged) - removeNotPreservedAnalysis(P); + if (LocalChanged) + removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); removeDeadPasses(P, "", ON_CG_MSG); } @@ -539,12 +539,12 @@ bool CGPassManager::runOnModule(Module &M) { << '\n'); DevirtualizedCall = false; Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); - } while (Iteration++ < MaxDevirtIterations && DevirtualizedCall); + } while (Iteration++ < MaxDevirtIterations && DevirtualizedCall); if (DevirtualizedCall) LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration - << " times, due to -max-devirt-iterations\n"); + << " times, due to -max-devirt-iterations\n"); MaxSCCIterations.updateMax(Iteration); } diff --git a/contrib/libs/llvm12/lib/Analysis/CallPrinter.cpp b/contrib/libs/llvm12/lib/Analysis/CallPrinter.cpp index 872a91ad7c..c1619eb9ea 100644 --- a/contrib/libs/llvm12/lib/Analysis/CallPrinter.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CallPrinter.cpp @@ -143,8 +143,8 @@ struct DOTGraphTraits<CallGraphDOTInfo *> : public DefaultDOTGraphTraits { std::string(CGInfo->getModule()->getModuleIdentifier()); } - static bool isNodeHidden(const CallGraphNode *Node, - const CallGraphDOTInfo *CGInfo) { + static bool isNodeHidden(const CallGraphNode *Node, + const CallGraphDOTInfo *CGInfo) { if (CallMultiGraph || Node->getFunction()) return false; return true; @@ -196,7 +196,7 @@ struct DOTGraphTraits<CallGraphDOTInfo *> : public DefaultDOTGraphTraits { Function *F = Node->getFunction(); if (F == nullptr) return ""; - std::string attrs; + std::string attrs; if (ShowHeatColors) { uint64_t freq = CGInfo->getFreq(F); std::string color = getHeatColor(freq, CGInfo->getMaxFreq()); diff --git a/contrib/libs/llvm12/lib/Analysis/CaptureTracking.cpp b/contrib/libs/llvm12/lib/Analysis/CaptureTracking.cpp index b2fc6e603f..aff0f8afee 100644 --- a/contrib/libs/llvm12/lib/Analysis/CaptureTracking.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CaptureTracking.cpp @@ -18,7 +18,7 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ValueTracking.h" @@ -30,13 +30,13 @@ using namespace llvm; -#define DEBUG_TYPE "capture-tracking" - -STATISTIC(NumCaptured, "Number of pointers maybe captured"); -STATISTIC(NumNotCaptured, "Number of pointers not captured"); -STATISTIC(NumCapturedBefore, "Number of pointers maybe captured before"); -STATISTIC(NumNotCapturedBefore, "Number of pointers not 
captured before"); - +#define DEBUG_TYPE "capture-tracking" + +STATISTIC(NumCaptured, "Number of pointers maybe captured"); +STATISTIC(NumNotCaptured, "Number of pointers not captured"); +STATISTIC(NumCapturedBefore, "Number of pointers maybe captured before"); +STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before"); + /// The default value for MaxUsesToExplore argument. It's relatively small to /// keep the cost of analysis reasonable for clients like BasicAliasAnalysis, /// where the results can't be cached. @@ -202,10 +202,10 @@ bool llvm::PointerMayBeCaptured(const Value *V, SimpleCaptureTracker SCT(ReturnCaptures); PointerMayBeCaptured(V, &SCT, MaxUsesToExplore); - if (SCT.Captured) - ++NumCaptured; - else - ++NumNotCaptured; + if (SCT.Captured) + ++NumCaptured; + else + ++NumNotCaptured; return SCT.Captured; } @@ -234,10 +234,10 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); PointerMayBeCaptured(V, &CB, MaxUsesToExplore); - if (CB.Captured) - ++NumCapturedBefore; - else - ++NumNotCapturedBefore; + if (CB.Captured) + ++NumCapturedBefore; + else + ++NumNotCapturedBefore; return CB.Captured; } @@ -256,20 +256,20 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, for (const Use &U : V->uses()) { // If there are lots of uses, conservatively say that the value // is captured to avoid taking too much compile time. - if (Count++ >= MaxUsesToExplore) { - Tracker->tooManyUses(); - return false; - } + if (Count++ >= MaxUsesToExplore) { + Tracker->tooManyUses(); + return false; + } if (!Visited.insert(&U).second) continue; if (!Tracker->shouldExplore(&U)) continue; Worklist.push_back(&U); } - return true; + return true; }; - if (!AddUses(V)) - return; + if (!AddUses(V)) + return; while (!Worklist.empty()) { const Use *U = Worklist.pop_back_val(); @@ -289,12 +289,12 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // The pointer is not captured if returned pointer is not captured. // NOTE: CaptureTracking users should not assume that only functions // marked with nocapture do not capture. This means that places like - // getUnderlyingObject in ValueTracking or DecomposeGEPExpression + // getUnderlyingObject in ValueTracking or DecomposeGEPExpression // in BasicAA also need to know about this property. if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true)) { - if (!AddUses(Call)) - return; + if (!AddUses(Call)) + return; break; } @@ -312,11 +312,11 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // that loading a value from a pointer does not cause the pointer to be // captured, even though the loaded value might be the pointer itself // (think of self-referential objects). - if (Call->isDataOperand(U) && - !Call->doesNotCapture(Call->getDataOperandNo(U))) { - // The parameter is not marked 'nocapture' - captured. - if (Tracker->captured(U)) - return; + if (Call->isDataOperand(U) && + !Call->doesNotCapture(Call->getDataOperandNo(U))) { + // The parameter is not marked 'nocapture' - captured. + if (Tracker->captured(U)) + return; } break; } @@ -330,9 +330,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // "va-arg" from a pointer does not cause it to be captured. break; case Instruction::Store: - // Stored the pointer - conservatively assume it may be captured. - // Volatile stores make the address observable. 
- if (U->getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile()) + // Stored the pointer - conservatively assume it may be captured. + // Volatile stores make the address observable. + if (U->getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile()) if (Tracker->captured(U)) return; break; @@ -343,7 +343,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // but the value being stored is. // Volatile stores make the address observable. auto *ARMWI = cast<AtomicRMWInst>(I); - if (U->getOperandNo() == 1 || ARMWI->isVolatile()) + if (U->getOperandNo() == 1 || ARMWI->isVolatile()) if (Tracker->captured(U)) return; break; @@ -355,7 +355,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // but the value being stored is. // Volatile stores make the address observable. auto *ACXI = cast<AtomicCmpXchgInst>(I); - if (U->getOperandNo() == 1 || U->getOperandNo() == 2 || + if (U->getOperandNo() == 1 || U->getOperandNo() == 2 || ACXI->isVolatile()) if (Tracker->captured(U)) return; @@ -367,18 +367,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, case Instruction::Select: case Instruction::AddrSpaceCast: // The original value is not captured via this if the new value isn't. - if (!AddUses(I)) - return; + if (!AddUses(I)) + return; break; case Instruction::ICmp: { - unsigned Idx = U->getOperandNo(); + unsigned Idx = U->getOperandNo(); unsigned OtherIdx = 1 - Idx; if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) { // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results // with null, for example. if (CPN->getType()->getAddressSpace() == 0) - if (isNoAliasCall(U->get()->stripPointerCasts())) + if (isNoAliasCall(U->get()->stripPointerCasts())) break; if (!I->getFunction()->nullPointerIsDefined()) { auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); @@ -411,44 +411,44 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // All uses examined. } - -bool llvm::isNonEscapingLocalObject( - const Value *V, SmallDenseMap<const Value *, bool, 8> *IsCapturedCache) { - SmallDenseMap<const Value *, bool, 8>::iterator CacheIt; - if (IsCapturedCache) { - bool Inserted; - std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false}); - if (!Inserted) - // Found cached result, return it! - return CacheIt->second; - } - - // If this is a local allocation, check to see if it escapes. - if (isa<AllocaInst>(V) || isNoAliasCall(V)) { - // Set StoreCaptures to True so that we can assume in our callers that the - // pointer is not the result of a load instruction. Currently - // PointerMayBeCaptured doesn't have any special analysis for the - // StoreCaptures=false case; if it did, our callers could be refined to be - // more precise. - auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); - if (IsCapturedCache) - CacheIt->second = Ret; - return Ret; - } - - // If this is an argument that corresponds to a byval or noalias argument, - // then it has not escaped before entering the function. Check if it escapes - // inside the function. - if (const Argument *A = dyn_cast<Argument>(V)) - if (A->hasByValAttr() || A->hasNoAliasAttr()) { - // Note even if the argument is marked nocapture, we still need to check - // for copies made inside the function. The nocapture attribute only - // specifies that there are no copies made that outlive the function. 
- auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); - if (IsCapturedCache) - CacheIt->second = Ret; - return Ret; - } - - return false; -} + +bool llvm::isNonEscapingLocalObject( + const Value *V, SmallDenseMap<const Value *, bool, 8> *IsCapturedCache) { + SmallDenseMap<const Value *, bool, 8>::iterator CacheIt; + if (IsCapturedCache) { + bool Inserted; + std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false}); + if (!Inserted) + // Found cached result, return it! + return CacheIt->second; + } + + // If this is a local allocation, check to see if it escapes. + if (isa<AllocaInst>(V) || isNoAliasCall(V)) { + // Set StoreCaptures to True so that we can assume in our callers that the + // pointer is not the result of a load instruction. Currently + // PointerMayBeCaptured doesn't have any special analysis for the + // StoreCaptures=false case; if it did, our callers could be refined to be + // more precise. + auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + if (IsCapturedCache) + CacheIt->second = Ret; + return Ret; + } + + // If this is an argument that corresponds to a byval or noalias argument, + // then it has not escaped before entering the function. Check if it escapes + // inside the function. + if (const Argument *A = dyn_cast<Argument>(V)) + if (A->hasByValAttr() || A->hasNoAliasAttr()) { + // Note even if the argument is marked nocapture, we still need to check + // for copies made inside the function. The nocapture attribute only + // specifies that there are no copies made that outlive the function. + auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + if (IsCapturedCache) + CacheIt->second = Ret; + return Ret; + } + + return false; +} diff --git a/contrib/libs/llvm12/lib/Analysis/CodeMetrics.cpp b/contrib/libs/llvm12/lib/Analysis/CodeMetrics.cpp index 157811c04e..5ef904e1f1 100644 --- a/contrib/libs/llvm12/lib/Analysis/CodeMetrics.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CodeMetrics.cpp @@ -112,9 +112,9 @@ void CodeMetrics::collectEphemeralValues( /// Fill in the current structure with information gleaned from the specified /// block. -void CodeMetrics::analyzeBasicBlock( - const BasicBlock *BB, const TargetTransformInfo &TTI, - const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) { +void CodeMetrics::analyzeBasicBlock( + const BasicBlock *BB, const TargetTransformInfo &TTI, + const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (const Instruction &I : *BB) { @@ -125,16 +125,16 @@ void CodeMetrics::analyzeBasicBlock( // Special handling for calls. if (const auto *Call = dyn_cast<CallBase>(&I)) { if (const Function *F = Call->getCalledFunction()) { - bool IsLoweredToCall = TTI.isLoweredToCall(F); + bool IsLoweredToCall = TTI.isLoweredToCall(F); // If a function is both internal and has a single use, then it is // extremely likely to get inlined in the future (it was probably // exposed by an interleaved devirtualization pass). - // When preparing for LTO, liberally consider calls as inline - // candidates. - if (!Call->isNoInline() && IsLoweredToCall && - ((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) { + // When preparing for LTO, liberally consider calls as inline + // candidates. + if (!Call->isNoInline() && IsLoweredToCall && + ((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) { ++NumInlineCandidates; - } + } // If this call is to function itself, then the function is recursive. 
// Inlining it into other functions is a bad idea, because this is @@ -143,7 +143,7 @@ void CodeMetrics::analyzeBasicBlock( if (F == BB->getParent()) isRecursive = true; - if (IsLoweredToCall) + if (IsLoweredToCall) ++NumCalls; } else { // We don't want inline asm to count as a call - that would prevent loop diff --git a/contrib/libs/llvm12/lib/Analysis/ConstantFolding.cpp b/contrib/libs/llvm12/lib/Analysis/ConstantFolding.cpp index cc1ce4c658..b514387d88 100644 --- a/contrib/libs/llvm12/lib/Analysis/ConstantFolding.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ConstantFolding.cpp @@ -18,7 +18,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -42,8 +42,8 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAMDGPU.h" -#include "llvm/IR/IntrinsicsARM.h" -#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -105,16 +105,16 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { "Invalid constantexpr bitcast!"); // Catch the obvious splat cases. - if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy()) + if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy()) return Constant::getNullValue(DestTy); - if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy() && + if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy() && !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types! return Constant::getAllOnesValue(DestTy); if (auto *VTy = dyn_cast<VectorType>(C->getType())) { // Handle a vector->scalar integer/fp cast. if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) { - unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements(); + unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements(); Type *SrcEltTy = VTy->getElementType(); // If the vector is a vector of floating point, convert it to vector of int @@ -157,8 +157,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { return ConstantExpr::getBitCast(C, DestTy); // If the element types match, IR can fold it. - unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements(); - unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements(); + unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements(); + unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); @@ -295,11 +295,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { /// If this constant is a constant offset from a global, return the global and /// the constant. Because of constantexprs, this function is recursive. bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - APInt &Offset, const DataLayout &DL, - DSOLocalEquivalent **DSOEquiv) { - if (DSOEquiv) - *DSOEquiv = nullptr; - + APInt &Offset, const DataLayout &DL, + DSOLocalEquivalent **DSOEquiv) { + if (DSOEquiv) + *DSOEquiv = nullptr; + // Trivial case, constant is the global. 
if ((GV = dyn_cast<GlobalValue>(C))) { unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); @@ -307,15 +307,15 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return true; } - if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) { - if (DSOEquiv) - *DSOEquiv = FoundDSOEquiv; - GV = FoundDSOEquiv->getGlobalValue(); - unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); - Offset = APInt(BitWidth, 0); - return true; - } - + if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) { + if (DSOEquiv) + *DSOEquiv = FoundDSOEquiv; + GV = FoundDSOEquiv->getGlobalValue(); + unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); + Offset = APInt(BitWidth, 0); + return true; + } + // Otherwise, if this isn't a constant expr, bail out. auto *CE = dyn_cast<ConstantExpr>(C); if (!CE) return false; @@ -323,8 +323,8 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || CE->getOpcode() == Instruction::BitCast) - return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL, - DSOEquiv); + return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL, + DSOEquiv); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) auto *GEP = dyn_cast<GEPOperator>(CE); @@ -335,8 +335,8 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt TmpOffset(BitWidth, 0); // If the base isn't a global+constant, we aren't either. - if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL, - DSOEquiv)) + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL, + DSOEquiv)) return false; // Otherwise, add any offset that our operands provide. @@ -358,13 +358,13 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, // Catch the obvious splat cases (since all-zeros can coerce non-integral // pointers legally). - if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy()) + if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy()) return Constant::getNullValue(DestTy); if (C->isAllOnesValue() && (DestTy->isIntegerTy() || DestTy->isFloatingPointTy() || DestTy->isVectorTy()) && - !DestTy->isX86_AMXTy() && !DestTy->isX86_MMXTy() && - !DestTy->isPtrOrPtrVectorTy()) + !DestTy->isX86_AMXTy() && !DestTy->isX86_MMXTy() && + !DestTy->isPtrOrPtrVectorTy()) // Get ones when the input is trivial, but // only for supported types inside getAllOnesValue. 
return Constant::getAllOnesValue(DestTy); @@ -508,8 +508,8 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, NumElts = AT->getNumElements(); EltTy = AT->getElementType(); } else { - NumElts = cast<FixedVectorType>(C->getType())->getNumElements(); - EltTy = cast<FixedVectorType>(C->getType())->getElementType(); + NumElts = cast<FixedVectorType>(C->getType())->getNumElements(); + EltTy = cast<FixedVectorType>(C->getType())->getElementType(); } uint64_t EltSize = DL.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; @@ -576,16 +576,16 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, C = FoldBitCast(C, MapTy->getPointerTo(AS), DL); if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) { - if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && - !LoadTy->isX86_AMXTy()) + if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && + !LoadTy->isX86_AMXTy()) // Materializing a zero can be done trivially without a bitcast return Constant::getNullValue(LoadTy); Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy; Res = FoldBitCast(Res, CastTy, DL); if (LoadTy->isPtrOrPtrVectorTy()) { // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr - if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && - !LoadTy->isX86_AMXTy()) + if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && + !LoadTy->isX86_AMXTy()) return Constant::getNullValue(LoadTy); if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) // Be careful not to replace a load of an addrspace value with an inttoptr here @@ -738,7 +738,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, // If this load comes from anywhere in a constant global, and if the global // is all undef or zero, we know what it loads. - if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(CE))) { + if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(CE))) { if (GV->isConstant() && GV->hasDefinitiveInitializer()) { if (GV->getInitializer()->isNullValue()) return Constant::getNullValue(Ty); @@ -1091,8 +1091,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, default: return nullptr; case Instruction::ICmp: case Instruction::FCmp: llvm_unreachable("Invalid for compares"); - case Instruction::Freeze: - return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr; + case Instruction::Freeze: + return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? 
Ops[0] : nullptr; case Instruction::Call: if (auto *F = dyn_cast<Function>(Ops.back())) { const auto *Call = cast<CallBase>(InstOrCE); @@ -1456,12 +1456,12 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::masked_load: - case Intrinsic::get_active_lane_mask: - case Intrinsic::abs: - case Intrinsic::smax: - case Intrinsic::smin: - case Intrinsic::umax: - case Intrinsic::umin: + case Intrinsic::get_active_lane_mask: + case Intrinsic::abs: + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::umax: + case Intrinsic::umin: case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: @@ -1476,25 +1476,25 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::smul_fix_sat: case Intrinsic::bitreverse: case Intrinsic::is_constant: - case Intrinsic::vector_reduce_add: - case Intrinsic::vector_reduce_mul: - case Intrinsic::vector_reduce_and: - case Intrinsic::vector_reduce_or: - case Intrinsic::vector_reduce_xor: - case Intrinsic::vector_reduce_smin: - case Intrinsic::vector_reduce_smax: - case Intrinsic::vector_reduce_umin: - case Intrinsic::vector_reduce_umax: - // Target intrinsics - case Intrinsic::arm_mve_vctp8: - case Intrinsic::arm_mve_vctp16: - case Intrinsic::arm_mve_vctp32: - case Intrinsic::arm_mve_vctp64: - // WebAssembly float semantics are always known - case Intrinsic::wasm_trunc_signed: - case Intrinsic::wasm_trunc_unsigned: - case Intrinsic::wasm_trunc_saturate_signed: - case Intrinsic::wasm_trunc_saturate_unsigned: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umax: + // Target intrinsics + case Intrinsic::arm_mve_vctp8: + case Intrinsic::arm_mve_vctp16: + case Intrinsic::arm_mve_vctp32: + case Intrinsic::arm_mve_vctp64: + // WebAssembly float semantics are always known + case Intrinsic::wasm_trunc_signed: + case Intrinsic::wasm_trunc_unsigned: + case Intrinsic::wasm_trunc_saturate_signed: + case Intrinsic::wasm_trunc_saturate_unsigned: return true; // Floating point operations cannot be folded in strictfp functions in @@ -1515,8 +1515,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::powi: case Intrinsic::fma: case Intrinsic::fmuladd: - case Intrinsic::fptoui_sat: - case Intrinsic::fptosi_sat: + case Intrinsic::fptoui_sat: + case Intrinsic::fptosi_sat: case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: case Intrinsic::amdgcn_cos: @@ -1525,7 +1525,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::amdgcn_cubesc: case Intrinsic::amdgcn_cubetc: case Intrinsic::amdgcn_fmul_legacy: - case Intrinsic::amdgcn_fma_legacy: + case Intrinsic::amdgcn_fma_legacy: case Intrinsic::amdgcn_fract: case Intrinsic::amdgcn_ldexp: case Intrinsic::amdgcn_sin: @@ -1733,31 +1733,31 @@ Constant *ConstantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) { return nullptr; const APInt &X = CI->getValue(); switch (IID) { - case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_add: Acc = Acc + X; break; - case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_mul: Acc = Acc * X; 
break; - case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_and: Acc = Acc & X; break; - case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_or: Acc = Acc | X; break; - case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_xor: Acc = Acc ^ X; break; - case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smin: Acc = APIntOps::smin(Acc, X); break; - case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smax: Acc = APIntOps::smax(Acc, X); break; - case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umin: Acc = APIntOps::umin(Acc, X); break; - case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umax: Acc = APIntOps::umax(Acc, X); break; } @@ -1832,18 +1832,18 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, // We know we have a "Constant" argument. But we want to only // return true for manifest constants, not those that depend on // constants with unknowable values, e.g. GlobalValue or BlockAddress. - if (Operands[0]->isManifestConstant()) + if (Operands[0]->isManifestConstant()) return ConstantInt::getTrue(Ty->getContext()); return nullptr; } if (isa<UndefValue>(Operands[0])) { // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN. // ctpop() is between 0 and bitwidth, pick 0 for undef. - // fptoui.sat and fptosi.sat can always fold to zero (for a zero input). + // fptoui.sat and fptosi.sat can always fold to zero (for a zero input). if (IntrinsicID == Intrinsic::cos || - IntrinsicID == Intrinsic::ctpop || - IntrinsicID == Intrinsic::fptoui_sat || - IntrinsicID == Intrinsic::fptosi_sat) + IntrinsicID == Intrinsic::ctpop || + IntrinsicID == Intrinsic::fptoui_sat || + IntrinsicID == Intrinsic::fptosi_sat) return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::bswap || IntrinsicID == Intrinsic::bitreverse || @@ -1880,51 +1880,51 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); } - APFloat U = Op->getValueAPF(); - - if (IntrinsicID == Intrinsic::wasm_trunc_signed || - IntrinsicID == Intrinsic::wasm_trunc_unsigned || - IntrinsicID == Intrinsic::wasm_trunc_saturate_signed || - IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned) { - - bool Saturating = IntrinsicID == Intrinsic::wasm_trunc_saturate_signed || - IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned; - bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed || - IntrinsicID == Intrinsic::wasm_trunc_saturate_signed; - - if (U.isNaN()) - return Saturating ? ConstantInt::get(Ty, 0) : nullptr; - - unsigned Width = Ty->getIntegerBitWidth(); - APSInt Int(Width, !Signed); - bool IsExact = false; - APFloat::opStatus Status = - U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); - - if (Status == APFloat::opOK || Status == APFloat::opInexact) - return ConstantInt::get(Ty, Int); - - if (!Saturating) - return nullptr; - - if (U.isNegative()) - return Signed ? ConstantInt::get(Ty, APInt::getSignedMinValue(Width)) - : ConstantInt::get(Ty, APInt::getMinValue(Width)); - else - return Signed ? ConstantInt::get(Ty, APInt::getSignedMaxValue(Width)) - : ConstantInt::get(Ty, APInt::getMaxValue(Width)); - } - - if (IntrinsicID == Intrinsic::fptoui_sat || - IntrinsicID == Intrinsic::fptosi_sat) { - // convertToInteger() already has the desired saturation semantics. 
- APSInt Int(Ty->getIntegerBitWidth(), - IntrinsicID == Intrinsic::fptoui_sat); - bool IsExact; - U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); - return ConstantInt::get(Ty, Int); - } - + APFloat U = Op->getValueAPF(); + + if (IntrinsicID == Intrinsic::wasm_trunc_signed || + IntrinsicID == Intrinsic::wasm_trunc_unsigned || + IntrinsicID == Intrinsic::wasm_trunc_saturate_signed || + IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned) { + + bool Saturating = IntrinsicID == Intrinsic::wasm_trunc_saturate_signed || + IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned; + bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed || + IntrinsicID == Intrinsic::wasm_trunc_saturate_signed; + + if (U.isNaN()) + return Saturating ? ConstantInt::get(Ty, 0) : nullptr; + + unsigned Width = Ty->getIntegerBitWidth(); + APSInt Int(Width, !Signed); + bool IsExact = false; + APFloat::opStatus Status = + U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); + + if (Status == APFloat::opOK || Status == APFloat::opInexact) + return ConstantInt::get(Ty, Int); + + if (!Saturating) + return nullptr; + + if (U.isNegative()) + return Signed ? ConstantInt::get(Ty, APInt::getSignedMinValue(Width)) + : ConstantInt::get(Ty, APInt::getMinValue(Width)); + else + return Signed ? ConstantInt::get(Ty, APInt::getSignedMaxValue(Width)) + : ConstantInt::get(Ty, APInt::getMaxValue(Width)); + } + + if (IntrinsicID == Intrinsic::fptoui_sat || + IntrinsicID == Intrinsic::fptosi_sat) { + // convertToInteger() already has the desired saturation semantics. + APSInt Int(Ty->getIntegerBitWidth(), + IntrinsicID == Intrinsic::fptoui_sat); + bool IsExact; + U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); + return ConstantInt::get(Ty, Int); + } + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; @@ -2262,15 +2262,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (isa<ConstantAggregateZero>(Operands[0])) { switch (IntrinsicID) { default: break; - case Intrinsic::vector_reduce_add: - case Intrinsic::vector_reduce_mul: - case Intrinsic::vector_reduce_and: - case Intrinsic::vector_reduce_or: - case Intrinsic::vector_reduce_xor: - case Intrinsic::vector_reduce_smin: - case Intrinsic::vector_reduce_smax: - case Intrinsic::vector_reduce_umin: - case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umax: return ConstantInt::get(Ty, 0); } } @@ -2281,15 +2281,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, auto *Op = cast<Constant>(Operands[0]); switch (IntrinsicID) { default: break; - case Intrinsic::vector_reduce_add: - case Intrinsic::vector_reduce_mul: - case Intrinsic::vector_reduce_and: - case Intrinsic::vector_reduce_or: - case Intrinsic::vector_reduce_xor: - case Intrinsic::vector_reduce_smin: - case Intrinsic::vector_reduce_smax: - case Intrinsic::vector_reduce_umin: - case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umax: if 
(Constant *C = ConstantFoldVectorReduce(IntrinsicID, Op)) return C; break; @@ -2327,25 +2327,25 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, const CallBase *Call) { assert(Operands.size() == 2 && "Wrong number of operands."); - if (Ty->isFloatingPointTy()) { - // TODO: We should have undef handling for all of the FP intrinsics that - // are attempted to be folded in this function. - bool IsOp0Undef = isa<UndefValue>(Operands[0]); - bool IsOp1Undef = isa<UndefValue>(Operands[1]); - switch (IntrinsicID) { - case Intrinsic::maxnum: - case Intrinsic::minnum: - case Intrinsic::maximum: - case Intrinsic::minimum: - // If one argument is undef, return the other argument. - if (IsOp0Undef) - return Operands[1]; - if (IsOp1Undef) - return Operands[0]; - break; - } - } - + if (Ty->isFloatingPointTy()) { + // TODO: We should have undef handling for all of the FP intrinsics that + // are attempted to be folded in this function. + bool IsOp0Undef = isa<UndefValue>(Operands[0]); + bool IsOp1Undef = isa<UndefValue>(Operands[1]); + switch (IntrinsicID) { + case Intrinsic::maxnum: + case Intrinsic::minnum: + case Intrinsic::maximum: + case Intrinsic::minimum: + // If one argument is undef, return the other argument. + if (IsOp0Undef) + return Operands[1]; + if (IsOp1Undef) + return Operands[0]; + break; + } + } + if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; @@ -2393,8 +2393,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, if (IntrinsicID == Intrinsic::amdgcn_fmul_legacy) { const APFloat &C1 = Op1->getValueAPF(); const APFloat &C2 = Op2->getValueAPF(); - // The legacy behaviour is that multiplying +/- 0.0 by anything, even - // NaN or infinity, gives +0.0. + // The legacy behaviour is that multiplying +/- 0.0 by anything, even + // NaN or infinity, gives +0.0. if (C1.isZero() || C2.isZero()) return ConstantFP::getNullValue(Ty); return ConstantFP::get(Ty->getContext(), C1 * C2); @@ -2473,37 +2473,37 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, !getConstIntOrUndef(Operands[1], C1)) return nullptr; - unsigned BitWidth = Ty->getScalarSizeInBits(); + unsigned BitWidth = Ty->getScalarSizeInBits(); switch (IntrinsicID) { default: break; - case Intrinsic::smax: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth)); - return ConstantInt::get(Ty, C0->sgt(*C1) ? *C0 : *C1); - - case Intrinsic::smin: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)); - return ConstantInt::get(Ty, C0->slt(*C1) ? *C0 : *C1); - - case Intrinsic::umax: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getMaxValue(BitWidth)); - return ConstantInt::get(Ty, C0->ugt(*C1) ? *C0 : *C1); - - case Intrinsic::umin: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getMinValue(BitWidth)); - return ConstantInt::get(Ty, C0->ult(*C1) ? *C0 : *C1); - + case Intrinsic::smax: + if (!C0 && !C1) + return UndefValue::get(Ty); + if (!C0 || !C1) + return ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth)); + return ConstantInt::get(Ty, C0->sgt(*C1) ? *C0 : *C1); + + case Intrinsic::smin: + if (!C0 && !C1) + return UndefValue::get(Ty); + if (!C0 || !C1) + return ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)); + return ConstantInt::get(Ty, C0->slt(*C1) ? 
*C0 : *C1); + + case Intrinsic::umax: + if (!C0 && !C1) + return UndefValue::get(Ty); + if (!C0 || !C1) + return ConstantInt::get(Ty, APInt::getMaxValue(BitWidth)); + return ConstantInt::get(Ty, C0->ugt(*C1) ? *C0 : *C1); + + case Intrinsic::umin: + if (!C0 && !C1) + return UndefValue::get(Ty); + if (!C0 || !C1) + return ConstantInt::get(Ty, APInt::getMinValue(BitWidth)); + return ConstantInt::get(Ty, C0->ult(*C1) ? *C0 : *C1); + case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::uadd_with_overflow: @@ -2588,18 +2588,18 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return ConstantInt::get(Ty, C0->countTrailingZeros()); else return ConstantInt::get(Ty, C0->countLeadingZeros()); - - case Intrinsic::abs: - // Undef or minimum val operand with poison min --> undef - assert(C1 && "Must be constant int"); - if (C1->isOneValue() && (!C0 || C0->isMinSignedValue())) - return UndefValue::get(Ty); - - // Undef operand with no poison min --> 0 (sign bit must be clear) - if (C1->isNullValue() && !C0) - return Constant::getNullValue(Ty); - - return ConstantInt::get(Ty, C0->abs()); + + case Intrinsic::abs: + // Undef or minimum val operand with poison min --> undef + assert(C1 && "Must be constant int"); + if (C1->isOneValue() && (!C0 || C0->isMinSignedValue())) + return UndefValue::get(Ty); + + // Undef operand with no poison min --> 0 (sign bit must be clear) + if (C1->isNullValue() && !C0) + return Constant::getNullValue(Ty); + + return ConstantInt::get(Ty, C0->abs()); } return nullptr; @@ -2728,19 +2728,19 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) { switch (IntrinsicID) { default: break; - case Intrinsic::amdgcn_fma_legacy: { - const APFloat &C1 = Op1->getValueAPF(); - const APFloat &C2 = Op2->getValueAPF(); - // The legacy behaviour is that multiplying +/- 0.0 by anything, even - // NaN or infinity, gives +0.0. - if (C1.isZero() || C2.isZero()) { - const APFloat &C3 = Op3->getValueAPF(); - // It's tempting to just return C3 here, but that would give the - // wrong result if C3 was -0.0. - return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3); - } - LLVM_FALLTHROUGH; - } + case Intrinsic::amdgcn_fma_legacy: { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + // The legacy behaviour is that multiplying +/- 0.0 by anything, even + // NaN or infinity, gives +0.0. + if (C1.isZero() || C2.isZero()) { + const APFloat &C3 = Op3->getValueAPF(); + // It's tempting to just return C3 here, but that would give the + // wrong result if C3 was -0.0. 
+ return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3); + } + LLVM_FALLTHROUGH; + } case Intrinsic::fma: case Intrinsic::fmuladd: { APFloat V = Op1->getValueAPF(); @@ -2868,8 +2868,8 @@ static Constant *ConstantFoldVectorCall(StringRef Name, SmallVector<Constant *, 4> Lane(Operands.size()); Type *Ty = FVTy->getElementType(); - switch (IntrinsicID) { - case Intrinsic::masked_load: { + switch (IntrinsicID) { + case Intrinsic::masked_load: { auto *SrcPtr = Operands[0]; auto *Mask = Operands[2]; auto *Passthru = Operands[3]; @@ -2907,52 +2907,52 @@ static Constant *ConstantFoldVectorCall(StringRef Name, return nullptr; return ConstantVector::get(NewElements); } - case Intrinsic::arm_mve_vctp8: - case Intrinsic::arm_mve_vctp16: - case Intrinsic::arm_mve_vctp32: - case Intrinsic::arm_mve_vctp64: { - if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { - unsigned Lanes = FVTy->getNumElements(); - uint64_t Limit = Op->getZExtValue(); - // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make - // sure we get the limit right in that case and set all relevant lanes. - if (IntrinsicID == Intrinsic::arm_mve_vctp64) - Limit *= 2; - - SmallVector<Constant *, 16> NCs; - for (unsigned i = 0; i < Lanes; i++) { - if (i < Limit) - NCs.push_back(ConstantInt::getTrue(Ty)); - else - NCs.push_back(ConstantInt::getFalse(Ty)); - } - return ConstantVector::get(NCs); - } - break; - } - case Intrinsic::get_active_lane_mask: { - auto *Op0 = dyn_cast<ConstantInt>(Operands[0]); - auto *Op1 = dyn_cast<ConstantInt>(Operands[1]); - if (Op0 && Op1) { - unsigned Lanes = FVTy->getNumElements(); - uint64_t Base = Op0->getZExtValue(); - uint64_t Limit = Op1->getZExtValue(); - - SmallVector<Constant *, 16> NCs; - for (unsigned i = 0; i < Lanes; i++) { - if (Base + i < Limit) - NCs.push_back(ConstantInt::getTrue(Ty)); - else - NCs.push_back(ConstantInt::getFalse(Ty)); - } - return ConstantVector::get(NCs); - } - break; - } - default: - break; - } - + case Intrinsic::arm_mve_vctp8: + case Intrinsic::arm_mve_vctp16: + case Intrinsic::arm_mve_vctp32: + case Intrinsic::arm_mve_vctp64: { + if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { + unsigned Lanes = FVTy->getNumElements(); + uint64_t Limit = Op->getZExtValue(); + // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make + // sure we get the limit right in that case and set all relevant lanes. + if (IntrinsicID == Intrinsic::arm_mve_vctp64) + Limit *= 2; + + SmallVector<Constant *, 16> NCs; + for (unsigned i = 0; i < Lanes; i++) { + if (i < Limit) + NCs.push_back(ConstantInt::getTrue(Ty)); + else + NCs.push_back(ConstantInt::getFalse(Ty)); + } + return ConstantVector::get(NCs); + } + break; + } + case Intrinsic::get_active_lane_mask: { + auto *Op0 = dyn_cast<ConstantInt>(Operands[0]); + auto *Op1 = dyn_cast<ConstantInt>(Operands[1]); + if (Op0 && Op1) { + unsigned Lanes = FVTy->getNumElements(); + uint64_t Base = Op0->getZExtValue(); + uint64_t Limit = Op1->getZExtValue(); + + SmallVector<Constant *, 16> NCs; + for (unsigned i = 0; i < Lanes; i++) { + if (Base + i < Limit) + NCs.push_back(ConstantInt::getTrue(Ty)); + else + NCs.push_back(ConstantInt::getFalse(Ty)); + } + return ConstantVector::get(NCs); + } + break; + } + default: + break; + } + for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { // Gather a column of constants. 
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { diff --git a/contrib/libs/llvm12/lib/Analysis/ConstraintSystem.cpp b/contrib/libs/llvm12/lib/Analysis/ConstraintSystem.cpp index 9739c6af57..93a9847e26 100644 --- a/contrib/libs/llvm12/lib/Analysis/ConstraintSystem.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ConstraintSystem.cpp @@ -1,158 +1,158 @@ -//===- ConstraintSytem.cpp - A system of linear constraints. ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ConstraintSystem.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Debug.h" - -#include <algorithm> -#include <string> - -using namespace llvm; - -#define DEBUG_TYPE "constraint-system" - -bool ConstraintSystem::eliminateUsingFM() { - // Implementation of Fourier–Motzkin elimination, with some tricks from the - // paper Pugh, William. "The Omega test: a fast and practical integer - // programming algorithm for dependence - // analysis." - // Supercomputing'91: Proceedings of the 1991 ACM/ - // IEEE conference on Supercomputing. IEEE, 1991. - assert(!Constraints.empty() && - "should only be called for non-empty constraint systems"); - unsigned NumVariables = Constraints[0].size(); - SmallVector<SmallVector<int64_t, 8>, 4> NewSystem; - - unsigned NumConstraints = Constraints.size(); - uint32_t NewGCD = 1; - // FIXME do not use copy - for (unsigned R1 = 0; R1 < NumConstraints; R1++) { - if (Constraints[R1][1] == 0) { - SmallVector<int64_t, 8> NR; - NR.push_back(Constraints[R1][0]); - for (unsigned i = 2; i < NumVariables; i++) { - NR.push_back(Constraints[R1][i]); - } - NewSystem.push_back(std::move(NR)); - continue; - } - - // FIXME do not use copy - for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) { - if (R1 == R2) - continue; - - // FIXME: can we do better than just dropping things here? - if (Constraints[R2][1] == 0) - continue; - - if ((Constraints[R1][1] < 0 && Constraints[R2][1] < 0) || - (Constraints[R1][1] > 0 && Constraints[R2][1] > 0)) - continue; - - unsigned LowerR = R1; - unsigned UpperR = R2; - if (Constraints[UpperR][1] < 0) - std::swap(LowerR, UpperR); - - SmallVector<int64_t, 8> NR; - for (unsigned I = 0; I < NumVariables; I++) { - if (I == 1) - continue; - - int64_t M1, M2, N; - if (MulOverflow(Constraints[UpperR][I], - ((-1) * Constraints[LowerR][1] / GCD), M1)) - return false; - if (MulOverflow(Constraints[LowerR][I], - (Constraints[UpperR][1] / GCD), M2)) - return false; - if (AddOverflow(M1, M2, N)) - return false; - NR.push_back(N); - - NewGCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)NR.back()}, - {32, NewGCD}) - .getZExtValue(); - } - NewSystem.push_back(std::move(NR)); - // Give up if the new system gets too big. 
- if (NewSystem.size() > 500) - return false; - } - } - Constraints = std::move(NewSystem); - GCD = NewGCD; - - return true; -} - -bool ConstraintSystem::mayHaveSolutionImpl() { - while (!Constraints.empty() && Constraints[0].size() > 1) { - if (!eliminateUsingFM()) - return true; - } - - if (Constraints.empty() || Constraints[0].size() > 1) - return true; - - return all_of(Constraints, [](auto &R) { return R[0] >= 0; }); -} - -void ConstraintSystem::dump(ArrayRef<std::string> Names) const { - if (Constraints.empty()) - return; - - for (auto &Row : Constraints) { - SmallVector<std::string, 16> Parts; - for (unsigned I = 1, S = Row.size(); I < S; ++I) { - if (Row[I] == 0) - continue; - std::string Coefficient; - if (Row[I] != 1) - Coefficient = std::to_string(Row[I]) + " * "; - Parts.push_back(Coefficient + Names[I - 1]); - } - assert(!Parts.empty() && "need to have at least some parts"); - LLVM_DEBUG(dbgs() << join(Parts, std::string(" + ")) - << " <= " << std::to_string(Row[0]) << "\n"); - } -} - -void ConstraintSystem::dump() const { - SmallVector<std::string, 16> Names; - for (unsigned i = 1; i < Constraints.back().size(); ++i) - Names.push_back("x" + std::to_string(i)); - LLVM_DEBUG(dbgs() << "---\n"); - dump(Names); -} - -bool ConstraintSystem::mayHaveSolution() { - LLVM_DEBUG(dump()); - bool HasSolution = mayHaveSolutionImpl(); - LLVM_DEBUG(dbgs() << (HasSolution ? "sat" : "unsat") << "\n"); - return HasSolution; -} - -bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) { - // If all variable coefficients are 0, we have 'C >= 0'. If the constant is >= - // 0, R is always true, regardless of the system. - if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; })) - return R[0] >= 0; - - // If there is no solution with the negation of R added to the system, the - // condition must hold based on the existing constraints. - R = ConstraintSystem::negate(R); - - auto NewSystem = *this; - NewSystem.addVariableRow(R); - return !NewSystem.mayHaveSolution(); -} +//===- ConstraintSytem.cpp - A system of linear constraints. ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstraintSystem.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" + +#include <algorithm> +#include <string> + +using namespace llvm; + +#define DEBUG_TYPE "constraint-system" + +bool ConstraintSystem::eliminateUsingFM() { + // Implementation of Fourier–Motzkin elimination, with some tricks from the + // paper Pugh, William. "The Omega test: a fast and practical integer + // programming algorithm for dependence + // analysis." + // Supercomputing'91: Proceedings of the 1991 ACM/ + // IEEE conference on Supercomputing. IEEE, 1991. 
+ assert(!Constraints.empty() && + "should only be called for non-empty constraint systems"); + unsigned NumVariables = Constraints[0].size(); + SmallVector<SmallVector<int64_t, 8>, 4> NewSystem; + + unsigned NumConstraints = Constraints.size(); + uint32_t NewGCD = 1; + // FIXME do not use copy + for (unsigned R1 = 0; R1 < NumConstraints; R1++) { + if (Constraints[R1][1] == 0) { + SmallVector<int64_t, 8> NR; + NR.push_back(Constraints[R1][0]); + for (unsigned i = 2; i < NumVariables; i++) { + NR.push_back(Constraints[R1][i]); + } + NewSystem.push_back(std::move(NR)); + continue; + } + + // FIXME do not use copy + for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) { + if (R1 == R2) + continue; + + // FIXME: can we do better than just dropping things here? + if (Constraints[R2][1] == 0) + continue; + + if ((Constraints[R1][1] < 0 && Constraints[R2][1] < 0) || + (Constraints[R1][1] > 0 && Constraints[R2][1] > 0)) + continue; + + unsigned LowerR = R1; + unsigned UpperR = R2; + if (Constraints[UpperR][1] < 0) + std::swap(LowerR, UpperR); + + SmallVector<int64_t, 8> NR; + for (unsigned I = 0; I < NumVariables; I++) { + if (I == 1) + continue; + + int64_t M1, M2, N; + if (MulOverflow(Constraints[UpperR][I], + ((-1) * Constraints[LowerR][1] / GCD), M1)) + return false; + if (MulOverflow(Constraints[LowerR][I], + (Constraints[UpperR][1] / GCD), M2)) + return false; + if (AddOverflow(M1, M2, N)) + return false; + NR.push_back(N); + + NewGCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)NR.back()}, + {32, NewGCD}) + .getZExtValue(); + } + NewSystem.push_back(std::move(NR)); + // Give up if the new system gets too big. + if (NewSystem.size() > 500) + return false; + } + } + Constraints = std::move(NewSystem); + GCD = NewGCD; + + return true; +} + +bool ConstraintSystem::mayHaveSolutionImpl() { + while (!Constraints.empty() && Constraints[0].size() > 1) { + if (!eliminateUsingFM()) + return true; + } + + if (Constraints.empty() || Constraints[0].size() > 1) + return true; + + return all_of(Constraints, [](auto &R) { return R[0] >= 0; }); +} + +void ConstraintSystem::dump(ArrayRef<std::string> Names) const { + if (Constraints.empty()) + return; + + for (auto &Row : Constraints) { + SmallVector<std::string, 16> Parts; + for (unsigned I = 1, S = Row.size(); I < S; ++I) { + if (Row[I] == 0) + continue; + std::string Coefficient; + if (Row[I] != 1) + Coefficient = std::to_string(Row[I]) + " * "; + Parts.push_back(Coefficient + Names[I - 1]); + } + assert(!Parts.empty() && "need to have at least some parts"); + LLVM_DEBUG(dbgs() << join(Parts, std::string(" + ")) + << " <= " << std::to_string(Row[0]) << "\n"); + } +} + +void ConstraintSystem::dump() const { + SmallVector<std::string, 16> Names; + for (unsigned i = 1; i < Constraints.back().size(); ++i) + Names.push_back("x" + std::to_string(i)); + LLVM_DEBUG(dbgs() << "---\n"); + dump(Names); +} + +bool ConstraintSystem::mayHaveSolution() { + LLVM_DEBUG(dump()); + bool HasSolution = mayHaveSolutionImpl(); + LLVM_DEBUG(dbgs() << (HasSolution ? "sat" : "unsat") << "\n"); + return HasSolution; +} + +bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) { + // If all variable coefficients are 0, we have 'C >= 0'. If the constant is >= + // 0, R is always true, regardless of the system. + if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; })) + return R[0] >= 0; + + // If there is no solution with the negation of R added to the system, the + // condition must hold based on the existing constraints. 
+ R = ConstraintSystem::negate(R); + + auto NewSystem = *this; + NewSystem.addVariableRow(R); + return !NewSystem.mayHaveSolution(); +} diff --git a/contrib/libs/llvm12/lib/Analysis/CostModel.cpp b/contrib/libs/llvm12/lib/Analysis/CostModel.cpp index 19c307b4ef..5e35c450ef 100644 --- a/contrib/libs/llvm12/lib/Analysis/CostModel.cpp +++ b/contrib/libs/llvm12/lib/Analysis/CostModel.cpp @@ -35,11 +35,11 @@ static cl::opt<TargetTransformInfo::TargetCostKind> CostKind( clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, - "code-size", "Code size"), - clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, - "size-latency", "Code size and latency"))); - + "code-size", "Code size"), + clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, + "size-latency", "Code size and latency"))); + #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME @@ -57,7 +57,7 @@ namespace { /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. - InstructionCost getInstructionCost(const Instruction *I) const { + InstructionCost getInstructionCost(const Instruction *I) const { return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput); } @@ -103,9 +103,9 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { for (BasicBlock &B : *F) { for (Instruction &Inst : B) { - InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind); - if (auto CostVal = Cost.getValue()) - OS << "Cost Model: Found an estimated cost of " << *CostVal; + InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind); + if (auto CostVal = Cost.getValue()) + OS << "Cost Model: Found an estimated cost of " << *CostVal; else OS << "Cost Model: Unknown cost"; diff --git a/contrib/libs/llvm12/lib/Analysis/DDG.cpp b/contrib/libs/llvm12/lib/Analysis/DDG.cpp index da5de75a03..1d4fe14038 100644 --- a/contrib/libs/llvm12/lib/Analysis/DDG.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DDG.cpp @@ -49,7 +49,7 @@ bool DDGNode::collectInstructions( assert(!isa<PiBlockDDGNode>(PN) && "Nested PiBlocks are not supported."); SmallVector<Instruction *, 8> TmpIList; PN->collectInstructions(Pred, TmpIList); - llvm::append_range(IList, TmpIList); + llvm::append_range(IList, TmpIList); } } else llvm_unreachable("unimplemented type of node"); @@ -190,7 +190,7 @@ DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) // directions. BasicBlockListType BBList; for (auto &SCC : make_range(scc_begin(&F), scc_end(&F))) - append_range(BBList, SCC); + append_range(BBList, SCC); std::reverse(BBList.begin(), BBList.end()); DDGBuilder(*this, D, BBList).populate(); } @@ -206,7 +206,7 @@ DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI, LoopBlocksDFS DFS(&L); DFS.perform(&LI); BasicBlockListType BBList; - append_range(BBList, make_range(DFS.beginRPO(), DFS.endRPO())); + append_range(BBList, make_range(DFS.beginRPO(), DFS.endRPO())); DDGBuilder(*this, D, BBList).populate(); } diff --git a/contrib/libs/llvm12/lib/Analysis/DDGPrinter.cpp b/contrib/libs/llvm12/lib/Analysis/DDGPrinter.cpp index 51bd548098..9c16fcdc8b 100644 --- a/contrib/libs/llvm12/lib/Analysis/DDGPrinter.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DDGPrinter.cpp @@ -1,150 +1,150 @@ -//===- DDGPrinter.cpp - DOT printer for the data dependence graph ----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// -// This file defines the `-dot-ddg` analysis pass, which emits DDG in DOT format -// in a file named `ddg.<graph-name>.dot` for each loop in a function. -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/DDGPrinter.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/GraphWriter.h" - -using namespace llvm; - -static cl::opt<bool> DotOnly("dot-ddg-only", cl::init(false), cl::Hidden, - cl::ZeroOrMore, cl::desc("simple ddg dot graph")); -static cl::opt<std::string> DDGDotFilenamePrefix( - "dot-ddg-filename-prefix", cl::init("ddg"), cl::Hidden, - cl::desc("The prefix used for the DDG dot file names.")); - -static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly = false); - -//===--------------------------------------------------------------------===// -// Implementation of DDG DOT Printer for a loop -//===--------------------------------------------------------------------===// -PreservedAnalyses DDGDotPrinterPass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - writeDDGToDotFile(*AM.getResult<DDGAnalysis>(L, AR), DotOnly); - return PreservedAnalyses::all(); -} - -static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly) { - std::string Filename = - Twine(DDGDotFilenamePrefix + "." + G.getName() + ".dot").str(); - errs() << "Writing '" << Filename << "'..."; - - std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); - - if (!EC) - // We only provide the constant verson of the DOTGraphTrait specialization, - // hence the conversion to const pointer - WriteGraph(File, (const DataDependenceGraph *)&G, DOnly); - else - errs() << " error opening file for writing!"; - errs() << "\n"; -} - -//===--------------------------------------------------------------------===// -// DDG DOT Printer Implementation -//===--------------------------------------------------------------------===// -std::string DDGDotGraphTraits::getNodeLabel(const DDGNode *Node, - const DataDependenceGraph *Graph) { - if (isSimple()) - return getSimpleNodeLabel(Node, Graph); - else - return getVerboseNodeLabel(Node, Graph); -} - -std::string DDGDotGraphTraits::getEdgeAttributes( - const DDGNode *Node, GraphTraits<const DDGNode *>::ChildIteratorType I, - const DataDependenceGraph *G) { - const DDGEdge *E = static_cast<const DDGEdge *>(*I.getCurrent()); - if (isSimple()) - return getSimpleEdgeAttributes(Node, E, G); - else - return getVerboseEdgeAttributes(Node, E, G); -} - -bool DDGDotGraphTraits::isNodeHidden(const DDGNode *Node, - const DataDependenceGraph *Graph) { - if (isSimple() && isa<RootDDGNode>(Node)) - return true; - assert(Graph && "expected a valid graph pointer"); - return Graph->getPiBlock(*Node) != nullptr; -} - -std::string -DDGDotGraphTraits::getSimpleNodeLabel(const DDGNode *Node, - const DataDependenceGraph *G) { - std::string Str; - raw_string_ostream OS(Str); - if (isa<SimpleDDGNode>(Node)) - for (auto *II : static_cast<const SimpleDDGNode *>(Node)->getInstructions()) - OS << *II << "\n"; - else if (isa<PiBlockDDGNode>(Node)) - OS << "pi-block\nwith\n" - << cast<PiBlockDDGNode>(Node)->getNodes().size() << " nodes\n"; - else if (isa<RootDDGNode>(Node)) - OS << "root\n"; - 
else - llvm_unreachable("Unimplemented type of node"); - return OS.str(); -} - -std::string -DDGDotGraphTraits::getVerboseNodeLabel(const DDGNode *Node, - const DataDependenceGraph *G) { - std::string Str; - raw_string_ostream OS(Str); - OS << "<kind:" << Node->getKind() << ">\n"; - if (isa<SimpleDDGNode>(Node)) - for (auto *II : static_cast<const SimpleDDGNode *>(Node)->getInstructions()) - OS << *II << "\n"; - else if (isa<PiBlockDDGNode>(Node)) { - OS << "--- start of nodes in pi-block ---\n"; - unsigned Count = 0; - const auto &PNodes = cast<PiBlockDDGNode>(Node)->getNodes(); - for (auto *PN : PNodes) { - OS << getVerboseNodeLabel(PN, G); - if (++Count != PNodes.size()) - OS << "\n"; - } - OS << "--- end of nodes in pi-block ---\n"; - } else if (isa<RootDDGNode>(Node)) - OS << "root\n"; - else - llvm_unreachable("Unimplemented type of node"); - return OS.str(); -} - -std::string DDGDotGraphTraits::getSimpleEdgeAttributes( - const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) { - std::string Str; - raw_string_ostream OS(Str); - DDGEdge::EdgeKind Kind = Edge->getKind(); - OS << "label=\"[" << Kind << "]\""; - return OS.str(); -} - -std::string DDGDotGraphTraits::getVerboseEdgeAttributes( - const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) { - std::string Str; - raw_string_ostream OS(Str); - DDGEdge::EdgeKind Kind = Edge->getKind(); - OS << "label=\"["; - if (Kind == DDGEdge::EdgeKind::MemoryDependence) - OS << G->getDependenceString(*Src, Edge->getTargetNode()); - else - OS << Kind; - OS << "]\""; - return OS.str(); -} +//===- DDGPrinter.cpp - DOT printer for the data dependence graph ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// This file defines the `-dot-ddg` analysis pass, which emits DDG in DOT format +// in a file named `ddg.<graph-name>.dot` for each loop in a function. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DDGPrinter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +static cl::opt<bool> DotOnly("dot-ddg-only", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("simple ddg dot graph")); +static cl::opt<std::string> DDGDotFilenamePrefix( + "dot-ddg-filename-prefix", cl::init("ddg"), cl::Hidden, + cl::desc("The prefix used for the DDG dot file names.")); + +static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly = false); + +//===--------------------------------------------------------------------===// +// Implementation of DDG DOT Printer for a loop +//===--------------------------------------------------------------------===// +PreservedAnalyses DDGDotPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + writeDDGToDotFile(*AM.getResult<DDGAnalysis>(L, AR), DotOnly); + return PreservedAnalyses::all(); +} + +static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly) { + std::string Filename = + Twine(DDGDotFilenamePrefix + "." 
+ G.getName() + ".dot").str(); + errs() << "Writing '" << Filename << "'..."; + + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + + if (!EC) + // We only provide the constant verson of the DOTGraphTrait specialization, + // hence the conversion to const pointer + WriteGraph(File, (const DataDependenceGraph *)&G, DOnly); + else + errs() << " error opening file for writing!"; + errs() << "\n"; +} + +//===--------------------------------------------------------------------===// +// DDG DOT Printer Implementation +//===--------------------------------------------------------------------===// +std::string DDGDotGraphTraits::getNodeLabel(const DDGNode *Node, + const DataDependenceGraph *Graph) { + if (isSimple()) + return getSimpleNodeLabel(Node, Graph); + else + return getVerboseNodeLabel(Node, Graph); +} + +std::string DDGDotGraphTraits::getEdgeAttributes( + const DDGNode *Node, GraphTraits<const DDGNode *>::ChildIteratorType I, + const DataDependenceGraph *G) { + const DDGEdge *E = static_cast<const DDGEdge *>(*I.getCurrent()); + if (isSimple()) + return getSimpleEdgeAttributes(Node, E, G); + else + return getVerboseEdgeAttributes(Node, E, G); +} + +bool DDGDotGraphTraits::isNodeHidden(const DDGNode *Node, + const DataDependenceGraph *Graph) { + if (isSimple() && isa<RootDDGNode>(Node)) + return true; + assert(Graph && "expected a valid graph pointer"); + return Graph->getPiBlock(*Node) != nullptr; +} + +std::string +DDGDotGraphTraits::getSimpleNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + if (isa<SimpleDDGNode>(Node)) + for (auto *II : static_cast<const SimpleDDGNode *>(Node)->getInstructions()) + OS << *II << "\n"; + else if (isa<PiBlockDDGNode>(Node)) + OS << "pi-block\nwith\n" + << cast<PiBlockDDGNode>(Node)->getNodes().size() << " nodes\n"; + else if (isa<RootDDGNode>(Node)) + OS << "root\n"; + else + llvm_unreachable("Unimplemented type of node"); + return OS.str(); +} + +std::string +DDGDotGraphTraits::getVerboseNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + OS << "<kind:" << Node->getKind() << ">\n"; + if (isa<SimpleDDGNode>(Node)) + for (auto *II : static_cast<const SimpleDDGNode *>(Node)->getInstructions()) + OS << *II << "\n"; + else if (isa<PiBlockDDGNode>(Node)) { + OS << "--- start of nodes in pi-block ---\n"; + unsigned Count = 0; + const auto &PNodes = cast<PiBlockDDGNode>(Node)->getNodes(); + for (auto *PN : PNodes) { + OS << getVerboseNodeLabel(PN, G); + if (++Count != PNodes.size()) + OS << "\n"; + } + OS << "--- end of nodes in pi-block ---\n"; + } else if (isa<RootDDGNode>(Node)) + OS << "root\n"; + else + llvm_unreachable("Unimplemented type of node"); + return OS.str(); +} + +std::string DDGDotGraphTraits::getSimpleEdgeAttributes( + const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + DDGEdge::EdgeKind Kind = Edge->getKind(); + OS << "label=\"[" << Kind << "]\""; + return OS.str(); +} + +std::string DDGDotGraphTraits::getVerboseEdgeAttributes( + const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + DDGEdge::EdgeKind Kind = Edge->getKind(); + OS << "label=\"["; + if (Kind == DDGEdge::EdgeKind::MemoryDependence) + OS << G->getDependenceString(*Src, Edge->getTargetNode()); + else + OS << Kind; + OS << "]\""; + return OS.str(); +} diff --git 
a/contrib/libs/llvm12/lib/Analysis/Delinearization.cpp b/contrib/libs/llvm12/lib/Analysis/Delinearization.cpp index 87a41bbf16..50adf84cca 100644 --- a/contrib/libs/llvm12/lib/Analysis/Delinearization.cpp +++ b/contrib/libs/llvm12/lib/Analysis/Delinearization.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Delinearization.h" +#include "llvm/Analysis/Delinearization.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -24,7 +24,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -56,8 +56,8 @@ public: void print(raw_ostream &O, const Module *M = nullptr) const override; }; -void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI, - ScalarEvolution *SE) { +void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI, + ScalarEvolution *SE) { O << "Delinearization on function " << F->getName() << ":\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { Instruction *Inst = &(*I); @@ -108,25 +108,25 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI, } } -} // end anonymous namespace - -void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); -} - -bool Delinearization::runOnFunction(Function &F) { - this->F = &F; - SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - return false; -} - -void Delinearization::print(raw_ostream &O, const Module *) const { - printDelinearization(O, F, LI, SE); -} - +} // end anonymous namespace + +void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); +} + +bool Delinearization::runOnFunction(Function &F) { + this->F = &F; + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + return false; +} + +void Delinearization::print(raw_ostream &O, const Module *) const { + printDelinearization(O, F, LI, SE); +} + char Delinearization::ID = 0; static const char delinearization_name[] = "Delinearization"; INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true, @@ -135,12 +135,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true) FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; } - -DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS) - : OS(OS) {} -PreservedAnalyses DelinearizationPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - printDelinearization(OS, &F, &AM.getResult<LoopAnalysis>(F), - &AM.getResult<ScalarEvolutionAnalysis>(F)); - return PreservedAnalyses::all(); -} + +DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS) + : OS(OS) {} +PreservedAnalyses DelinearizationPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + printDelinearization(OS, &F, &AM.getResult<LoopAnalysis>(F), + &AM.getResult<ScalarEvolutionAnalysis>(F)); + return PreservedAnalyses::all(); +} diff --git 
a/contrib/libs/llvm12/lib/Analysis/DemandedBits.cpp b/contrib/libs/llvm12/lib/Analysis/DemandedBits.cpp index dd11b0b02b..ce17c01829 100644 --- a/contrib/libs/llvm12/lib/Analysis/DemandedBits.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DemandedBits.cpp @@ -80,7 +80,7 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const { static bool isAlwaysLive(Instruction *I) { return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() || - I->mayHaveSideEffects() || !I->willReturn(); + I->mayHaveSideEffects() || !I->willReturn(); } void DemandedBits::determineLiveOperandBits( @@ -115,7 +115,7 @@ void DemandedBits::determineLiveOperandBits( default: break; case Instruction::Call: case Instruction::Invoke: - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) { + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::bswap: @@ -170,33 +170,33 @@ void DemandedBits::determineLiveOperandBits( } break; } - case Intrinsic::umax: - case Intrinsic::umin: - case Intrinsic::smax: - case Intrinsic::smin: - // If low bits of result are not demanded, they are also not demanded - // for the min/max operands. - AB = APInt::getBitsSetFrom(BitWidth, AOut.countTrailingZeros()); - break; + case Intrinsic::umax: + case Intrinsic::umin: + case Intrinsic::smax: + case Intrinsic::smin: + // If low bits of result are not demanded, they are also not demanded + // for the min/max operands. + AB = APInt::getBitsSetFrom(BitWidth, AOut.countTrailingZeros()); + break; } - } + } break; case Instruction::Add: - if (AOut.isMask()) { - AB = AOut; - } else { - ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); - AB = determineLiveOperandBitsAdd(OperandNo, AOut, Known, Known2); - } - break; + if (AOut.isMask()) { + AB = AOut; + } else { + ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); + AB = determineLiveOperandBitsAdd(OperandNo, AOut, Known, Known2); + } + break; case Instruction::Sub: - if (AOut.isMask()) { - AB = AOut; - } else { - ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); - AB = determineLiveOperandBitsSub(OperandNo, AOut, Known, Known2); - } - break; + if (AOut.isMask()) { + AB = AOut; + } else { + ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); + AB = determineLiveOperandBitsSub(OperandNo, AOut, Known, Known2); + } + break; case Instruction::Mul: // Find the highest live output bit. We don't need any more input // bits than that (adds, and thus subtracts, ripple only to the @@ -492,86 +492,86 @@ void DemandedBits::print(raw_ostream &OS) { } } -static APInt determineLiveOperandBitsAddCarry(unsigned OperandNo, - const APInt &AOut, - const KnownBits &LHS, - const KnownBits &RHS, - bool CarryZero, bool CarryOne) { - assert(!(CarryZero && CarryOne) && - "Carry can't be zero and one at the same time"); - - // The following check should be done by the caller, as it also indicates - // that LHS and RHS don't need to be computed. - // - // if (AOut.isMask()) - // return AOut; - - // Boundary bits' carry out is unaffected by their carry in. - APInt Bound = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One); - - // First, the alive carry bits are determined from the alive output bits: - // Let demand ripple to the right but only up to any set bit in Bound. 
- // AOut = -1---- - // Bound = ----1- - // ACarry&~AOut = --111- - APInt RBound = Bound.reverseBits(); - APInt RAOut = AOut.reverseBits(); - APInt RProp = RAOut + (RAOut | ~RBound); - APInt RACarry = RProp ^ ~RBound; - APInt ACarry = RACarry.reverseBits(); - - // Then, the alive input bits are determined from the alive carry bits: - APInt NeededToMaintainCarryZero; - APInt NeededToMaintainCarryOne; - if (OperandNo == 0) { - NeededToMaintainCarryZero = LHS.Zero | ~RHS.Zero; - NeededToMaintainCarryOne = LHS.One | ~RHS.One; - } else { - NeededToMaintainCarryZero = RHS.Zero | ~LHS.Zero; - NeededToMaintainCarryOne = RHS.One | ~LHS.One; - } - - // As in computeForAddCarry - APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero; - APInt PossibleSumOne = LHS.One + RHS.One + CarryOne; - - // The below is simplified from - // - // APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero); - // APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One; - // APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne); - // - // APInt NeededToMaintainCarry = - // (CarryKnownZero & NeededToMaintainCarryZero) | - // (CarryKnownOne & NeededToMaintainCarryOne) | - // CarryUnknown; - - APInt NeededToMaintainCarry = (~PossibleSumZero | NeededToMaintainCarryZero) & - (PossibleSumOne | NeededToMaintainCarryOne); - - APInt AB = AOut | (ACarry & NeededToMaintainCarry); - return AB; -} - -APInt DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo, - const APInt &AOut, - const KnownBits &LHS, - const KnownBits &RHS) { - return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, RHS, true, - false); -} - -APInt DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo, - const APInt &AOut, - const KnownBits &LHS, - const KnownBits &RHS) { - KnownBits NRHS; - NRHS.Zero = RHS.One; - NRHS.One = RHS.Zero; - return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, NRHS, false, - true); -} - +static APInt determineLiveOperandBitsAddCarry(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS, + bool CarryZero, bool CarryOne) { + assert(!(CarryZero && CarryOne) && + "Carry can't be zero and one at the same time"); + + // The following check should be done by the caller, as it also indicates + // that LHS and RHS don't need to be computed. + // + // if (AOut.isMask()) + // return AOut; + + // Boundary bits' carry out is unaffected by their carry in. + APInt Bound = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One); + + // First, the alive carry bits are determined from the alive output bits: + // Let demand ripple to the right but only up to any set bit in Bound. 
+ // AOut = -1---- + // Bound = ----1- + // ACarry&~AOut = --111- + APInt RBound = Bound.reverseBits(); + APInt RAOut = AOut.reverseBits(); + APInt RProp = RAOut + (RAOut | ~RBound); + APInt RACarry = RProp ^ ~RBound; + APInt ACarry = RACarry.reverseBits(); + + // Then, the alive input bits are determined from the alive carry bits: + APInt NeededToMaintainCarryZero; + APInt NeededToMaintainCarryOne; + if (OperandNo == 0) { + NeededToMaintainCarryZero = LHS.Zero | ~RHS.Zero; + NeededToMaintainCarryOne = LHS.One | ~RHS.One; + } else { + NeededToMaintainCarryZero = RHS.Zero | ~LHS.Zero; + NeededToMaintainCarryOne = RHS.One | ~LHS.One; + } + + // As in computeForAddCarry + APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero; + APInt PossibleSumOne = LHS.One + RHS.One + CarryOne; + + // The below is simplified from + // + // APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero); + // APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One; + // APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne); + // + // APInt NeededToMaintainCarry = + // (CarryKnownZero & NeededToMaintainCarryZero) | + // (CarryKnownOne & NeededToMaintainCarryOne) | + // CarryUnknown; + + APInt NeededToMaintainCarry = (~PossibleSumZero | NeededToMaintainCarryZero) & + (PossibleSumOne | NeededToMaintainCarryOne); + + APInt AB = AOut | (ACarry & NeededToMaintainCarry); + return AB; +} + +APInt DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS) { + return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, RHS, true, + false); +} + +APInt DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS) { + KnownBits NRHS; + NRHS.Zero = RHS.One; + NRHS.One = RHS.Zero; + return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, NRHS, false, + true); +} + FunctionPass *llvm::createDemandedBitsWrapperPass() { return new DemandedBitsWrapperPass(); } diff --git a/contrib/libs/llvm12/lib/Analysis/DependenceAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/DependenceAnalysis.cpp index c2c61131e4..b637ea7665 100644 --- a/contrib/libs/llvm12/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DependenceAnalysis.cpp @@ -653,16 +653,16 @@ static AliasResult underlyingObjectsAlias(AAResults *AA, const MemoryLocation &LocB) { // Check the original locations (minus size) for noalias, which can happen for // tbaa, incompatible underlying object locations, etc. 
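For context on the DemandedBits Add/Sub handling restored in the hunk above: when the demanded bits of the result form a low-bit mask, the operands need only those same bits, because carries in an addition propagate only toward higher bits. The following standalone check (plain C++ on 64-bit integers, not part of the patch and not using llvm::APInt or KnownBits) is a hedged illustration of that property, not the in-tree code:

#include <cstdint>
#include <cstdio>

int main() {
  // Only the low 4 bits of the sum are demanded.
  const uint64_t DemandedOut = 0x0F;
  for (uint64_t A = 0; A < 256; ++A)
    for (uint64_t B = 0; B < 256; ++B) {
      // Flip an operand bit that lies outside the demanded mask.
      uint64_t AFlipped = A ^ 0x10;
      // Carries ripple upward only, so the demanded result bits cannot change.
      if ((((A + B) ^ (AFlipped + B)) & DemandedOut) != 0) {
        std::puts("property violated");
        return 1;
      }
    }
  std::puts("bits above the demanded mask never affect demanded sum bits");
  return 0;
}

This is the reasoning behind the AOut.isMask() fast path in determineLiveOperandBits; the general (non-mask) case additionally consults known bits via determineLiveOperandBitsAdd/Sub as shown above.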
- MemoryLocation LocAS = - MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); - MemoryLocation LocBS = - MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); + MemoryLocation LocAS = + MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); + MemoryLocation LocBS = + MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); if (AA->alias(LocAS, LocBS) == NoAlias) return NoAlias; // Check the underlying objects are the same - const Value *AObj = getUnderlyingObject(LocA.Ptr); - const Value *BObj = getUnderlyingObject(LocB.Ptr); + const Value *AObj = getUnderlyingObject(LocA.Ptr); + const Value *BObj = getUnderlyingObject(LocB.Ptr); // If the underlying objects are the same, they must alias if (AObj == BObj) @@ -873,8 +873,8 @@ void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { const SCEV *Dst = Pair->Dst; if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { - const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); - const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); + const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); + const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); const SCEV *SrcCastOp = SrcCast->getOperand(); const SCEV *DstCastOp = DstCast->getOperand(); if (SrcCastOp->getType() == DstCastOp->getType()) { @@ -971,8 +971,8 @@ bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, isa<SCEVSignExtendExpr>(Y)) || (isa<SCEVZeroExtendExpr>(X) && isa<SCEVZeroExtendExpr>(Y))) { - const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); - const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); + const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); + const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); const SCEV *Xop = CX->getOperand(); const SCEV *Yop = CY->getOperand(); if (Xop->getType() == Yop->getType()) { @@ -1531,18 +1531,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(BM/G, LM-X) and test(-BM/G, X-UM) APInt TMUL = BM.sdiv(G); if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (UMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(UM - X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(UM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (UMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(UM - X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(UM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1550,18 +1550,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(AM/G, LM-Y) and test(-AM/G, Y-UM) TMUL = AM.sdiv(G); if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (UMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(UM - Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(UM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if 
(UMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(UM - Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(UM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1580,11 +1580,11 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); TMUL = AM - BM; if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y + 1, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = APIntOps::smin(TU, floorOfQuotient(X - Y + 1, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(X - Y + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { @@ -1597,20 +1597,20 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, TL = SaveTL; LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = APIntOps::smin(TU, floorOfQuotient(X - Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(X - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } TMUL = BM - AM; if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = APIntOps::smin(TU, floorOfQuotient(Y - X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(Y - X, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { @@ -1623,11 +1623,11 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, TL = SaveTL; LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X + 1, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = APIntOps::smin(TU, floorOfQuotient(Y - X + 1, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(Y - X + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { @@ -1939,18 +1939,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(BM/G, LM-X) and test(-BM/G, X-UM) APInt TMUL = BM.sdiv(G); if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (SrcUMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(SrcUM - X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(SrcUM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (SrcUMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(SrcUM - X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(SrcUM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1958,18 +1958,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(AM/G, LM-Y) and test(-AM/G, Y-UM) TMUL = AM.sdiv(G); if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (DstUMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(DstUM - Y, 
TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(DstUM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (DstUMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(DstUM - Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(DstUM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } diff --git a/contrib/libs/llvm12/lib/Analysis/DependenceGraphBuilder.cpp b/contrib/libs/llvm12/lib/Analysis/DependenceGraphBuilder.cpp index 6b90db4baf..0851a5d98c 100644 --- a/contrib/libs/llvm12/lib/Analysis/DependenceGraphBuilder.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DependenceGraphBuilder.cpp @@ -140,74 +140,74 @@ template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() { if (*N == PiNode || NodesInSCC.count(N)) continue; - enum Direction { - Incoming, // Incoming edges to the SCC - Outgoing, // Edges going ot of the SCC - DirectionCount // To make the enum usable as an array index. - }; - - // Use these flags to help us avoid creating redundant edges. If there - // are more than one edges from an outside node to inside nodes, we only - // keep one edge from that node to the pi-block node. Similarly, if - // there are more than one edges from inside nodes to an outside node, - // we only keep one edge from the pi-block node to the outside node. - // There is a flag defined for each direction (incoming vs outgoing) and - // for each type of edge supported, using a two-dimensional boolean - // array. - using EdgeKind = typename EdgeType::EdgeKind; - EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false, - false}; - - auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst, - const EdgeKind K) { - switch (K) { - case EdgeKind::RegisterDefUse: - createDefUseEdge(Src, Dst); - break; - case EdgeKind::MemoryDependence: - createMemoryEdge(Src, Dst); - break; - case EdgeKind::Rooted: - createRootedEdge(Src, Dst); - break; - default: - llvm_unreachable("Unsupported type of edge."); - } - }; - - auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New, - const Direction Dir) { - if (!Src->hasEdgeTo(*Dst)) - return; - LLVM_DEBUG( - dbgs() << "reconnecting(" - << (Dir == Direction::Incoming ? "incoming)" : "outgoing)") - << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New - << "\n"); - assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) && - "Invalid direction."); - - SmallVector<EdgeType *, 10> EL; - Src->findEdgesTo(*Dst, EL); - for (EdgeType *OldEdge : EL) { - EdgeKind Kind = OldEdge->getKind(); - if (!EdgeAlreadyCreated[Dir][Kind]) { - if (Dir == Direction::Incoming) { - createEdgeOfKind(*Src, *New, Kind); - LLVM_DEBUG(dbgs() << "created edge from Src to New.\n"); - } else if (Dir == Direction::Outgoing) { - createEdgeOfKind(*New, *Dst, Kind); - LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n"); + enum Direction { + Incoming, // Incoming edges to the SCC + Outgoing, // Edges going ot of the SCC + DirectionCount // To make the enum usable as an array index. + }; + + // Use these flags to help us avoid creating redundant edges. If there + // are more than one edges from an outside node to inside nodes, we only + // keep one edge from that node to the pi-block node. Similarly, if + // there are more than one edges from inside nodes to an outside node, + // we only keep one edge from the pi-block node to the outside node. 
+ // There is a flag defined for each direction (incoming vs outgoing) and + // for each type of edge supported, using a two-dimensional boolean + // array. + using EdgeKind = typename EdgeType::EdgeKind; + EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false, + false}; + + auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst, + const EdgeKind K) { + switch (K) { + case EdgeKind::RegisterDefUse: + createDefUseEdge(Src, Dst); + break; + case EdgeKind::MemoryDependence: + createMemoryEdge(Src, Dst); + break; + case EdgeKind::Rooted: + createRootedEdge(Src, Dst); + break; + default: + llvm_unreachable("Unsupported type of edge."); + } + }; + + auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New, + const Direction Dir) { + if (!Src->hasEdgeTo(*Dst)) + return; + LLVM_DEBUG( + dbgs() << "reconnecting(" + << (Dir == Direction::Incoming ? "incoming)" : "outgoing)") + << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New + << "\n"); + assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) && + "Invalid direction."); + + SmallVector<EdgeType *, 10> EL; + Src->findEdgesTo(*Dst, EL); + for (EdgeType *OldEdge : EL) { + EdgeKind Kind = OldEdge->getKind(); + if (!EdgeAlreadyCreated[Dir][Kind]) { + if (Dir == Direction::Incoming) { + createEdgeOfKind(*Src, *New, Kind); + LLVM_DEBUG(dbgs() << "created edge from Src to New.\n"); + } else if (Dir == Direction::Outgoing) { + createEdgeOfKind(*New, *Dst, Kind); + LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n"); } - EdgeAlreadyCreated[Dir][Kind] = true; + EdgeAlreadyCreated[Dir][Kind] = true; } - Src->removeEdge(*OldEdge); - destroyEdge(*OldEdge); - LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n"); - } - }; + Src->removeEdge(*OldEdge); + destroyEdge(*OldEdge); + LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n"); + } + }; - for (NodeType *SCCNode : NL) { + for (NodeType *SCCNode : NL) { // Process incoming edges incident to the pi-block node. reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming); @@ -491,14 +491,14 @@ void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() { // Put members of the pi-block right after the pi-block itself, for // convenience. const NodeListType &PiBlockMembers = getNodesInPiBlock(*N); - llvm::append_range(NodesInPO, PiBlockMembers); + llvm::append_range(NodesInPO, PiBlockMembers); } NodesInPO.push_back(N); } size_t OldSize = Graph.Nodes.size(); Graph.Nodes.clear(); - append_range(Graph.Nodes, reverse(NodesInPO)); + append_range(Graph.Nodes, reverse(NodesInPO)); if (Graph.Nodes.size() != OldSize) assert(false && "Expected the number of nodes to stay the same after the sort"); diff --git a/contrib/libs/llvm12/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/contrib/libs/llvm12/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index e138e82c8b..728c83a6d6 100644 --- a/contrib/libs/llvm12/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -1,531 +1,531 @@ -//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a model runner using Tensorflow C APIs, allowing the -// loading of a model from a command line option. 
-// -//===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_API) - -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" -#include "llvm/Analysis/MLInlineAdvisor.h" -#include "llvm/Analysis/Utils/TFUtils.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ManagedStatic.h" - -#include <vector> - -using namespace llvm; - -static cl::opt<std::string> TrainingLog( - "training-log", cl::Hidden, - cl::desc("Path where the development - mode inlining log is saved.")); - -static cl::opt<std::string> TFModelUnderTrainingPath( - "ml-inliner-model-under-training", cl::Hidden, - cl::desc(R"(Path to SavedModel from the previous training iteration. -The directory is also expected to contain a JSON specification of the -outputs expected to be logged, where the first entry must be the -inlining decision. The file containing the specification should be -called output_spec.json. The expected JSON value is an array of -dictionaries. Each dictionary should have 2 keys: - -- "tensor_spec, followed by the TensorSpec description of the -output; and -- "logging_name", a string indicating the name to use when -logging the output values. - -Example: -[ - { - "logging_name" : "some_name", - "tensor_spec" : { - "name" : "model_name", - "port" : 0, - "shape" : [2, 3], - "type" : "float" - } - } -] - -The first value must always correspond to the decision.)")); - -static cl::opt<std::string> TFOutputSpecOverride( - "ml-inliner-output-spec-override", cl::Hidden, - cl::desc("Override the path to the output spec json file. See " - "-ml-inliner-model-under-training documentation for the " - "specification of that file.")); - -static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", - cl::Hidden, cl::init("action_"), - cl::desc("Prefix for feature names.")); - -namespace { -/// An InlineEvent, used by TrainingLogger. -struct InlineEvent { - /// What the default policy's decision would have been. - int64_t DefaultDecision = 0; - - /// What we advised. When training off the default policy, this is the same as - /// DefaultDecision. - int64_t AdvisedDecision = 0; - - /// What actually happened. This would be 'false' in the case of an inline - /// error, even if AdvisedDecision were true, otherwise it agrees with - /// AdvisedDecision. - bool Effect = false; - - /// What the change in size was: size_after - size_before - int64_t Reward = 0; -}; - -/// Collect data we may use for training a model, and write it as a textual -/// Tensorflow SequenceExample -/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample) -/// protobuf (https://developers.google.com/protocol-buffers). -/// Because this is a protobuf, we cannot just stream the events as they come. -/// Internally, TrainingLogger stores data in column-major format, because that -/// lines up with how TF SequenceExample represents it. -class ModelUnderTrainingRunner; -class TrainingLogger final { -public: - TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR); - - /// Log one inlining event. - void logInlineEvent(const InlineEvent &Event, - const MLModelRunner &ModelRunner); - - /// Print the stored tensors. - void print(); - -private: - StringRef LogFileName; - const ModelUnderTrainingRunner *const MUTR; - std::unique_ptr<Logger> L; - std::vector<bool> Effects; - /// There's at least one output. 
We'll set this to a different value if MUTR - /// is avaliable. - size_t OutputCount = 1; - /// Set these 2 clearly OOB, to make sure we set them later. - size_t DefaultDecisionPos = std::numeric_limits<size_t>::max(); - size_t DecisionPos = std::numeric_limits<size_t>::max(); -}; - -/// An extension of the MLInlineAdvisor for the 'development' mode, targeting -/// the offline training scenario. Note that training happens outside of the -/// compiler, this facility is concerned with producing training data ("logs"). -/// This InlineAdvisor can operate in the following modes: -/// -/// 1) collect logs for the default policy. This is useful for bootstrapping -/// training, which will be considerably faster by starting from a reasonable -/// policy. -/// -/// 2) collect logs for the ML policy, using a model from a previous -/// training. Potentially, that model uses internally some small random -/// perturbation of its weights, to induce exploration (setting this up is the -/// responsibility of the training algorithm). The logs would then be used to -/// retrain and improve on this model. -/// -/// 3) use the provided model, with no logging. This is useful for end to end -/// validation - the model, in this case, is a release candidate and shouldn't -/// have random perturbations. It is a convenience feature: rather than needing -/// to take the release candidate model and compile it in 'release' mode, -/// validate it, then potentially discard it, it's easier to just pass the model -/// to the compiler, albeit compilation would be slower, as a one-off. Once the -/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in -/// release mode. The expectation is that a well-trained model provides a good -/// policy over a sufficiently diverse codebase, over many changes (i.e. -/// training happens seldom). -class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { -public: - DevelopmentModeMLInlineAdvisor( - Module &M, ModuleAnalysisManager &MAM, - std::unique_ptr<MLModelRunner> ModelRunner, - std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, - std::unique_ptr<TrainingLogger> Logger); - - size_t getTotalSizeEstimate(); - - virtual ~DevelopmentModeMLInlineAdvisor(); - void updateNativeSizeEstimate(int64_t Change) { - *CurrentNativeSize += Change; - } - void resetNativeSize(Function *F) { - FAM.invalidate<InlineSizeEstimatorAnalysis>(*F); - } - - std::unique_ptr<MLInlineAdvice> - getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; - - Optional<size_t> getNativeSizeEstimate(const Function &F) const; - -private: - bool isLogging() const { return !!Logger; } - std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override; - - std::function<bool(CallBase &)> GetDefaultAdvice; - const bool IsDoingInference; - std::unique_ptr<TrainingLogger> Logger; - - const Optional<int32_t> InitialNativeSize; - Optional<int32_t> CurrentNativeSize; -}; - -/// A variant of MLInlineAdvice that tracks all non-trivial inlining -/// decisions, for training/logging. 
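The class declared next records, per inlining decision, a reward derived from the native size estimates mentioned above. Its recordInliningWithCalleeDeletedImpl reduces to "size after minus size before", where "before" counts the caller and the separate callee. A minimal standalone sketch of that arithmetic (the function name inlineSizeReward and the numbers are hypothetical, chosen only for illustration):

#include <cstdint>
#include <cstdio>

// reward = native size after inlining - native size before inlining,
// with "before" covering both the caller and the not-yet-inlined callee.
int64_t inlineSizeReward(int64_t CallerSizeAfter, int64_t CallerSizeBefore,
                         int64_t CalleeSizeBefore) {
  return CallerSizeAfter - (CallerSizeBefore + CalleeSizeBefore);
}

int main() {
  // Example: the caller grows from 120 to 150 bytes while absorbing a
  // 40-byte callee that is then deleted, so total native size shrinks by 10.
  std::printf("reward = %lld\n",
              (long long)inlineSizeReward(150, 120, 40));
  return 0;
}

A negative reward therefore corresponds to an overall size reduction, matching the "size_after - size_before" convention documented on InlineEvent::Reward.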
-class LoggingMLInlineAdvice : public MLInlineAdvice { -public: - LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, - OptimizationRemarkEmitter &ORE, bool Recommendation, - TrainingLogger &Logger, - Optional<size_t> CallerSizeEstimateBefore, - Optional<size_t> CalleeSizeEstimateBefore, - bool DefaultDecision, bool Mandatory = false) - : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), - CallerSizeEstimateBefore(CallerSizeEstimateBefore), - CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), - DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} - - virtual ~LoggingMLInlineAdvice() = default; - -private: - DevelopmentModeMLInlineAdvisor *getAdvisor() const { - return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); - } - void recordInliningImpl() override { - MLInlineAdvice::recordInliningImpl(); - getAdvisor()->resetNativeSize(Caller); - int Reward = std::numeric_limits<int>::max(); - if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && - !getAdvisor()->isForcedToStop()) { - int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + - *CalleeSizeEstimateBefore; - Reward = NativeSizeAfter - - (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); - getAdvisor()->updateNativeSizeEstimate(Reward); - } - log(Reward, /*Success=*/true); - } - - void recordInliningWithCalleeDeletedImpl() override { - MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); - getAdvisor()->resetNativeSize(Caller); - if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && - !getAdvisor()->isForcedToStop()) { - int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); - int Reward = NativeSizeAfter - - (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); - getAdvisor()->updateNativeSizeEstimate(Reward); - log(Reward, /*Success=*/true); - } - } - - void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { - MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); - log(NoReward, /*Success=*/false); - } - - void recordUnattemptedInliningImpl() override { - MLInlineAdvice::recordUnattemptedInliningImpl(); - log(NoReward, /*Success=*/false); - } - - void log(int64_t Reward, bool Success) { - if (Mandatory) - return; - InlineEvent Event; - Event.AdvisedDecision = isInliningRecommended(); - Event.DefaultDecision = DefaultDecision; - Event.Effect = Success; - Event.Reward = Reward; - Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); - } - - static const int64_t NoReward = 0; - TrainingLogger &Logger; - const Optional<size_t> CallerSizeEstimateBefore; - const Optional<size_t> CalleeSizeEstimateBefore; - const int64_t DefaultDecision; - const int64_t Mandatory; -}; - -/// A pseudo model runner. We use it to store feature values when collecting -/// logs for the default policy, but never ask it to 'run'. -class NoInferenceModelRunner : public MLModelRunner { -public: - NoInferenceModelRunner(LLVMContext &Ctx) - : MLModelRunner(Ctx), Features(NumberOfFeatures) {} - void setFeature(FeatureIndex Index, int64_t Value) override { - Features[static_cast<int>(Index)] = Value; - } - - int64_t getFeature(int Index) const override { return Features[Index]; } - bool run() override { - llvm_unreachable("We shouldn't call run on this model runner."); - } - -private: - InlineFeatures Features; -}; - -/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs -/// to dynamically load and evaluate a TF SavedModel -/// (https://www.tensorflow.org/guide/saved_model). 
Runtime performance is -/// sacrificed for ease of use while training. -class ModelUnderTrainingRunner final : public MLModelRunner { -public: - ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath); - - bool run() override; - - // Disallows copy and assign. - ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete; - ModelUnderTrainingRunner & - operator=(const ModelUnderTrainingRunner &) = delete; - - void setFeature(FeatureIndex Index, int64_t Value) override; - int64_t getFeature(int Index) const override; - bool isValid() const { return !!Evaluator; } - - const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const { - return OutputSpecs; - } - - const Optional<TFModelEvaluator::EvaluationResult> & - lastEvaluationResult() const { - return LastEvaluationResult; - } - -private: - std::unique_ptr<TFModelEvaluator> Evaluator; - std::vector<LoggedFeatureSpec> OutputSpecs; - Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult; - - // The training framework needs some additional features. - const std::vector<TensorSpec> TrainingOnlyFeatures{ - TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}), - TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}), - TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}), - TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})}; -}; -} // namespace - -TrainingLogger::TrainingLogger(StringRef LogFileName, - const ModelUnderTrainingRunner *MUTR) - : LogFileName(LogFileName), MUTR(MUTR) { - // The first output is the inlining decision. - if (MUTR) - OutputCount = MUTR->outputLoggedFeatureSpecs().size(); - std::vector<LoggedFeatureSpec> FT; - - for (size_t I = 0; I < NumberOfFeatures; ++I) - FT.push_back( - {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None}); - if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1) - append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs())); - - DefaultDecisionPos = FT.size(); - FT.push_back( - {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None}); - - DecisionPos = FT.size(); - FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None}); - - L = std::make_unique<Logger>( - FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), - InlineSizeEstimatorAnalysis::isEvaluatorRequested()); -} - -/// Log one inlining event. 
-void TrainingLogger::logInlineEvent(const InlineEvent &Event, - const MLModelRunner &ModelRunner) { - size_t CurrentFeature = 0; - for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) { - int64_t F = ModelRunner.getFeature(CurrentFeature); - L->logTensorValue(CurrentFeature, &F); - } - - for (size_t I = 1; I < OutputCount; ++I) { - const auto &Result = *MUTR->lastEvaluationResult(); - auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec; - const char *RawData = - reinterpret_cast<const char *>(Result.getUntypedTensorValue(I)); - L->logTensorValue(CurrentFeature, RawData, - Spec.getElementCount() * Spec.getElementByteSize()); - ++CurrentFeature; - } - - assert(CurrentFeature == DefaultDecisionPos); - L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision); - L->logTensorValue(DecisionPos, &Event.AdvisedDecision); - if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) - L->logReward(Event.Reward); - - // For debugging / later use - Effects.push_back(Event.Effect); -} - -void TrainingLogger::print() { - std::error_code EC; - raw_fd_ostream OutFile(LogFileName, EC); - L->print(OutFile); -} - -DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( - Module &M, ModuleAnalysisManager &MAM, - std::unique_ptr<MLModelRunner> ModelRunner, - std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, - std::unique_ptr<TrainingLogger> Logger) - : MLInlineAdvisor(M, MAM, std::move(ModelRunner)), - GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference), - Logger(std::move(Logger)), - InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), - CurrentNativeSize(InitialNativeSize) { - // We cannot have the case of neither inference nor logging. - assert(IsDoingInference || isLogging()); -} - -DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() { - if (isLogging()) - Logger->print(); -} - -Optional<size_t> -DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { - if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) - return None; - auto &R = - FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); - if (!R) { - F.getParent()->getContext().emitError( - "Native size estimator is not present."); - return 0; - } - return *R; -} - -std::unique_ptr<MLInlineAdvice> -DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { - return std::make_unique<LoggingMLInlineAdvice>( - /*Advisor=*/this, - /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, - /*Logger=*/*Logger, - /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), - /*CalleeSizeEstimateBefore=*/ - getNativeSizeEstimate(*CB.getCalledFunction()), - /*DefaultDecision=*/true, /*Mandatory*/ true); -} - -std::unique_ptr<MLInlineAdvice> -DevelopmentModeMLInlineAdvisor::getAdviceFromModel( - CallBase &CB, OptimizationRemarkEmitter &ORE) { - if (IsDoingInference && !isLogging()) - return MLInlineAdvisor::getAdviceFromModel(CB, ORE); - - bool DefaultAdvice = GetDefaultAdvice(CB); - auto Recommendation = IsDoingInference ? 
ModelRunner->run() : DefaultAdvice; - return std::make_unique<LoggingMLInlineAdvice>( - /*Advisor=*/this, - /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, - /*Logger=*/*Logger, - /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), - /*CalleeSizeEstimateBefore=*/ - getNativeSizeEstimate(*CB.getCalledFunction()), - /*DefaultDecision=*/DefaultAdvice); -} - -size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { - if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) - return 0; - size_t Ret = 0; - for (auto &F : M) { - if (F.isDeclaration()) - continue; - if (isFunctionDeleted(&F)) - continue; - Ret += *getNativeSizeEstimate(F); - } - return Ret; -} - -ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx, - const std::string &ModelPath) - : MLModelRunner(Ctx) { - std::vector<TensorSpec> InputSpecs; - for (size_t I = 0; I < NumberOfFeatures; ++I) - InputSpecs.push_back( - TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1})); - append_range(InputSpecs, TrainingOnlyFeatures); - if (auto MaybeOutSpecs = - loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride)) - OutputSpecs = std::move(*MaybeOutSpecs); - else - return; - - Evaluator = std::make_unique<TFModelEvaluator>( - ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; }, - OutputSpecs.size()); - if (!Evaluator || !Evaluator->isValid()) { - Ctx.emitError("Failed to create inliner saved model evaluator"); - Evaluator.reset(); - return; - } -} - -bool ModelUnderTrainingRunner::run() { - LastEvaluationResult = Evaluator->evaluate(); - if (!LastEvaluationResult.hasValue()) { - Ctx.emitError("Error evaluating model."); - return false; - } - int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0); - return static_cast<bool>(Decision); -} - -int64_t ModelUnderTrainingRunner::getFeature(int Index) const { - return *Evaluator->getInput<int64_t>(Index); -} - -void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) { - size_t NumericIndex = static_cast<size_t>(Index); - *(Evaluator->getInput<int64_t>(NumericIndex)) = Value; -} - -std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( - Module &M, ModuleAnalysisManager &MAM, - std::function<bool(CallBase &)> GetDefaultAdvice) { - auto &Ctx = M.getContext(); - std::unique_ptr<MLModelRunner> Runner; - ModelUnderTrainingRunner *MUTRPtr = nullptr; - bool IsDoingInference = false; - if (TFModelUnderTrainingPath.empty()) - Runner.reset(new NoInferenceModelRunner(Ctx)); - else { - auto MUTR = std::make_unique<ModelUnderTrainingRunner>( - Ctx, TFModelUnderTrainingPath); - if (!MUTR || !MUTR->isValid()) { - Ctx.emitError("Could not load the policy model from the provided path"); - return nullptr; - } - IsDoingInference = true; - MUTRPtr = MUTR.get(); - Runner = std::move(MUTR); - } - std::unique_ptr<TrainingLogger> Logger; - if (!TrainingLog.empty()) - Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr); - - return std::make_unique<DevelopmentModeMLInlineAdvisor>( - M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference, - std::move(Logger)); -} -#endif // defined(LLVM_HAVE_TF_API) +//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements a model runner using Tensorflow C APIs, allowing the +// loading of a model from a command line option. +// +//===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" +#if defined(LLVM_HAVE_TF_API) + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" +#include "llvm/Analysis/MLInlineAdvisor.h" +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" + +#include <vector> + +using namespace llvm; + +static cl::opt<std::string> TrainingLog( + "training-log", cl::Hidden, + cl::desc("Path where the development - mode inlining log is saved.")); + +static cl::opt<std::string> TFModelUnderTrainingPath( + "ml-inliner-model-under-training", cl::Hidden, + cl::desc(R"(Path to SavedModel from the previous training iteration. +The directory is also expected to contain a JSON specification of the +outputs expected to be logged, where the first entry must be the +inlining decision. The file containing the specification should be +called output_spec.json. The expected JSON value is an array of +dictionaries. Each dictionary should have 2 keys: + +- "tensor_spec, followed by the TensorSpec description of the +output; and +- "logging_name", a string indicating the name to use when +logging the output values. + +Example: +[ + { + "logging_name" : "some_name", + "tensor_spec" : { + "name" : "model_name", + "port" : 0, + "shape" : [2, 3], + "type" : "float" + } + } +] + +The first value must always correspond to the decision.)")); + +static cl::opt<std::string> TFOutputSpecOverride( + "ml-inliner-output-spec-override", cl::Hidden, + cl::desc("Override the path to the output spec json file. See " + "-ml-inliner-model-under-training documentation for the " + "specification of that file.")); + +static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", + cl::Hidden, cl::init("action_"), + cl::desc("Prefix for feature names.")); + +namespace { +/// An InlineEvent, used by TrainingLogger. +struct InlineEvent { + /// What the default policy's decision would have been. + int64_t DefaultDecision = 0; + + /// What we advised. When training off the default policy, this is the same as + /// DefaultDecision. + int64_t AdvisedDecision = 0; + + /// What actually happened. This would be 'false' in the case of an inline + /// error, even if AdvisedDecision were true, otherwise it agrees with + /// AdvisedDecision. + bool Effect = false; + + /// What the change in size was: size_after - size_before + int64_t Reward = 0; +}; + +/// Collect data we may use for training a model, and write it as a textual +/// Tensorflow SequenceExample +/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample) +/// protobuf (https://developers.google.com/protocol-buffers). +/// Because this is a protobuf, we cannot just stream the events as they come. +/// Internally, TrainingLogger stores data in column-major format, because that +/// lines up with how TF SequenceExample represents it. +class ModelUnderTrainingRunner; +class TrainingLogger final { +public: + TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR); + + /// Log one inlining event. + void logInlineEvent(const InlineEvent &Event, + const MLModelRunner &ModelRunner); + + /// Print the stored tensors. 
+ void print(); + +private: + StringRef LogFileName; + const ModelUnderTrainingRunner *const MUTR; + std::unique_ptr<Logger> L; + std::vector<bool> Effects; + /// There's at least one output. We'll set this to a different value if MUTR + /// is avaliable. + size_t OutputCount = 1; + /// Set these 2 clearly OOB, to make sure we set them later. + size_t DefaultDecisionPos = std::numeric_limits<size_t>::max(); + size_t DecisionPos = std::numeric_limits<size_t>::max(); +}; + +/// An extension of the MLInlineAdvisor for the 'development' mode, targeting +/// the offline training scenario. Note that training happens outside of the +/// compiler, this facility is concerned with producing training data ("logs"). +/// This InlineAdvisor can operate in the following modes: +/// +/// 1) collect logs for the default policy. This is useful for bootstrapping +/// training, which will be considerably faster by starting from a reasonable +/// policy. +/// +/// 2) collect logs for the ML policy, using a model from a previous +/// training. Potentially, that model uses internally some small random +/// perturbation of its weights, to induce exploration (setting this up is the +/// responsibility of the training algorithm). The logs would then be used to +/// retrain and improve on this model. +/// +/// 3) use the provided model, with no logging. This is useful for end to end +/// validation - the model, in this case, is a release candidate and shouldn't +/// have random perturbations. It is a convenience feature: rather than needing +/// to take the release candidate model and compile it in 'release' mode, +/// validate it, then potentially discard it, it's easier to just pass the model +/// to the compiler, albeit compilation would be slower, as a one-off. Once the +/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in +/// release mode. The expectation is that a well-trained model provides a good +/// policy over a sufficiently diverse codebase, over many changes (i.e. +/// training happens seldom). +class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { +public: + DevelopmentModeMLInlineAdvisor( + Module &M, ModuleAnalysisManager &MAM, + std::unique_ptr<MLModelRunner> ModelRunner, + std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, + std::unique_ptr<TrainingLogger> Logger); + + size_t getTotalSizeEstimate(); + + virtual ~DevelopmentModeMLInlineAdvisor(); + void updateNativeSizeEstimate(int64_t Change) { + *CurrentNativeSize += Change; + } + void resetNativeSize(Function *F) { + FAM.invalidate<InlineSizeEstimatorAnalysis>(*F); + } + + std::unique_ptr<MLInlineAdvice> + getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; + + Optional<size_t> getNativeSizeEstimate(const Function &F) const; + +private: + bool isLogging() const { return !!Logger; } + std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override; + + std::function<bool(CallBase &)> GetDefaultAdvice; + const bool IsDoingInference; + std::unique_ptr<TrainingLogger> Logger; + + const Optional<int32_t> InitialNativeSize; + Optional<int32_t> CurrentNativeSize; +}; + +/// A variant of MLInlineAdvice that tracks all non-trivial inlining +/// decisions, for training/logging. 
+class LoggingMLInlineAdvice : public MLInlineAdvice { +public: + LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, + OptimizationRemarkEmitter &ORE, bool Recommendation, + TrainingLogger &Logger, + Optional<size_t> CallerSizeEstimateBefore, + Optional<size_t> CalleeSizeEstimateBefore, + bool DefaultDecision, bool Mandatory = false) + : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), + CallerSizeEstimateBefore(CallerSizeEstimateBefore), + CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), + DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} + + virtual ~LoggingMLInlineAdvice() = default; + +private: + DevelopmentModeMLInlineAdvisor *getAdvisor() const { + return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); + } + void recordInliningImpl() override { + MLInlineAdvice::recordInliningImpl(); + getAdvisor()->resetNativeSize(Caller); + int Reward = std::numeric_limits<int>::max(); + if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && + !getAdvisor()->isForcedToStop()) { + int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + + *CalleeSizeEstimateBefore; + Reward = NativeSizeAfter - + (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); + getAdvisor()->updateNativeSizeEstimate(Reward); + } + log(Reward, /*Success=*/true); + } + + void recordInliningWithCalleeDeletedImpl() override { + MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); + getAdvisor()->resetNativeSize(Caller); + if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && + !getAdvisor()->isForcedToStop()) { + int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); + int Reward = NativeSizeAfter - + (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); + getAdvisor()->updateNativeSizeEstimate(Reward); + log(Reward, /*Success=*/true); + } + } + + void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { + MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); + log(NoReward, /*Success=*/false); + } + + void recordUnattemptedInliningImpl() override { + MLInlineAdvice::recordUnattemptedInliningImpl(); + log(NoReward, /*Success=*/false); + } + + void log(int64_t Reward, bool Success) { + if (Mandatory) + return; + InlineEvent Event; + Event.AdvisedDecision = isInliningRecommended(); + Event.DefaultDecision = DefaultDecision; + Event.Effect = Success; + Event.Reward = Reward; + Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); + } + + static const int64_t NoReward = 0; + TrainingLogger &Logger; + const Optional<size_t> CallerSizeEstimateBefore; + const Optional<size_t> CalleeSizeEstimateBefore; + const int64_t DefaultDecision; + const int64_t Mandatory; +}; + +/// A pseudo model runner. We use it to store feature values when collecting +/// logs for the default policy, but never ask it to 'run'. +class NoInferenceModelRunner : public MLModelRunner { +public: + NoInferenceModelRunner(LLVMContext &Ctx) + : MLModelRunner(Ctx), Features(NumberOfFeatures) {} + void setFeature(FeatureIndex Index, int64_t Value) override { + Features[static_cast<int>(Index)] = Value; + } + + int64_t getFeature(int Index) const override { return Features[Index]; } + bool run() override { + llvm_unreachable("We shouldn't call run on this model runner."); + } + +private: + InlineFeatures Features; +}; + +/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs +/// to dynamically load and evaluate a TF SavedModel +/// (https://www.tensorflow.org/guide/saved_model). 
Runtime performance is +/// sacrificed for ease of use while training. +class ModelUnderTrainingRunner final : public MLModelRunner { +public: + ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath); + + bool run() override; + + // Disallows copy and assign. + ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete; + ModelUnderTrainingRunner & + operator=(const ModelUnderTrainingRunner &) = delete; + + void setFeature(FeatureIndex Index, int64_t Value) override; + int64_t getFeature(int Index) const override; + bool isValid() const { return !!Evaluator; } + + const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const { + return OutputSpecs; + } + + const Optional<TFModelEvaluator::EvaluationResult> & + lastEvaluationResult() const { + return LastEvaluationResult; + } + +private: + std::unique_ptr<TFModelEvaluator> Evaluator; + std::vector<LoggedFeatureSpec> OutputSpecs; + Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult; + + // The training framework needs some additional features. + const std::vector<TensorSpec> TrainingOnlyFeatures{ + TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}), + TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}), + TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}), + TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})}; +}; +} // namespace + +TrainingLogger::TrainingLogger(StringRef LogFileName, + const ModelUnderTrainingRunner *MUTR) + : LogFileName(LogFileName), MUTR(MUTR) { + // The first output is the inlining decision. + if (MUTR) + OutputCount = MUTR->outputLoggedFeatureSpecs().size(); + std::vector<LoggedFeatureSpec> FT; + + for (size_t I = 0; I < NumberOfFeatures; ++I) + FT.push_back( + {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None}); + if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1) + append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs())); + + DefaultDecisionPos = FT.size(); + FT.push_back( + {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None}); + + DecisionPos = FT.size(); + FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None}); + + L = std::make_unique<Logger>( + FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), + InlineSizeEstimatorAnalysis::isEvaluatorRequested()); +} + +/// Log one inlining event. 
+void TrainingLogger::logInlineEvent(const InlineEvent &Event, + const MLModelRunner &ModelRunner) { + size_t CurrentFeature = 0; + for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) { + int64_t F = ModelRunner.getFeature(CurrentFeature); + L->logTensorValue(CurrentFeature, &F); + } + + for (size_t I = 1; I < OutputCount; ++I) { + const auto &Result = *MUTR->lastEvaluationResult(); + auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec; + const char *RawData = + reinterpret_cast<const char *>(Result.getUntypedTensorValue(I)); + L->logTensorValue(CurrentFeature, RawData, + Spec.getElementCount() * Spec.getElementByteSize()); + ++CurrentFeature; + } + + assert(CurrentFeature == DefaultDecisionPos); + L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision); + L->logTensorValue(DecisionPos, &Event.AdvisedDecision); + if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) + L->logReward(Event.Reward); + + // For debugging / later use + Effects.push_back(Event.Effect); +} + +void TrainingLogger::print() { + std::error_code EC; + raw_fd_ostream OutFile(LogFileName, EC); + L->print(OutFile); +} + +DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( + Module &M, ModuleAnalysisManager &MAM, + std::unique_ptr<MLModelRunner> ModelRunner, + std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, + std::unique_ptr<TrainingLogger> Logger) + : MLInlineAdvisor(M, MAM, std::move(ModelRunner)), + GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference), + Logger(std::move(Logger)), + InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), + CurrentNativeSize(InitialNativeSize) { + // We cannot have the case of neither inference nor logging. + assert(IsDoingInference || isLogging()); +} + +DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() { + if (isLogging()) + Logger->print(); +} + +Optional<size_t> +DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { + if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) + return None; + auto &R = + FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); + if (!R) { + F.getParent()->getContext().emitError( + "Native size estimator is not present."); + return 0; + } + return *R; +} + +std::unique_ptr<MLInlineAdvice> +DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { + return std::make_unique<LoggingMLInlineAdvice>( + /*Advisor=*/this, + /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, + /*Logger=*/*Logger, + /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), + /*CalleeSizeEstimateBefore=*/ + getNativeSizeEstimate(*CB.getCalledFunction()), + /*DefaultDecision=*/true, /*Mandatory*/ true); +} + +std::unique_ptr<MLInlineAdvice> +DevelopmentModeMLInlineAdvisor::getAdviceFromModel( + CallBase &CB, OptimizationRemarkEmitter &ORE) { + if (IsDoingInference && !isLogging()) + return MLInlineAdvisor::getAdviceFromModel(CB, ORE); + + bool DefaultAdvice = GetDefaultAdvice(CB); + auto Recommendation = IsDoingInference ? 
ModelRunner->run() : DefaultAdvice; + return std::make_unique<LoggingMLInlineAdvice>( + /*Advisor=*/this, + /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, + /*Logger=*/*Logger, + /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), + /*CalleeSizeEstimateBefore=*/ + getNativeSizeEstimate(*CB.getCalledFunction()), + /*DefaultDecision=*/DefaultAdvice); +} + +size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { + if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) + return 0; + size_t Ret = 0; + for (auto &F : M) { + if (F.isDeclaration()) + continue; + if (isFunctionDeleted(&F)) + continue; + Ret += *getNativeSizeEstimate(F); + } + return Ret; +} + +ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx, + const std::string &ModelPath) + : MLModelRunner(Ctx) { + std::vector<TensorSpec> InputSpecs; + for (size_t I = 0; I < NumberOfFeatures; ++I) + InputSpecs.push_back( + TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1})); + append_range(InputSpecs, TrainingOnlyFeatures); + if (auto MaybeOutSpecs = + loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride)) + OutputSpecs = std::move(*MaybeOutSpecs); + else + return; + + Evaluator = std::make_unique<TFModelEvaluator>( + ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; }, + OutputSpecs.size()); + if (!Evaluator || !Evaluator->isValid()) { + Ctx.emitError("Failed to create inliner saved model evaluator"); + Evaluator.reset(); + return; + } +} + +bool ModelUnderTrainingRunner::run() { + LastEvaluationResult = Evaluator->evaluate(); + if (!LastEvaluationResult.hasValue()) { + Ctx.emitError("Error evaluating model."); + return false; + } + int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0); + return static_cast<bool>(Decision); +} + +int64_t ModelUnderTrainingRunner::getFeature(int Index) const { + return *Evaluator->getInput<int64_t>(Index); +} + +void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) { + size_t NumericIndex = static_cast<size_t>(Index); + *(Evaluator->getInput<int64_t>(NumericIndex)) = Value; +} + +std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( + Module &M, ModuleAnalysisManager &MAM, + std::function<bool(CallBase &)> GetDefaultAdvice) { + auto &Ctx = M.getContext(); + std::unique_ptr<MLModelRunner> Runner; + ModelUnderTrainingRunner *MUTRPtr = nullptr; + bool IsDoingInference = false; + if (TFModelUnderTrainingPath.empty()) + Runner.reset(new NoInferenceModelRunner(Ctx)); + else { + auto MUTR = std::make_unique<ModelUnderTrainingRunner>( + Ctx, TFModelUnderTrainingPath); + if (!MUTR || !MUTR->isValid()) { + Ctx.emitError("Could not load the policy model from the provided path"); + return nullptr; + } + IsDoingInference = true; + MUTRPtr = MUTR.get(); + Runner = std::move(MUTR); + } + std::unique_ptr<TrainingLogger> Logger; + if (!TrainingLog.empty()) + Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr); + + return std::make_unique<DevelopmentModeMLInlineAdvisor>( + M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference, + std::move(Logger)); +} +#endif // defined(LLVM_HAVE_TF_API) diff --git a/contrib/libs/llvm12/lib/Analysis/DivergenceAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/DivergenceAnalysis.cpp index 287c132780..422b298bff 100644 --- a/contrib/libs/llvm12/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation 
----==// +//===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation ----==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -96,12 +96,12 @@ DivergenceAnalysis::DivergenceAnalysis( : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA), IsLCSSAForm(IsLCSSAForm) {} -bool DivergenceAnalysis::markDivergent(const Value &DivVal) { - if (isAlwaysUniform(DivVal)) - return false; +bool DivergenceAnalysis::markDivergent(const Value &DivVal) { + if (isAlwaysUniform(DivVal)) + return false; assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal)); assert(!isAlwaysUniform(DivVal) && "cannot be a divergent"); - return DivergentValues.insert(&DivVal).second; + return DivergentValues.insert(&DivVal).second; } void DivergenceAnalysis::addUniformOverride(const Value &UniVal) { @@ -118,7 +118,7 @@ bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock, for (const auto *Loop = LI.getLoopFor(Inst->getParent()); Loop != RegionLoop && !Loop->contains(&ObservingBlock); Loop = Loop->getParentLoop()) { - if (DivergentLoops.contains(Loop)) + if (DivergentLoops.contains(Loop)) return true; } @@ -133,103 +133,103 @@ bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const { return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB); } -void DivergenceAnalysis::pushUsers(const Value &V) { - const auto *I = dyn_cast<const Instruction>(&V); - - if (I && I->isTerminator()) { - analyzeControlDivergence(*I); - return; - } - - for (const auto *User : V.users()) { - const auto *UserInst = dyn_cast<const Instruction>(User); - if (!UserInst) - continue; - - // only compute divergent inside loop - if (!inRegion(*UserInst)) - continue; - - // All users of divergent values are immediate divergent - if (markDivergent(*UserInst)) - Worklist.push_back(UserInst); - } -} - -static const Instruction *getIfCarriedInstruction(const Use &U, - const Loop &DivLoop) { - const auto *I = dyn_cast<const Instruction>(&U); - if (!I) - return nullptr; - if (!DivLoop.contains(I)) - return nullptr; - return I; -} - -void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I, - const Loop &OuterDivLoop) { - if (isAlwaysUniform(I)) - return; - if (isDivergent(I)) - return; - - LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n"); - assert((isa<PHINode>(I) || !IsLCSSAForm) && - "In LCSSA form all users of loop-exiting defs are Phi nodes."); - for (const Use &Op : I.operands()) { - const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop); +void DivergenceAnalysis::pushUsers(const Value &V) { + const auto *I = dyn_cast<const Instruction>(&V); + + if (I && I->isTerminator()) { + analyzeControlDivergence(*I); + return; + } + + for (const auto *User : V.users()) { + const auto *UserInst = dyn_cast<const Instruction>(User); + if (!UserInst) + continue; + + // only compute divergent inside loop + if (!inRegion(*UserInst)) + continue; + + // All users of divergent values are immediate divergent + if (markDivergent(*UserInst)) + Worklist.push_back(UserInst); + } +} + +static const Instruction *getIfCarriedInstruction(const Use &U, + const Loop &DivLoop) { + const auto *I = dyn_cast<const Instruction>(&U); + if (!I) + return nullptr; + if (!DivLoop.contains(I)) + return nullptr; + return I; +} + +void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I, + const Loop &OuterDivLoop) { + if (isAlwaysUniform(I)) + return; + if (isDivergent(I)) + 
return; + + LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n"); + assert((isa<PHINode>(I) || !IsLCSSAForm) && + "In LCSSA form all users of loop-exiting defs are Phi nodes."); + for (const Use &Op : I.operands()) { + const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop); if (!OpInst) continue; - if (markDivergent(I)) - pushUsers(I); - return; + if (markDivergent(I)) + pushUsers(I); + return; } } // marks all users of loop-carried values of the loop headed by LoopHeader as // divergent -void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit, - const Loop &OuterDivLoop) { - // All users are in immediate exit blocks - if (IsLCSSAForm) { - for (const auto &Phi : DivExit.phis()) { - analyzeTemporalDivergence(Phi, OuterDivLoop); - } - return; - } - - // For non-LCSSA we have to follow all live out edges wherever they may lead. - const BasicBlock &LoopHeader = *OuterDivLoop.getHeader(); - SmallVector<const BasicBlock *, 8> TaintStack; - TaintStack.push_back(&DivExit); +void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit, + const Loop &OuterDivLoop) { + // All users are in immediate exit blocks + if (IsLCSSAForm) { + for (const auto &Phi : DivExit.phis()) { + analyzeTemporalDivergence(Phi, OuterDivLoop); + } + return; + } + + // For non-LCSSA we have to follow all live out edges wherever they may lead. + const BasicBlock &LoopHeader = *OuterDivLoop.getHeader(); + SmallVector<const BasicBlock *, 8> TaintStack; + TaintStack.push_back(&DivExit); // Otherwise potential users of loop-carried values could be anywhere in the // dominance region of DivLoop (including its fringes for phi nodes) DenseSet<const BasicBlock *> Visited; - Visited.insert(&DivExit); + Visited.insert(&DivExit); - do { - auto *UserBlock = TaintStack.pop_back_val(); + do { + auto *UserBlock = TaintStack.pop_back_val(); // don't spread divergence beyond the region if (!inRegion(*UserBlock)) continue; - assert(!OuterDivLoop.contains(UserBlock) && + assert(!OuterDivLoop.contains(UserBlock) && "irreducible control flow detected"); // phi nodes at the fringes of the dominance region if (!DT.dominates(&LoopHeader, UserBlock)) { // all PHI nodes of UserBlock become divergent for (auto &Phi : UserBlock->phis()) { - analyzeTemporalDivergence(Phi, OuterDivLoop); + analyzeTemporalDivergence(Phi, OuterDivLoop); } continue; } - // Taint outside users of values carried by OuterDivLoop. + // Taint outside users of values carried by OuterDivLoop. for (auto &I : *UserBlock) { - analyzeTemporalDivergence(I, OuterDivLoop); + analyzeTemporalDivergence(I, OuterDivLoop); } // visit all blocks in the dominance region @@ -239,57 +239,57 @@ void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit, } TaintStack.push_back(SuccBlock); } - } while (!TaintStack.empty()); + } while (!TaintStack.empty()); } -void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit, - const Loop &InnerDivLoop) { - LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n"); - - // Find outer-most loop that does not contain \p DivExit - const Loop *DivLoop = &InnerDivLoop; - const Loop *OuterDivLoop = DivLoop; - const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit); - const unsigned LoopExitDepth = - ExitLevelLoop ? 
ExitLevelLoop->getLoopDepth() : 0; - while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) { - DivergentLoops.insert(DivLoop); // all crossed loops are divergent - OuterDivLoop = DivLoop; - DivLoop = DivLoop->getParentLoop(); +void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit, + const Loop &InnerDivLoop) { + LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n"); + + // Find outer-most loop that does not contain \p DivExit + const Loop *DivLoop = &InnerDivLoop; + const Loop *OuterDivLoop = DivLoop; + const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit); + const unsigned LoopExitDepth = + ExitLevelLoop ? ExitLevelLoop->getLoopDepth() : 0; + while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) { + DivergentLoops.insert(DivLoop); // all crossed loops are divergent + OuterDivLoop = DivLoop; + DivLoop = DivLoop->getParentLoop(); } - LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName() - << "\n"); + LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName() + << "\n"); - analyzeLoopExitDivergence(DivExit, *OuterDivLoop); + analyzeLoopExitDivergence(DivExit, *OuterDivLoop); } -// this is a divergent join point - mark all phi nodes as divergent and push -// them onto the stack. -void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { - LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName() - << "\n"); +// this is a divergent join point - mark all phi nodes as divergent and push +// them onto the stack. +void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { + LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName() + << "\n"); // ignore divergence outside the region if (!inRegion(JoinBlock)) { - return; + return; } // push non-divergent phi nodes in JoinBlock to the worklist - for (const auto &Phi : JoinBlock.phis()) { - if (isDivergent(Phi)) - continue; - // FIXME Theoretically ,the 'undef' value could be replaced by any other - // value causing spurious divergence. - if (Phi.hasConstantOrUndefValue()) - continue; - if (markDivergent(Phi)) - Worklist.push_back(&Phi); - } + for (const auto &Phi : JoinBlock.phis()) { + if (isDivergent(Phi)) + continue; + // FIXME Theoretically ,the 'undef' value could be replaced by any other + // value causing spurious divergence. + if (Phi.hasConstantOrUndefValue()) + continue; + if (markDivergent(Phi)) + Worklist.push_back(&Phi); + } } -void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) { - LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName() - << "\n"); +void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) { + LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName() + << "\n"); // Don't propagate divergence from unreachable blocks. 
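The divergence propagation in this hunk is a standard fixed-point worklist: seed values marked divergent push their users, and each newly marked user is pushed in turn until nothing changes. A minimal standalone sketch of that idea, using plain STL containers and toy value names rather than the DivergenceAnalysis classes in this diff:

#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

int main() {
  // Toy def-use graph: each value maps to the values that use it. Divergence
  // spreads from the seed to all transitive users, mirroring the
  // markDivergent()/pushUsers()/compute() loop in the surrounding hunk.
  std::unordered_map<std::string, std::vector<std::string>> Users = {
      {"tid", {"cmp"}}, {"cmp", {"br"}}, {"br", {}}, {"a", {"add"}}, {"add", {}}};
  std::unordered_set<std::string> Divergent = {"tid"}; // seed value
  std::vector<std::string> Worklist(Divergent.begin(), Divergent.end());

  while (!Worklist.empty()) {
    std::string V = Worklist.back();
    Worklist.pop_back();
    for (const std::string &U : Users[V])
      if (Divergent.insert(U).second) // only push a value the first time
        Worklist.push_back(U);
  }

  for (const std::string &V : Divergent)
    std::cout << V << " is divergent\n";
  return 0;
}

The insert-returns-bool pattern plays the same role as markDivergent() above: a value is pushed only the first time it becomes divergent, which bounds the propagation to one visit per use edge.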
if (!DT.isReachableFromEntry(Term.getParent())) @@ -297,45 +297,45 @@ void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) { const auto *BranchLoop = LI.getLoopFor(Term.getParent()); - const auto &DivDesc = SDA.getJoinBlocks(Term); + const auto &DivDesc = SDA.getJoinBlocks(Term); - // Iterate over all blocks now reachable by a disjoint path join - for (const auto *JoinBlock : DivDesc.JoinDivBlocks) { - taintAndPushPhiNodes(*JoinBlock); + // Iterate over all blocks now reachable by a disjoint path join + for (const auto *JoinBlock : DivDesc.JoinDivBlocks) { + taintAndPushPhiNodes(*JoinBlock); } - assert(DivDesc.LoopDivBlocks.empty() || BranchLoop); - for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) { - propagateLoopExitDivergence(*DivExitBlock, *BranchLoop); + assert(DivDesc.LoopDivBlocks.empty() || BranchLoop); + for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) { + propagateLoopExitDivergence(*DivExitBlock, *BranchLoop); } } void DivergenceAnalysis::compute() { - // Initialize worklist. - auto DivValuesCopy = DivergentValues; - for (const auto *DivVal : DivValuesCopy) { - assert(isDivergent(*DivVal) && "Worklist invariant violated!"); + // Initialize worklist. + auto DivValuesCopy = DivergentValues; + for (const auto *DivVal : DivValuesCopy) { + assert(isDivergent(*DivVal) && "Worklist invariant violated!"); pushUsers(*DivVal); } - // All values on the Worklist are divergent. - // Their users may not have been updated yed. + // All values on the Worklist are divergent. + // Their users may not have been updated yed. while (!Worklist.empty()) { const Instruction &I = *Worklist.back(); Worklist.pop_back(); // propagate value divergence to users - assert(isDivergent(I) && "Worklist invariant violated!"); - pushUsers(I); + assert(isDivergent(I) && "Worklist invariant violated!"); + pushUsers(I); } } bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const { - return UniformOverrides.contains(&V); + return UniformOverrides.contains(&V); } bool DivergenceAnalysis::isDivergent(const Value &V) const { - return DivergentValues.contains(&V); + return DivergentValues.contains(&V); } bool DivergenceAnalysis::isDivergentUse(const Use &U) const { @@ -360,7 +360,7 @@ GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI) - : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) { + : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) { for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { DA.markDivergent(I); diff --git a/contrib/libs/llvm12/lib/Analysis/DomTreeUpdater.cpp b/contrib/libs/llvm12/lib/Analysis/DomTreeUpdater.cpp index 8ac7d9d4ef..76b5aff077 100644 --- a/contrib/libs/llvm12/lib/Analysis/DomTreeUpdater.cpp +++ b/contrib/libs/llvm12/lib/Analysis/DomTreeUpdater.cpp @@ -32,7 +32,7 @@ bool DomTreeUpdater::isUpdateValid( // Since isUpdateValid() must be called *after* the Terminator of From is // altered we can determine if the update is unnecessary for batch updates // or invalid for a single update. - const bool HasEdge = llvm::is_contained(successors(From), To); + const bool HasEdge = llvm::is_contained(successors(From), To); // If the IR does not match the update, // 1. In batch updates, this update is unnecessary. 
@@ -166,7 +166,7 @@ bool DomTreeUpdater::hasPendingPostDomTreeUpdates() const { bool DomTreeUpdater::isBBPendingDeletion(llvm::BasicBlock *DelBB) const { if (Strategy == UpdateStrategy::Eager || DeletedBBs.empty()) return false; - return DeletedBBs.contains(DelBB); + return DeletedBBs.contains(DelBB); } // The DT and PDT require the nodes related to updates diff --git a/contrib/libs/llvm12/lib/Analysis/EHPersonalities.cpp b/contrib/libs/llvm12/lib/Analysis/EHPersonalities.cpp index a982f266b2..e8d0d94d8f 100644 --- a/contrib/libs/llvm12/lib/Analysis/EHPersonalities.cpp +++ b/contrib/libs/llvm12/lib/Analysis/EHPersonalities.cpp @@ -24,23 +24,23 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) { if (!F) return EHPersonality::Unknown; return StringSwitch<EHPersonality>(F->getName()) - .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) - .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) - .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX) - .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) - .Case("__gcc_personality_v0", EHPersonality::GNU_C) - .Case("__gcc_personality_seh0", EHPersonality::GNU_C) - .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) - .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) - .Case("_except_handler3", EHPersonality::MSVC_X86SEH) - .Case("_except_handler4", EHPersonality::MSVC_X86SEH) - .Case("__C_specific_handler", EHPersonality::MSVC_TableSEH) - .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) - .Case("ProcessCLRException", EHPersonality::CoreCLR) - .Case("rust_eh_personality", EHPersonality::Rust) - .Case("__gxx_wasm_personality_v0", EHPersonality::Wasm_CXX) - .Case("__xlcxx_personality_v1", EHPersonality::XL_CXX) - .Default(EHPersonality::Unknown); + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__gcc_personality_seh0", EHPersonality::GNU_C) + .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_TableSEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Case("ProcessCLRException", EHPersonality::CoreCLR) + .Case("rust_eh_personality", EHPersonality::Rust) + .Case("__gxx_wasm_personality_v0", EHPersonality::Wasm_CXX) + .Case("__xlcxx_personality_v1", EHPersonality::XL_CXX) + .Default(EHPersonality::Unknown); } StringRef llvm::getEHPersonalityName(EHPersonality Pers) { @@ -52,14 +52,14 @@ StringRef llvm::getEHPersonalityName(EHPersonality Pers) { case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0"; case EHPersonality::GNU_ObjC: return "__objc_personality_v0"; case EHPersonality::MSVC_X86SEH: return "_except_handler3"; - case EHPersonality::MSVC_TableSEH: - return "__C_specific_handler"; + case EHPersonality::MSVC_TableSEH: + return "__C_specific_handler"; case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3"; case EHPersonality::CoreCLR: return "ProcessCLRException"; case EHPersonality::Rust: return "rust_eh_personality"; case EHPersonality::Wasm_CXX: return "__gxx_wasm_personality_v0"; - case EHPersonality::XL_CXX: - return "__xlcxx_personality_v1"; + case EHPersonality::XL_CXX: + return "__xlcxx_personality_v1"; case 
EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!"); } diff --git a/contrib/libs/llvm12/lib/Analysis/FunctionPropertiesAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/FunctionPropertiesAnalysis.cpp index 33519038e2..037c18853a 100644 --- a/contrib/libs/llvm12/lib/Analysis/FunctionPropertiesAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/FunctionPropertiesAnalysis.cpp @@ -1,88 +1,88 @@ -//===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis -// classes used to extract function properties. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/FunctionPropertiesAnalysis.h" -#include "llvm/IR/Instructions.h" - -using namespace llvm; - -FunctionPropertiesInfo -FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F, - const LoopInfo &LI) { - - FunctionPropertiesInfo FPI; - - FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); - - for (const auto &BB : F) { - ++FPI.BasicBlockCount; - - if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) { - if (BI->isConditional()) - FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors(); - } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) { - FPI.BlocksReachedFromConditionalInstruction += - (SI->getNumCases() + (nullptr != SI->getDefaultDest())); - } - - for (const auto &I : BB) { - if (auto *CS = dyn_cast<CallBase>(&I)) { - const auto *Callee = CS->getCalledFunction(); - if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) - ++FPI.DirectCallsToDefinedFunctions; - } - if (I.getOpcode() == Instruction::Load) { - ++FPI.LoadInstCount; - } else if (I.getOpcode() == Instruction::Store) { - ++FPI.StoreInstCount; - } - } - // Loop Depth of the Basic Block - int64_t LoopDepth; - LoopDepth = LI.getLoopDepth(&BB); - if (FPI.MaxLoopDepth < LoopDepth) - FPI.MaxLoopDepth = LoopDepth; - } - FPI.TopLevelLoopCount += llvm::size(LI); - return FPI; -} - -void FunctionPropertiesInfo::print(raw_ostream &OS) const { - OS << "BasicBlockCount: " << BasicBlockCount << "\n" - << "BlocksReachedFromConditionalInstruction: " - << BlocksReachedFromConditionalInstruction << "\n" - << "Uses: " << Uses << "\n" - << "DirectCallsToDefinedFunctions: " << DirectCallsToDefinedFunctions - << "\n" - << "LoadInstCount: " << LoadInstCount << "\n" - << "StoreInstCount: " << StoreInstCount << "\n" - << "MaxLoopDepth: " << MaxLoopDepth << "\n" - << "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n"; -} - -AnalysisKey FunctionPropertiesAnalysis::Key; - -FunctionPropertiesInfo -FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { - return FunctionPropertiesInfo::getFunctionPropertiesInfo( - F, FAM.getResult<LoopAnalysis>(F)); -} - -PreservedAnalyses -FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - OS << "Printing analysis results of CFA for function " - << "'" << F.getName() << "':" - << "\n"; - AM.getResult<FunctionPropertiesAnalysis>(F).print(OS); - return PreservedAnalyses::all(); -} +//===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis +// classes used to extract function properties. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/FunctionPropertiesAnalysis.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +FunctionPropertiesInfo +FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F, + const LoopInfo &LI) { + + FunctionPropertiesInfo FPI; + + FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); + + for (const auto &BB : F) { + ++FPI.BasicBlockCount; + + if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) { + if (BI->isConditional()) + FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors(); + } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) { + FPI.BlocksReachedFromConditionalInstruction += + (SI->getNumCases() + (nullptr != SI->getDefaultDest())); + } + + for (const auto &I : BB) { + if (auto *CS = dyn_cast<CallBase>(&I)) { + const auto *Callee = CS->getCalledFunction(); + if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) + ++FPI.DirectCallsToDefinedFunctions; + } + if (I.getOpcode() == Instruction::Load) { + ++FPI.LoadInstCount; + } else if (I.getOpcode() == Instruction::Store) { + ++FPI.StoreInstCount; + } + } + // Loop Depth of the Basic Block + int64_t LoopDepth; + LoopDepth = LI.getLoopDepth(&BB); + if (FPI.MaxLoopDepth < LoopDepth) + FPI.MaxLoopDepth = LoopDepth; + } + FPI.TopLevelLoopCount += llvm::size(LI); + return FPI; +} + +void FunctionPropertiesInfo::print(raw_ostream &OS) const { + OS << "BasicBlockCount: " << BasicBlockCount << "\n" + << "BlocksReachedFromConditionalInstruction: " + << BlocksReachedFromConditionalInstruction << "\n" + << "Uses: " << Uses << "\n" + << "DirectCallsToDefinedFunctions: " << DirectCallsToDefinedFunctions + << "\n" + << "LoadInstCount: " << LoadInstCount << "\n" + << "StoreInstCount: " << StoreInstCount << "\n" + << "MaxLoopDepth: " << MaxLoopDepth << "\n" + << "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n"; +} + +AnalysisKey FunctionPropertiesAnalysis::Key; + +FunctionPropertiesInfo +FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { + return FunctionPropertiesInfo::getFunctionPropertiesInfo( + F, FAM.getResult<LoopAnalysis>(F)); +} + +PreservedAnalyses +FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { + OS << "Printing analysis results of CFA for function " + << "'" << F.getName() << "':" + << "\n"; + AM.getResult<FunctionPropertiesAnalysis>(F).print(OS); + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Analysis/GlobalsModRef.cpp b/contrib/libs/llvm12/lib/Analysis/GlobalsModRef.cpp index 145baf82b6..d03e35fcfe 100644 --- a/contrib/libs/llvm12/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/libs/llvm12/lib/Analysis/GlobalsModRef.cpp @@ -44,7 +44,7 @@ STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); // An option to enable unsafe alias results from the GlobalsModRef analysis. // When enabled, GlobalsModRef will provide no-alias results which in extremely // rare cases may not be conservatively correct. 
In particular, in the face of -// transforms which cause asymmetry between how effective getUnderlyingObject +// transforms which cause asymmetry between how effective getUnderlyingObject // is for two pointers, it may produce incorrect results. // // These unsafe results have been returned by GMR for many years without @@ -367,8 +367,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { if (AnalyzeUsesOfPointer(I, Readers, Writers)) return true; - } else if (Operator::getOpcode(I) == Instruction::BitCast || - Operator::getOpcode(I) == Instruction::AddrSpaceCast) { + } else if (Operator::getOpcode(I) == Instruction::BitCast || + Operator::getOpcode(I) == Instruction::AddrSpaceCast) { if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) return true; } else if (auto *Call = dyn_cast<CallBase>(I)) { @@ -436,7 +436,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { continue; // Check the value being stored. - Value *Ptr = getUnderlyingObject(SI->getOperand(0)); + Value *Ptr = getUnderlyingObject(SI->getOperand(0)); if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction()))) return false; // Too hard to analyze. @@ -661,12 +661,12 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, return false; if (auto *LI = dyn_cast<LoadInst>(Input)) { - Inputs.push_back(getUnderlyingObject(LI->getPointerOperand())); + Inputs.push_back(getUnderlyingObject(LI->getPointerOperand())); continue; } if (auto *SI = dyn_cast<SelectInst>(Input)) { - const Value *LHS = getUnderlyingObject(SI->getTrueValue()); - const Value *RHS = getUnderlyingObject(SI->getFalseValue()); + const Value *LHS = getUnderlyingObject(SI->getTrueValue()); + const Value *RHS = getUnderlyingObject(SI->getFalseValue()); if (Visited.insert(LHS).second) Inputs.push_back(LHS); if (Visited.insert(RHS).second) @@ -675,7 +675,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, } if (auto *PN = dyn_cast<PHINode>(Input)) { for (const Value *Op : PN->incoming_values()) { - Op = getUnderlyingObject(Op); + Op = getUnderlyingObject(Op); if (Visited.insert(Op).second) Inputs.push_back(Op); } @@ -774,7 +774,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, if (auto *LI = dyn_cast<LoadInst>(Input)) { // A pointer loaded from a global would have been captured, and we know // that the global is non-escaping, so no alias. - const Value *Ptr = getUnderlyingObject(LI->getPointerOperand()); + const Value *Ptr = getUnderlyingObject(LI->getPointerOperand()); if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL)) // The load does not alias with GV. 
continue; @@ -782,8 +782,8 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, return false; } if (auto *SI = dyn_cast<SelectInst>(Input)) { - const Value *LHS = getUnderlyingObject(SI->getTrueValue()); - const Value *RHS = getUnderlyingObject(SI->getFalseValue()); + const Value *LHS = getUnderlyingObject(SI->getTrueValue()); + const Value *RHS = getUnderlyingObject(SI->getFalseValue()); if (Visited.insert(LHS).second) Inputs.push_back(LHS); if (Visited.insert(RHS).second) @@ -792,7 +792,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, } if (auto *PN = dyn_cast<PHINode>(Input)) { for (const Value *Op : PN->incoming_values()) { - Op = getUnderlyingObject(Op); + Op = getUnderlyingObject(Op); if (Visited.insert(Op).second) Inputs.push_back(Op); } @@ -827,10 +827,10 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { // Get the base object these pointers point to. - const Value *UV1 = - getUnderlyingObject(LocA.Ptr->stripPointerCastsAndInvariantGroups()); - const Value *UV2 = - getUnderlyingObject(LocB.Ptr->stripPointerCastsAndInvariantGroups()); + const Value *UV1 = + getUnderlyingObject(LocA.Ptr->stripPointerCastsAndInvariantGroups()); + const Value *UV2 = + getUnderlyingObject(LocB.Ptr->stripPointerCastsAndInvariantGroups()); // If either of the underlying values is a global, they may be non-addr-taken // globals, which we can answer queries about. @@ -917,15 +917,15 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call, // is based on GV, return the conservative result. for (auto &A : Call->args()) { SmallVector<const Value*, 4> Objects; - getUnderlyingObjects(A, Objects); + getUnderlyingObjects(A, Objects); // All objects must be identified. if (!all_of(Objects, isIdentifiedObject) && // Try ::alias to see if all objects are known not to alias GV. !all_of(Objects, [&](const Value *V) { - return this->alias(MemoryLocation::getBeforeOrAfter(V), - MemoryLocation::getBeforeOrAfter(GV), - AAQI) == NoAlias; + return this->alias(MemoryLocation::getBeforeOrAfter(V), + MemoryLocation::getBeforeOrAfter(GV), + AAQI) == NoAlias; })) return ConservativeResult; @@ -945,7 +945,7 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, // If we are asking for mod/ref info of a direct call with a pointer to a // global we are tracking, return information if we have it. if (const GlobalValue *GV = - dyn_cast<GlobalValue>(getUnderlyingObject(Loc.Ptr))) + dyn_cast<GlobalValue>(getUnderlyingObject(Loc.Ptr))) // If GV is internal to this IR and there is no function with local linkage // that has had their address taken, keep looking for a tighter ModRefInfo. if (GV->hasLocalLinkage() && !UnknownFunctionsWithLocalLinkage) diff --git a/contrib/libs/llvm12/lib/Analysis/IRSimilarityIdentifier.cpp b/contrib/libs/llvm12/lib/Analysis/IRSimilarityIdentifier.cpp index 25443a6679..751c8ddc9b 100644 --- a/contrib/libs/llvm12/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/contrib/libs/llvm12/lib/Analysis/IRSimilarityIdentifier.cpp @@ -1,937 +1,937 @@ -//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// \file -// Implementation file for the IRSimilarityIdentifier for identifying -// similarities in IR including the IRInstructionMapper. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/IRSimilarityIdentifier.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/User.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/SuffixTree.h" - -using namespace llvm; -using namespace IRSimilarity; - -IRInstructionData::IRInstructionData(Instruction &I, bool Legality, - IRInstructionDataList &IDList) - : Inst(&I), Legal(Legality), IDL(&IDList) { - // We check for whether we have a comparison instruction. If it is, we - // find the "less than" version of the predicate for consistency for - // comparison instructions throught the program. - if (CmpInst *C = dyn_cast<CmpInst>(&I)) { - CmpInst::Predicate Predicate = predicateForConsistency(C); - if (Predicate != C->getPredicate()) - RevisedPredicate = Predicate; - } - - // Here we collect the operands and their types for determining whether - // the structure of the operand use matches between two different candidates. - for (Use &OI : I.operands()) { - if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) { - // If we have a CmpInst where the predicate is reversed, it means the - // operands must be reversed as well. - OperVals.insert(OperVals.begin(), OI.get()); - continue; - } - - OperVals.push_back(OI.get()); - } -} - -CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { - switch (CI->getPredicate()) { - case CmpInst::FCMP_OGT: - case CmpInst::FCMP_UGT: - case CmpInst::FCMP_OGE: - case CmpInst::FCMP_UGE: - case CmpInst::ICMP_SGT: - case CmpInst::ICMP_UGT: - case CmpInst::ICMP_SGE: - case CmpInst::ICMP_UGE: - return CI->getSwappedPredicate(); - default: - return CI->getPredicate(); - } -} - -CmpInst::Predicate IRInstructionData::getPredicate() const { - assert(isa<CmpInst>(Inst) && - "Can only get a predicate from a compare instruction"); - - if (RevisedPredicate.hasValue()) - return RevisedPredicate.getValue(); - - return cast<CmpInst>(Inst)->getPredicate(); -} - -static StringRef getCalledFunctionName(CallInst &CI) { - assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); - - return CI.getCalledFunction()->getName(); -} - -bool IRSimilarity::isClose(const IRInstructionData &A, - const IRInstructionData &B) { - - if (!A.Legal || !B.Legal) - return false; - - // Check if we are performing the same sort of operation on the same types - // but not on the same values. - if (!A.Inst->isSameOperationAs(B.Inst)) { - // If there is a predicate, this means that either there is a swapped - // predicate, or that the types are different, we want to make sure that - // the predicates are equivalent via swapping. - if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { - - if (A.getPredicate() != B.getPredicate()) - return false; - - // If the predicates are the same via swap, make sure that the types are - // still the same. 
- auto ZippedTypes = zip(A.OperVals, B.OperVals); - - return all_of( - ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { - return std::get<0>(R)->getType() == std::get<1>(R)->getType(); - }); - } - - return false; - } - - // Since any GEP Instruction operands after the first operand cannot be - // defined by a register, we must make sure that the operands after the first - // are the same in the two instructions - if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { - auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); - - // If the instructions do not have the same inbounds restrictions, we do - // not consider them the same. - if (GEP->isInBounds() != OtherGEP->isInBounds()) - return false; - - auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); - - // We increment here since we do not care about the first instruction, - // we only care about the following operands since they must be the - // exact same to be considered similar. - return all_of(drop_begin(ZippedOperands), - [](std::tuple<llvm::Use &, llvm::Use &> R) { - return std::get<0>(R) == std::get<1>(R); - }); - } - - // If the instructions are functions, we make sure that the function name is - // the same. We already know that the types are since is isSameOperationAs is - // true. - if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { - CallInst *CIA = cast<CallInst>(A.Inst); - CallInst *CIB = cast<CallInst>(B.Inst); - if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) - return false; - } - - return true; -} - -// TODO: This is the same as the MachineOutliner, and should be consolidated -// into the same interface. -void IRInstructionMapper::convertToUnsignedVec( - BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, - std::vector<unsigned> &IntegerMapping) { - BasicBlock::iterator It = BB.begin(); - - std::vector<unsigned> IntegerMappingForBB; - std::vector<IRInstructionData *> InstrListForBB; - - HaveLegalRange = false; - CanCombineWithPrevInstr = false; - AddedIllegalLastTime = true; - - for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { - switch (InstClassifier.visit(*It)) { - case InstrType::Legal: - mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); - break; - case InstrType::Illegal: - mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); - break; - case InstrType::Invisible: - AddedIllegalLastTime = false; - break; - } - } - - if (HaveLegalRange) { - mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); - for_each(InstrListForBB, - [this](IRInstructionData *ID) { this->IDL->push_back(*ID); }); - llvm::append_range(InstrList, InstrListForBB); - llvm::append_range(IntegerMapping, IntegerMappingForBB); - } -} - -// TODO: This is the same as the MachineOutliner, and should be consolidated -// into the same interface. -unsigned IRInstructionMapper::mapToLegalUnsigned( - BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, - std::vector<IRInstructionData *> &InstrListForBB) { - // We added something legal, so we should unset the AddedLegalLastTime - // flag. - AddedIllegalLastTime = false; - - // If we have at least two adjacent legal instructions (which may have - // invisible instructions in between), remember that. - if (CanCombineWithPrevInstr) - HaveLegalRange = true; - CanCombineWithPrevInstr = true; - - // Get the integer for this instruction or give it the current - // LegalInstrNumber. 
- IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); - InstrListForBB.push_back(ID); - - // Add to the instruction list - bool WasInserted; - DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator - ResultIt; - std::tie(ResultIt, WasInserted) = - InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); - unsigned INumber = ResultIt->second; - - // There was an insertion. - if (WasInserted) - LegalInstrNumber++; - - IntegerMappingForBB.push_back(INumber); - - // Make sure we don't overflow or use any integers reserved by the DenseMap. - assert(LegalInstrNumber < IllegalInstrNumber && - "Instruction mapping overflow!"); - - assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && - "Tried to assign DenseMap tombstone or empty key to instruction."); - assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && - "Tried to assign DenseMap tombstone or empty key to instruction."); - - return INumber; -} - -IRInstructionData * -IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, - IRInstructionDataList &IDL) { - return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); -} - -IRInstructionDataList * -IRInstructionMapper::allocateIRInstructionDataList() { - return new (IDLAllocator->Allocate()) IRInstructionDataList(); -} - -// TODO: This is the same as the MachineOutliner, and should be consolidated -// into the same interface. -unsigned IRInstructionMapper::mapToIllegalUnsigned( - BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, - std::vector<IRInstructionData *> &InstrListForBB, bool End) { - // Can't combine an illegal instruction. Set the flag. - CanCombineWithPrevInstr = false; - - // Only add one illegal number per range of legal numbers. - if (AddedIllegalLastTime) - return IllegalInstrNumber; - - IRInstructionData *ID = nullptr; - if (!End) - ID = allocateIRInstructionData(*It, false, *IDL); - InstrListForBB.push_back(ID); - - // Remember that we added an illegal number last time. - AddedIllegalLastTime = true; - unsigned INumber = IllegalInstrNumber; - IntegerMappingForBB.push_back(IllegalInstrNumber--); - - assert(LegalInstrNumber < IllegalInstrNumber && - "Instruction mapping overflow!"); - - assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && - "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); - - assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && - "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); - - return INumber; -} - -IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, - IRInstructionData *FirstInstIt, - IRInstructionData *LastInstIt) - : StartIdx(StartIdx), Len(Len) { - - assert(FirstInstIt != nullptr && "Instruction is nullptr!"); - assert(LastInstIt != nullptr && "Instruction is nullptr!"); - assert(StartIdx + Len > StartIdx && - "Overflow for IRSimilarityCandidate range?"); - assert(Len - 1 == static_cast<unsigned>(std::distance( - iterator(FirstInstIt), iterator(LastInstIt))) && - "Length of the first and last IRInstructionData do not match the " - "given length"); - - // We iterate over the given instructions, and map each unique value - // to a unique number in the IRSimilarityCandidate ValueToNumber and - // NumberToValue maps. A constant get its own value globally, the individual - // uses of the constants are not considered to be unique. 
- // - // IR: Mapping Added: - // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 - // %add2 = add i32 %a, %1 %add2 -> 4 - // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 - // - // when replace with global values, starting from 1, would be - // - // 3 = add i32 1, 2 - // 4 = add i32 1, 3 - // 6 = add i32 5, 2 - unsigned LocalValNumber = 1; - IRInstructionDataList::iterator ID = iterator(*FirstInstIt); - for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { - // Map the operand values to an unsigned integer if it does not already - // have an unsigned integer assigned to it. - for (Value *Arg : ID->OperVals) - if (ValueToNumber.find(Arg) == ValueToNumber.end()) { - ValueToNumber.try_emplace(Arg, LocalValNumber); - NumberToValue.try_emplace(LocalValNumber, Arg); - LocalValNumber++; - } - - // Mapping the instructions to an unsigned integer if it is not already - // exist in the mapping. - if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { - ValueToNumber.try_emplace(ID->Inst, LocalValNumber); - NumberToValue.try_emplace(LocalValNumber, ID->Inst); - LocalValNumber++; - } - } - - // Setting the first and last instruction data pointers for the candidate. If - // we got through the entire for loop without hitting an assert, we know - // that both of these instructions are not nullptrs. - FirstInst = FirstInstIt; - LastInst = LastInstIt; -} - -bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, - const IRSimilarityCandidate &B) { - if (A.getLength() != B.getLength()) - return false; - - auto InstrDataForBoth = - zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); - - return all_of(InstrDataForBoth, - [](std::tuple<IRInstructionData &, IRInstructionData &> R) { - IRInstructionData &A = std::get<0>(R); - IRInstructionData &B = std::get<1>(R); - if (!A.Legal || !B.Legal) - return false; - return isClose(A, B); - }); -} - -/// Determine if one or more of the assigned global value numbers for the -/// operands in \p TargetValueNumbers is in the current mapping set for operand -/// numbers in \p SourceOperands. The set of possible corresponding global -/// value numbers are replaced with the most recent version of compatible -/// values. -/// -/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global -/// value number for the source IRInstructionCandidate. -/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source -/// IRSimilarityCandidate global value numbers to a set of possible numbers in -/// the target. -/// \param [in] SourceOperands - The operands in the original -/// IRSimilarityCandidate in the current instruction. -/// \param [in] TargetValueNumbers - The global value numbers of the operands in -/// the corresponding Instruction in the other IRSimilarityCandidate. -/// \returns true if there exists a possible mapping between the source -/// Instruction operands and the target Instruction operands, and false if not. 
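Before the implementation that follows, a small illustrative sketch of the candidate-set idea this comment describes: each source global value number keeps the set of target numbers it could still correspond to, every new occurrence intersects that set, and an empty intersection rules the mapping out. Plain STL containers and made-up value numbers are used here; this is not the LLVM routine itself:

#include <cstdio>
#include <map>
#include <set>
#include <utility>

// Intersect the candidate target numbers for one source global value number.
// An empty intersection means the two candidates cannot be mapped onto each
// other.
static bool refineMapping(std::map<unsigned, std::set<unsigned>> &SrcToTgt,
                          unsigned SrcGVN, const std::set<unsigned> &TgtGVNs) {
  auto Inserted = SrcToTgt.insert({SrcGVN, TgtGVNs});
  if (Inserted.second)
    return true; // first constraint: every target number is still possible
  std::set<unsigned> Narrowed;
  for (unsigned T : Inserted.first->second)
    if (TgtGVNs.count(T))
      Narrowed.insert(T);
  if (Narrowed.empty())
    return false; // contradictory constraints
  Inserted.first->second = std::move(Narrowed);
  return true;
}

int main() {
  std::map<unsigned, std::set<unsigned>> Mapping;
  bool Ok = refineMapping(Mapping, 1, {1, 2}) && // first use: 1 -> {1, 2}
            refineMapping(Mapping, 1, {2, 3});   // second use narrows it to {2}
  std::printf("mapping %s, 1 -> %u\n", Ok ? "ok" : "failed",
              Ok ? *Mapping[1].begin() : 0u);
  return 0;
}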
-static bool checkNumberingAndReplaceCommutative( - const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, - DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, - ArrayRef<Value *> &SourceOperands, - DenseSet<unsigned> &TargetValueNumbers){ - - DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; - - unsigned ArgVal; - bool WasInserted; - - // Iterate over the operands in the source IRSimilarityCandidate to determine - // whether there exists an operand in the other IRSimilarityCandidate that - // creates a valid mapping of Value to Value between the - // IRSimilarityCaniddates. - for (Value *V : SourceOperands) { - ArgVal = SourceValueToNumberMapping.find(V)->second; - - std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( - std::make_pair(ArgVal, TargetValueNumbers)); - - // Instead of finding a current mapping, we inserted a set. This means a - // mapping did not exist for the source Instruction operand, it has no - // current constraints we need to check. - if (WasInserted) - continue; - - // If a mapping already exists for the source operand to the values in the - // other IRSimilarityCandidate we need to iterate over the items in other - // IRSimilarityCandidate's Instruction to determine whether there is a valid - // mapping of Value to Value. - DenseSet<unsigned> NewSet; - for (unsigned &Curr : ValueMappingIt->second) - // If we can find the value in the mapping, we add it to the new set. - if (TargetValueNumbers.contains(Curr)) - NewSet.insert(Curr); - - // If we could not find a Value, return 0. - if (NewSet.empty()) - return false; - - // Otherwise replace the old mapping with the newly constructed one. - if (NewSet.size() != ValueMappingIt->second.size()) - ValueMappingIt->second.swap(NewSet); - - // We have reached no conclusions about the mapping, and cannot remove - // any items from the other operands, so we move to check the next operand. - if (ValueMappingIt->second.size() != 1) - continue; - - - unsigned ValToRemove = *ValueMappingIt->second.begin(); - // When there is only one item left in the mapping for and operand, remove - // the value from the other operands. If it results in there being no - // mapping, return false, it means the mapping is wrong - for (Value *InnerV : SourceOperands) { - if (V == InnerV) - continue; - - unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; - ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); - if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) - continue; - - ValueMappingIt->second.erase(ValToRemove); - if (ValueMappingIt->second.empty()) - return false; - } - } - - return true; -} - -/// Determine if operand number \p TargetArgVal is in the current mapping set -/// for operand number \p SourceArgVal. -/// -/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global -/// value numbers from source IRSimilarityCandidate to target -/// IRSimilarityCandidate. -/// \param [in] SourceArgVal The global value number for an operand in the -/// in the original candidate. -/// \param [in] TargetArgVal The global value number for the corresponding -/// operand in the other candidate. -/// \returns True if there exists a mapping and false if not. 
-bool checkNumberingAndReplace( - DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, - unsigned SourceArgVal, unsigned TargetArgVal) { - // We are given two unsigned integers representing the global values of - // the operands in different IRSimilarityCandidates and a current mapping - // between the two. - // - // Source Operand GVN: 1 - // Target Operand GVN: 2 - // CurrentMapping: {1: {1, 2}} - // - // Since we have mapping, and the target operand is contained in the set, we - // update it to: - // CurrentMapping: {1: {2}} - // and can return true. But, if the mapping was - // CurrentMapping: {1: {3}} - // we would return false. - - bool WasInserted; - DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; - - std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( - std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); - - // If we created a new mapping, then we are done. - if (WasInserted) - return true; - - // If there is more than one option in the mapping set, and the target value - // is included in the mapping set replace that set with one that only includes - // the target value, as it is the only valid mapping via the non commutative - // instruction. - - DenseSet<unsigned> &TargetSet = Val->second; - if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { - TargetSet.clear(); - TargetSet.insert(TargetArgVal); - return true; - } - - // Return true if we can find the value in the set. - return TargetSet.contains(TargetArgVal); -} - -bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( - OperandMapping A, OperandMapping B) { - // Iterators to keep track of where we are in the operands for each - // Instruction. - ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); - ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); - unsigned OperandLength = A.OperVals.size(); - - // For each operand, get the value numbering and ensure it is consistent. - for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { - unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; - unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; - - // Attempt to add a set with only the target value. If there is no mapping - // we can create it here. - // - // For an instruction like a subtraction: - // IRSimilarityCandidateA: IRSimilarityCandidateB: - // %resultA = sub %a, %b %resultB = sub %d, %e - // - // We map %a -> %d and %b -> %e. - // - // And check to see whether their mapping is consistent in - // checkNumberingAndReplace. - - if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) - return false; - - if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) - return false; - } - return true; -} - -bool IRSimilarityCandidate::compareCommutativeOperandMapping( - OperandMapping A, OperandMapping B) { - DenseSet<unsigned> ValueNumbersA; - DenseSet<unsigned> ValueNumbersB; - - ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); - ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); - unsigned OperandLength = A.OperVals.size(); - - // Find the value number sets for the operands. - for (unsigned Idx = 0; Idx < OperandLength; - Idx++, VItA++, VItB++) { - ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); - ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); - } - - // Iterate over the operands in the first IRSimilarityCandidate and make sure - // there exists a possible mapping with the operands in the second - // IRSimilarityCandidate. 
- if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, - A.ValueNumberMapping, A.OperVals, - ValueNumbersB)) - return false; - - // Iterate over the operands in the second IRSimilarityCandidate and make sure - // there exists a possible mapping with the operands in the first - // IRSimilarityCandidate. - if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, - B.ValueNumberMapping, B.OperVals, - ValueNumbersA)) - return false; - - return true; -} - -bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, - const IRSimilarityCandidate &B) { - if (A.getLength() != B.getLength()) - return false; - - if (A.ValueToNumber.size() != B.ValueToNumber.size()) - return false; - - iterator ItA = A.begin(); - iterator ItB = B.begin(); - - // These sets create a create a mapping between the values in one candidate - // to values in the other candidate. If we create a set with one element, - // and that same element maps to the original element in the candidate - // we have a good mapping. - DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; - DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; - DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; - - bool WasInserted; - - // Iterate over the instructions contained in each candidate - unsigned SectionLength = A.getStartIdx() + A.getLength(); - for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; - ItA++, ItB++, Loc++) { - // Make sure the instructions are similar to one another. - if (!isClose(*ItA, *ItB)) - return false; - - Instruction *IA = ItA->Inst; - Instruction *IB = ItB->Inst; - - if (!ItA->Legal || !ItB->Legal) - return false; - - // Get the operand sets for the instructions. - ArrayRef<Value *> OperValsA = ItA->OperVals; - ArrayRef<Value *> OperValsB = ItB->OperVals; - - unsigned InstValA = A.ValueToNumber.find(IA)->second; - unsigned InstValB = B.ValueToNumber.find(IB)->second; - - // Ensure that the mappings for the instructions exists. - std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( - std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); - if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) - return false; - - std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( - std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); - if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) - return false; - - // We have different paths for commutative instructions and non-commutative - // instructions since commutative instructions could allow multiple mappings - // to certain values. - if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { - if (!compareCommutativeOperandMapping( - {A, OperValsA, ValueNumberMappingA}, - {B, OperValsB, ValueNumberMappingB})) - return false; - continue; - } - - // Handle the non-commutative cases. 
- if (!compareNonCommutativeOperandMapping( - {A, OperValsA, ValueNumberMappingA}, - {B, OperValsB, ValueNumberMappingB})) - return false; - } - return true; -} - -bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, - const IRSimilarityCandidate &B) { - auto DoesOverlap = [](const IRSimilarityCandidate &X, - const IRSimilarityCandidate &Y) { - // Check: - // XXXXXX X starts before Y ends - // YYYYYYY Y starts after X starts - return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; - }; - - return DoesOverlap(A, B) || DoesOverlap(B, A); -} - -void IRSimilarityIdentifier::populateMapper( - Module &M, std::vector<IRInstructionData *> &InstrList, - std::vector<unsigned> &IntegerMapping) { - - std::vector<IRInstructionData *> InstrListForModule; - std::vector<unsigned> IntegerMappingForModule; - // Iterate over the functions in the module to map each Instruction in each - // BasicBlock to an unsigned integer. - for (Function &F : M) { - - if (F.empty()) - continue; - - for (BasicBlock &BB : F) { - - if (BB.sizeWithoutDebug() < 2) - continue; - - // BB has potential to have similarity since it has a size greater than 2 - // and can therefore match other regions greater than 2. Map it to a list - // of unsigned integers. - Mapper.convertToUnsignedVec(BB, InstrListForModule, - IntegerMappingForModule); - } - } - - // Insert the InstrListForModule at the end of the overall InstrList so that - // we can have a long InstrList for the entire set of Modules being analyzed. - llvm::append_range(InstrList, InstrListForModule); - // Do the same as above, but for IntegerMapping. - llvm::append_range(IntegerMapping, IntegerMappingForModule); -} - -void IRSimilarityIdentifier::populateMapper( - ArrayRef<std::unique_ptr<Module>> &Modules, - std::vector<IRInstructionData *> &InstrList, - std::vector<unsigned> &IntegerMapping) { - - // Iterate over, and map the instructions in each module. - for (const std::unique_ptr<Module> &M : Modules) - populateMapper(*M, InstrList, IntegerMapping); -} - -/// From a repeated subsequence, find all the different instances of the -/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from -/// the IRInstructionData in subsequence. -/// -/// \param [in] Mapper - The instruction mapper for sanity checks. -/// \param [in] InstrList - The vector that holds the instruction data. -/// \param [in] IntegerMapping - The vector that holds the mapped integers. -/// \param [out] CandsForRepSubstring - The vector to store the generated -/// IRSimilarityCandidates. -static void createCandidatesFromSuffixTree( - IRInstructionMapper Mapper, std::vector<IRInstructionData *> &InstrList, - std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, - std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { - - unsigned StringLen = RS.Length; - - // Create an IRSimilarityCandidate for instance of this subsequence \p RS. - for (const unsigned &StartIdx : RS.StartIndices) { - unsigned EndIdx = StartIdx + StringLen - 1; - - // Check that this subsequence does not contain an illegal instruction. - bool ContainsIllegal = false; - for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { - unsigned Key = IntegerMapping[CurrIdx]; - if (Key > Mapper.IllegalInstrNumber) { - ContainsIllegal = true; - break; - } - } - - // If we have an illegal instruction, we should not create an - // IRSimilarityCandidate for this region. 
- if (ContainsIllegal) - continue; - - // We are getting iterators to the instructions in this region of code - // by advancing the start and end indices from the start of the - // InstrList. - std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); - std::advance(StartIt, StartIdx); - std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); - std::advance(EndIt, EndIdx); - - CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); - } -} - -/// From the list of IRSimilarityCandidates, perform a comparison between each -/// IRSimilarityCandidate to determine if there are overlapping -/// IRInstructionData, or if they do not have the same structure. -/// -/// \param [in] CandsForRepSubstring - The vector containing the -/// IRSimilarityCandidates. -/// \param [out] StructuralGroups - the mapping of unsigned integers to vector -/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the -/// vector are structurally similar to one another. -static void findCandidateStructures( - std::vector<IRSimilarityCandidate> &CandsForRepSubstring, - DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { - std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, - InnerCandEndIt; - - // IRSimilarityCandidates each have a structure for operand use. It is - // possible that two instances of the same subsequences have different - // structure. Each type of structure found is assigned a number. This - // DenseMap maps an IRSimilarityCandidate to which type of similarity - // discovered it fits within. - DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; - - // Find the compatibility from each candidate to the others to determine - // which candidates overlap and which have the same structure by mapping - // each structure to a different group. - bool SameStructure; - bool Inserted; - unsigned CurrentGroupNum = 0; - unsigned OuterGroupNum; - DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; - DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; - DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; - - // Iterate over the candidates to determine its structural and overlapping - // compatibility with other instructions - for (CandIt = CandsForRepSubstring.begin(), - CandEndIt = CandsForRepSubstring.end(); - CandIt != CandEndIt; CandIt++) { - - // Determine if it has an assigned structural group already. - CandToGroupIt = CandToGroup.find(&*CandIt); - if (CandToGroupIt == CandToGroup.end()) { - // If not, we assign it one, and add it to our mapping. - std::tie(CandToGroupIt, Inserted) = - CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); - } - - // Get the structural group number from the iterator. - OuterGroupNum = CandToGroupIt->second; - - // Check if we already have a list of IRSimilarityCandidates for the current - // structural group. Create one if one does not exist. - CurrentGroupPair = StructuralGroups.find(OuterGroupNum); - if (CurrentGroupPair == StructuralGroups.end()) - std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( - std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); - - // Iterate over the IRSimilarityCandidates following the current - // IRSimilarityCandidate in the list to determine whether the two - // IRSimilarityCandidates are compatible. This is so we do not repeat pairs - // of IRSimilarityCandidates. 
- for (InnerCandIt = std::next(CandIt), - InnerCandEndIt = CandsForRepSubstring.end(); - InnerCandIt != InnerCandEndIt; InnerCandIt++) { - - // We check if the inner item has a group already, if it does, we skip it. - CandToGroupItInner = CandToGroup.find(&*InnerCandIt); - if (CandToGroupItInner != CandToGroup.end()) - continue; - - // Otherwise we determine if they have the same structure and add it to - // vector if they match. - SameStructure = - IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); - if (!SameStructure) - continue; - - CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); - CurrentGroupPair->second.push_back(*InnerCandIt); - } - } -} - -void IRSimilarityIdentifier::findCandidates( - std::vector<IRInstructionData *> &InstrList, - std::vector<unsigned> &IntegerMapping) { - SuffixTree ST(IntegerMapping); - - std::vector<IRSimilarityCandidate> CandsForRepSubstring; - std::vector<SimilarityGroup> NewCandidateGroups; - - DenseMap<unsigned, SimilarityGroup> StructuralGroups; - - // Iterate over the subsequences found by the Suffix Tree to create - // IRSimilarityCandidates for each repeated subsequence and determine which - // instances are structurally similar to one another. - for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { - createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, *It, - CandsForRepSubstring); - - if (CandsForRepSubstring.size() < 2) - continue; - - findCandidateStructures(CandsForRepSubstring, StructuralGroups); - for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) - // We only add the group if it contains more than one - // IRSimilarityCandidate. If there is only one, that means there is no - // other repeated subsequence with the same structure. - if (Group.second.size() > 1) - SimilarityCandidates->push_back(Group.second); - - CandsForRepSubstring.clear(); - StructuralGroups.clear(); - NewCandidateGroups.clear(); - } -} - -SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( - ArrayRef<std::unique_ptr<Module>> Modules) { - resetSimilarityCandidates(); - - std::vector<IRInstructionData *> InstrList; - std::vector<unsigned> IntegerMapping; - - populateMapper(Modules, InstrList, IntegerMapping); - findCandidates(InstrList, IntegerMapping); - - return SimilarityCandidates.getValue(); -} - -SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { - resetSimilarityCandidates(); - - std::vector<IRInstructionData *> InstrList; - std::vector<unsigned> IntegerMapping; - - populateMapper(M, InstrList, IntegerMapping); - findCandidates(InstrList, IntegerMapping); - - return SimilarityCandidates.getValue(); -} - -INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", - "ir-similarity-identifier", false, true) - -IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() - : ModulePass(ID) { - initializeIRSimilarityIdentifierWrapperPassPass( - *PassRegistry::getPassRegistry()); -} - -bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { - IRSI.reset(new IRSimilarityIdentifier(M)); - return false; -} - -bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { - IRSI.reset(); - return false; -} - -bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { - // All the real work is done in the constructor for the pass. 
- IRSI.reset(new IRSimilarityIdentifier(M)); - return false; -} - -AnalysisKey IRSimilarityAnalysis::Key; -IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, - ModuleAnalysisManager &) { - - return IRSimilarityIdentifier(M); -} - -PreservedAnalyses -IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { - IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); - Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); - - for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { - OS << CandVec.size() << " candidates of length " - << CandVec.begin()->getLength() << ". Found in: \n"; - for (IRSimilarityCandidate &Cand : CandVec) { - OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() - << ", Basic Block: "; - if (Cand.front()->Inst->getParent()->getName().str() == "") - OS << "(unnamed)\n"; - else - OS << Cand.front()->Inst->getParent()->getName().str() << "\n"; - } - } - - return PreservedAnalyses::all(); -} - -char IRSimilarityIdentifierWrapperPass::ID = 0; +//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Implementation file for the IRSimilarityIdentifier for identifying +// similarities in IR including the IRInstructionMapper. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/SuffixTree.h" + +using namespace llvm; +using namespace IRSimilarity; + +IRInstructionData::IRInstructionData(Instruction &I, bool Legality, + IRInstructionDataList &IDList) + : Inst(&I), Legal(Legality), IDL(&IDList) { + // We check for whether we have a comparison instruction. If it is, we + // find the "less than" version of the predicate for consistency for + // comparison instructions throught the program. + if (CmpInst *C = dyn_cast<CmpInst>(&I)) { + CmpInst::Predicate Predicate = predicateForConsistency(C); + if (Predicate != C->getPredicate()) + RevisedPredicate = Predicate; + } + + // Here we collect the operands and their types for determining whether + // the structure of the operand use matches between two different candidates. + for (Use &OI : I.operands()) { + if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) { + // If we have a CmpInst where the predicate is reversed, it means the + // operands must be reversed as well. 
+ OperVals.insert(OperVals.begin(), OI.get()); + continue; + } + + OperVals.push_back(OI.get()); + } +} + +CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { + switch (CI->getPredicate()) { + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_SGE: + case CmpInst::ICMP_UGE: + return CI->getSwappedPredicate(); + default: + return CI->getPredicate(); + } +} + +CmpInst::Predicate IRInstructionData::getPredicate() const { + assert(isa<CmpInst>(Inst) && + "Can only get a predicate from a compare instruction"); + + if (RevisedPredicate.hasValue()) + return RevisedPredicate.getValue(); + + return cast<CmpInst>(Inst)->getPredicate(); +} + +static StringRef getCalledFunctionName(CallInst &CI) { + assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); + + return CI.getCalledFunction()->getName(); +} + +bool IRSimilarity::isClose(const IRInstructionData &A, + const IRInstructionData &B) { + + if (!A.Legal || !B.Legal) + return false; + + // Check if we are performing the same sort of operation on the same types + // but not on the same values. + if (!A.Inst->isSameOperationAs(B.Inst)) { + // If there is a predicate, this means that either there is a swapped + // predicate, or that the types are different, we want to make sure that + // the predicates are equivalent via swapping. + if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { + + if (A.getPredicate() != B.getPredicate()) + return false; + + // If the predicates are the same via swap, make sure that the types are + // still the same. + auto ZippedTypes = zip(A.OperVals, B.OperVals); + + return all_of( + ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { + return std::get<0>(R)->getType() == std::get<1>(R)->getType(); + }); + } + + return false; + } + + // Since any GEP Instruction operands after the first operand cannot be + // defined by a register, we must make sure that the operands after the first + // are the same in the two instructions + if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { + auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); + + // If the instructions do not have the same inbounds restrictions, we do + // not consider them the same. + if (GEP->isInBounds() != OtherGEP->isInBounds()) + return false; + + auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); + + // We increment here since we do not care about the first instruction, + // we only care about the following operands since they must be the + // exact same to be considered similar. + return all_of(drop_begin(ZippedOperands), + [](std::tuple<llvm::Use &, llvm::Use &> R) { + return std::get<0>(R) == std::get<1>(R); + }); + } + + // If the instructions are functions, we make sure that the function name is + // the same. We already know that the types are since is isSameOperationAs is + // true. + if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { + CallInst *CIA = cast<CallInst>(A.Inst); + CallInst *CIB = cast<CallInst>(B.Inst); + if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) + return false; + } + + return true; +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. 
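// Note (illustrative sketch, not part of the patch): isClose and
// predicateForConsistency above canonicalize "greater-than" style compares to
// their swapped "less-than" forms so that `a > b` and `b < a` are treated as
// the same operation with reordered operands. A minimal standalone version of
// that idea in plain C++ (the enum and names here are assumptions for
// illustration only, not the LLVM API):
#include <cstdio>
#include <string>
#include <utility>

enum class Pred { LT, LE, GT, GE, EQ, NE };

// Return the "less than" flavour of a predicate; EQ/NE and the already
// canonical forms are left alone.
static Pred canonicalize(Pred P) {
  switch (P) {
  case Pred::GT: return Pred::LT;
  case Pred::GE: return Pred::LE;
  default:       return P;
  }
}

struct Cmp {
  Pred P;
  std::string LHS, RHS;
};

// Canonicalize a compare: if the predicate had to be swapped, the operands
// must be swapped as well, mirroring the OperVals reversal above.
static Cmp canonicalizeCmp(Cmp C) {
  Pred Canon = canonicalize(C.P);
  if (Canon != C.P)
    std::swap(C.LHS, C.RHS);
  C.P = Canon;
  return C;
}

int main() {
  Cmp A = canonicalizeCmp({Pred::GT, "a", "b"}); // a > b
  Cmp B = canonicalizeCmp({Pred::LT, "b", "a"}); // b < a, already canonical
  std::printf("same after canonicalization: %s\n",
              (A.P == B.P && A.LHS == B.LHS && A.RHS == B.RHS) ? "yes" : "no");
  return 0;
}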
+void IRInstructionMapper::convertToUnsignedVec( + BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping) { + BasicBlock::iterator It = BB.begin(); + + std::vector<unsigned> IntegerMappingForBB; + std::vector<IRInstructionData *> InstrListForBB; + + HaveLegalRange = false; + CanCombineWithPrevInstr = false; + AddedIllegalLastTime = true; + + for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { + switch (InstClassifier.visit(*It)) { + case InstrType::Legal: + mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Illegal: + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Invisible: + AddedIllegalLastTime = false; + break; + } + } + + if (HaveLegalRange) { + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + for_each(InstrListForBB, + [this](IRInstructionData *ID) { this->IDL->push_back(*ID); }); + llvm::append_range(InstrList, InstrListForBB); + llvm::append_range(IntegerMapping, IntegerMappingForBB); + } +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToLegalUnsigned( + BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, + std::vector<IRInstructionData *> &InstrListForBB) { + // We added something legal, so we should unset the AddedLegalLastTime + // flag. + AddedIllegalLastTime = false; + + // If we have at least two adjacent legal instructions (which may have + // invisible instructions in between), remember that. + if (CanCombineWithPrevInstr) + HaveLegalRange = true; + CanCombineWithPrevInstr = true; + + // Get the integer for this instruction or give it the current + // LegalInstrNumber. + IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); + InstrListForBB.push_back(ID); + + // Add to the instruction list + bool WasInserted; + DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator + ResultIt; + std::tie(ResultIt, WasInserted) = + InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); + unsigned INumber = ResultIt->second; + + // There was an insertion. + if (WasInserted) + LegalInstrNumber++; + + IntegerMappingForBB.push_back(INumber); + + // Make sure we don't overflow or use any integers reserved by the DenseMap. + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + + return INumber; +} + +IRInstructionData * +IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, + IRInstructionDataList &IDL) { + return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); +} + +IRInstructionDataList * +IRInstructionMapper::allocateIRInstructionDataList() { + return new (IDLAllocator->Allocate()) IRInstructionDataList(); +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToIllegalUnsigned( + BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, + std::vector<IRInstructionData *> &InstrListForBB, bool End) { + // Can't combine an illegal instruction. Set the flag. 
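// Note (illustrative sketch, not part of the patch): the mapper above gives
// every structurally identical "legal" instruction the same small integer,
// counting up from 0, and gives each "illegal" instruction its own integer
// counting down from the top of the range, so illegal positions can never be
// part of a repeated substring. A rough standalone sketch of that numbering
// scheme in plain C++ (names are assumptions, not the LLVM interface):
#include <climits>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct InstrNumberer {
  unsigned NextLegal = 0;
  unsigned NextIllegal = UINT_MAX;
  std::unordered_map<std::string, unsigned> LegalIds;

  // Legal instructions with the same "shape" get the same integer.
  unsigned mapLegal(const std::string &Shape) {
    auto It = LegalIds.try_emplace(Shape, NextLegal);
    if (It.second)
      ++NextLegal;
    return It.first->second;
  }

  // Illegal instructions each get a fresh integer from the top of the range.
  unsigned mapIllegal() { return NextIllegal--; }
};

int main() {
  InstrNumberer N;
  std::vector<unsigned> Mapping = {
      N.mapLegal("add i32"), N.mapLegal("mul i32"), N.mapIllegal(),
      N.mapLegal("add i32"), N.mapLegal("mul i32")};
  for (unsigned U : Mapping)
    std::printf("%u ", U); // e.g. 0 1 4294967295 0 1
  std::printf("\n");
  return 0;
}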
+ CanCombineWithPrevInstr = false; + + // Only add one illegal number per range of legal numbers. + if (AddedIllegalLastTime) + return IllegalInstrNumber; + + IRInstructionData *ID = nullptr; + if (!End) + ID = allocateIRInstructionData(*It, false, *IDL); + InstrListForBB.push_back(ID); + + // Remember that we added an illegal number last time. + AddedIllegalLastTime = true; + unsigned INumber = IllegalInstrNumber; + IntegerMappingForBB.push_back(IllegalInstrNumber--); + + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + return INumber; +} + +IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, + IRInstructionData *FirstInstIt, + IRInstructionData *LastInstIt) + : StartIdx(StartIdx), Len(Len) { + + assert(FirstInstIt != nullptr && "Instruction is nullptr!"); + assert(LastInstIt != nullptr && "Instruction is nullptr!"); + assert(StartIdx + Len > StartIdx && + "Overflow for IRSimilarityCandidate range?"); + assert(Len - 1 == static_cast<unsigned>(std::distance( + iterator(FirstInstIt), iterator(LastInstIt))) && + "Length of the first and last IRInstructionData do not match the " + "given length"); + + // We iterate over the given instructions, and map each unique value + // to a unique number in the IRSimilarityCandidate ValueToNumber and + // NumberToValue maps. A constant get its own value globally, the individual + // uses of the constants are not considered to be unique. + // + // IR: Mapping Added: + // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 + // %add2 = add i32 %a, %1 %add2 -> 4 + // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 + // + // when replace with global values, starting from 1, would be + // + // 3 = add i32 1, 2 + // 4 = add i32 1, 3 + // 6 = add i32 5, 2 + unsigned LocalValNumber = 1; + IRInstructionDataList::iterator ID = iterator(*FirstInstIt); + for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { + // Map the operand values to an unsigned integer if it does not already + // have an unsigned integer assigned to it. + for (Value *Arg : ID->OperVals) + if (ValueToNumber.find(Arg) == ValueToNumber.end()) { + ValueToNumber.try_emplace(Arg, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, Arg); + LocalValNumber++; + } + + // Mapping the instructions to an unsigned integer if it is not already + // exist in the mapping. + if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { + ValueToNumber.try_emplace(ID->Inst, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, ID->Inst); + LocalValNumber++; + } + } + + // Setting the first and last instruction data pointers for the candidate. If + // we got through the entire for loop without hitting an assert, we know + // that both of these instructions are not nullptrs. 
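// Note (illustrative sketch, not part of the patch): the constructor above
// gives every distinct Value inside a candidate a small number in order of
// first appearance, so two candidates can later be compared purely on that
// numbering structure. A simplified standalone version of the local value
// numbering over textual "instructions", reproducing the example from the
// comments (plain C++; the token representation is an assumption):
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Each row is {result, operand0, operand1}, mirroring
  //   %add1 = add i32 %a, c1 ; %add2 = add i32 %a, %add1 ; %add3 = add i32 c2, c1
  std::vector<std::vector<std::string>> Insts = {
      {"%add1", "%a", "c1"}, {"%add2", "%a", "%add1"}, {"%add3", "c2", "c1"}};

  std::map<std::string, unsigned> ValueToNumber;
  unsigned Next = 1;
  for (const auto &I : Insts) {
    // Operands first, then the instruction itself, as in the constructor.
    for (size_t Op = 1; Op < I.size(); ++Op)
      if (ValueToNumber.try_emplace(I[Op], Next).second)
        ++Next;
    if (ValueToNumber.try_emplace(I[0], Next).second)
      ++Next;
  }

  for (const auto &I : Insts)
    std::printf("%u = op %u, %u\n", ValueToNumber[I[0]],
                ValueToNumber[I[1]], ValueToNumber[I[2]]);
  // Prints: 3 = op 1, 2 / 4 = op 1, 3 / 6 = op 5, 2
  return 0;
}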
+ FirstInst = FirstInstIt; + LastInst = LastInstIt; +} + +bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + if (A.getLength() != B.getLength()) + return false; + + auto InstrDataForBoth = + zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); + + return all_of(InstrDataForBoth, + [](std::tuple<IRInstructionData &, IRInstructionData &> R) { + IRInstructionData &A = std::get<0>(R); + IRInstructionData &B = std::get<1>(R); + if (!A.Legal || !B.Legal) + return false; + return isClose(A, B); + }); +} + +/// Determine if one or more of the assigned global value numbers for the +/// operands in \p TargetValueNumbers is in the current mapping set for operand +/// numbers in \p SourceOperands. The set of possible corresponding global +/// value numbers are replaced with the most recent version of compatible +/// values. +/// +/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global +/// value number for the source IRInstructionCandidate. +/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source +/// IRSimilarityCandidate global value numbers to a set of possible numbers in +/// the target. +/// \param [in] SourceOperands - The operands in the original +/// IRSimilarityCandidate in the current instruction. +/// \param [in] TargetValueNumbers - The global value numbers of the operands in +/// the corresponding Instruction in the other IRSimilarityCandidate. +/// \returns true if there exists a possible mapping between the source +/// Instruction operands and the target Instruction operands, and false if not. +static bool checkNumberingAndReplaceCommutative( + const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, + DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, + ArrayRef<Value *> &SourceOperands, + DenseSet<unsigned> &TargetValueNumbers){ + + DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; + + unsigned ArgVal; + bool WasInserted; + + // Iterate over the operands in the source IRSimilarityCandidate to determine + // whether there exists an operand in the other IRSimilarityCandidate that + // creates a valid mapping of Value to Value between the + // IRSimilarityCaniddates. + for (Value *V : SourceOperands) { + ArgVal = SourceValueToNumberMapping.find(V)->second; + + std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( + std::make_pair(ArgVal, TargetValueNumbers)); + + // Instead of finding a current mapping, we inserted a set. This means a + // mapping did not exist for the source Instruction operand, it has no + // current constraints we need to check. + if (WasInserted) + continue; + + // If a mapping already exists for the source operand to the values in the + // other IRSimilarityCandidate we need to iterate over the items in other + // IRSimilarityCandidate's Instruction to determine whether there is a valid + // mapping of Value to Value. + DenseSet<unsigned> NewSet; + for (unsigned &Curr : ValueMappingIt->second) + // If we can find the value in the mapping, we add it to the new set. + if (TargetValueNumbers.contains(Curr)) + NewSet.insert(Curr); + + // If we could not find a Value, return 0. + if (NewSet.empty()) + return false; + + // Otherwise replace the old mapping with the newly constructed one. 
+ if (NewSet.size() != ValueMappingIt->second.size()) + ValueMappingIt->second.swap(NewSet); + + // We have reached no conclusions about the mapping, and cannot remove + // any items from the other operands, so we move to check the next operand. + if (ValueMappingIt->second.size() != 1) + continue; + + + unsigned ValToRemove = *ValueMappingIt->second.begin(); + // When there is only one item left in the mapping for and operand, remove + // the value from the other operands. If it results in there being no + // mapping, return false, it means the mapping is wrong + for (Value *InnerV : SourceOperands) { + if (V == InnerV) + continue; + + unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; + ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); + if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) + continue; + + ValueMappingIt->second.erase(ValToRemove); + if (ValueMappingIt->second.empty()) + return false; + } + } + + return true; +} + +/// Determine if operand number \p TargetArgVal is in the current mapping set +/// for operand number \p SourceArgVal. +/// +/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global +/// value numbers from source IRSimilarityCandidate to target +/// IRSimilarityCandidate. +/// \param [in] SourceArgVal The global value number for an operand in the +/// in the original candidate. +/// \param [in] TargetArgVal The global value number for the corresponding +/// operand in the other candidate. +/// \returns True if there exists a mapping and false if not. +bool checkNumberingAndReplace( + DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, + unsigned SourceArgVal, unsigned TargetArgVal) { + // We are given two unsigned integers representing the global values of + // the operands in different IRSimilarityCandidates and a current mapping + // between the two. + // + // Source Operand GVN: 1 + // Target Operand GVN: 2 + // CurrentMapping: {1: {1, 2}} + // + // Since we have mapping, and the target operand is contained in the set, we + // update it to: + // CurrentMapping: {1: {2}} + // and can return true. But, if the mapping was + // CurrentMapping: {1: {3}} + // we would return false. + + bool WasInserted; + DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; + + std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( + std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); + + // If we created a new mapping, then we are done. + if (WasInserted) + return true; + + // If there is more than one option in the mapping set, and the target value + // is included in the mapping set replace that set with one that only includes + // the target value, as it is the only valid mapping via the non commutative + // instruction. + + DenseSet<unsigned> &TargetSet = Val->second; + if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { + TargetSet.clear(); + TargetSet.insert(TargetArgVal); + return true; + } + + // Return true if we can find the value in the set. + return TargetSet.contains(TargetArgVal); +} + +bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( + OperandMapping A, OperandMapping B) { + // Iterators to keep track of where we are in the operands for each + // Instruction. + ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); + ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); + unsigned OperandLength = A.OperVals.size(); + + // For each operand, get the value numbering and ensure it is consistent. 
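// Note (illustrative sketch, not part of the patch): for non-commutative
// operands the source-number to target-number mapping has to stay consistent
// across the whole candidate; once a source value has been paired with a
// target value, any later pairing must agree, and the stored set is narrowed
// to that single choice. A minimal standalone version of the check-and-tighten
// step (plain C++; function and variable names are assumptions):
#include <cstdio>
#include <map>
#include <set>

// Returns false as soon as a source number is asked to map to a target number
// that contradicts an earlier pairing; otherwise narrows the stored set.
static bool checkAndReplace(std::map<unsigned, std::set<unsigned>> &Mapping,
                            unsigned Src, unsigned Tgt) {
  auto [It, Inserted] = Mapping.try_emplace(Src, std::set<unsigned>{Tgt});
  if (Inserted)
    return true;            // first time we see Src
  if (!It->second.count(Tgt))
    return false;           // contradicts an earlier pairing
  It->second = {Tgt};       // tighten to the single valid target
  return true;
}

int main() {
  std::map<unsigned, std::set<unsigned>> Mapping;
  bool Ok = checkAndReplace(Mapping, 1, 7) &&   // e.g. %a -> %d
            checkAndReplace(Mapping, 2, 8) &&   //      %b -> %e
            checkAndReplace(Mapping, 1, 7);     // consistent reuse: still fine
  bool Bad = checkAndReplace(Mapping, 2, 9);    // %b -> %f contradicts %b -> %e
  std::printf("consistent: %d, contradiction rejected: %d\n", Ok, !Bad);
  return 0;
}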
+ for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { + unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; + unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; + + // Attempt to add a set with only the target value. If there is no mapping + // we can create it here. + // + // For an instruction like a subtraction: + // IRSimilarityCandidateA: IRSimilarityCandidateB: + // %resultA = sub %a, %b %resultB = sub %d, %e + // + // We map %a -> %d and %b -> %e. + // + // And check to see whether their mapping is consistent in + // checkNumberingAndReplace. + + if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) + return false; + + if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) + return false; + } + return true; +} + +bool IRSimilarityCandidate::compareCommutativeOperandMapping( + OperandMapping A, OperandMapping B) { + DenseSet<unsigned> ValueNumbersA; + DenseSet<unsigned> ValueNumbersB; + + ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); + ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); + unsigned OperandLength = A.OperVals.size(); + + // Find the value number sets for the operands. + for (unsigned Idx = 0; Idx < OperandLength; + Idx++, VItA++, VItB++) { + ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); + ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); + } + + // Iterate over the operands in the first IRSimilarityCandidate and make sure + // there exists a possible mapping with the operands in the second + // IRSimilarityCandidate. + if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, + A.ValueNumberMapping, A.OperVals, + ValueNumbersB)) + return false; + + // Iterate over the operands in the second IRSimilarityCandidate and make sure + // there exists a possible mapping with the operands in the first + // IRSimilarityCandidate. + if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, + B.ValueNumberMapping, B.OperVals, + ValueNumbersA)) + return false; + + return true; +} + +bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + if (A.getLength() != B.getLength()) + return false; + + if (A.ValueToNumber.size() != B.ValueToNumber.size()) + return false; + + iterator ItA = A.begin(); + iterator ItB = B.begin(); + + // These sets create a create a mapping between the values in one candidate + // to values in the other candidate. If we create a set with one element, + // and that same element maps to the original element in the candidate + // we have a good mapping. + DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; + DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; + DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; + + bool WasInserted; + + // Iterate over the instructions contained in each candidate + unsigned SectionLength = A.getStartIdx() + A.getLength(); + for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; + ItA++, ItB++, Loc++) { + // Make sure the instructions are similar to one another. + if (!isClose(*ItA, *ItB)) + return false; + + Instruction *IA = ItA->Inst; + Instruction *IB = ItB->Inst; + + if (!ItA->Legal || !ItB->Legal) + return false; + + // Get the operand sets for the instructions. 
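// Note (illustrative sketch, not part of the patch): commutative operands can
// be matched in any order, so instead of a single pairing each source number
// keeps a set of still-possible target numbers, and every commutative
// instruction intersects that set with the targets it actually offers. A small
// standalone version of the intersection step (plain C++; the names and the
// driver data are assumptions for illustration):
#include <cstdio>
#include <map>
#include <set>
#include <utility>

// Intersect the possibilities already recorded for Src with the targets seen
// in the current instruction; an empty intersection means no valid mapping.
static bool refine(std::map<unsigned, std::set<unsigned>> &Possible,
                   unsigned Src, const std::set<unsigned> &Targets) {
  auto [It, Inserted] = Possible.try_emplace(Src, Targets);
  if (Inserted)
    return true;
  std::set<unsigned> Narrowed;
  for (unsigned T : It->second)
    if (Targets.count(T))
      Narrowed.insert(T);
  if (Narrowed.empty())
    return false;
  It->second = std::move(Narrowed);
  return true;
}

int main() {
  std::map<unsigned, std::set<unsigned>> Possible;
  // First commutative add offers targets {7, 8} for source operands 1 and 2.
  bool Ok = refine(Possible, 1, {7, 8}) && refine(Possible, 2, {7, 8});
  // A later instruction pins source 1 down to target 7 only.
  Ok = Ok && refine(Possible, 1, {7});
  std::printf("mapping still feasible: %d, choices left for source 1: %zu\n",
              Ok, Possible[1].size()); // feasible, exactly one choice left
  return 0;
}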
+ ArrayRef<Value *> OperValsA = ItA->OperVals; + ArrayRef<Value *> OperValsB = ItB->OperVals; + + unsigned InstValA = A.ValueToNumber.find(IA)->second; + unsigned InstValB = B.ValueToNumber.find(IB)->second; + + // Ensure that the mappings for the instructions exists. + std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( + std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); + if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) + return false; + + std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( + std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); + if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) + return false; + + // We have different paths for commutative instructions and non-commutative + // instructions since commutative instructions could allow multiple mappings + // to certain values. + if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { + if (!compareCommutativeOperandMapping( + {A, OperValsA, ValueNumberMappingA}, + {B, OperValsB, ValueNumberMappingB})) + return false; + continue; + } + + // Handle the non-commutative cases. + if (!compareNonCommutativeOperandMapping( + {A, OperValsA, ValueNumberMappingA}, + {B, OperValsB, ValueNumberMappingB})) + return false; + } + return true; +} + +bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + auto DoesOverlap = [](const IRSimilarityCandidate &X, + const IRSimilarityCandidate &Y) { + // Check: + // XXXXXX X starts before Y ends + // YYYYYYY Y starts after X starts + return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; + }; + + return DoesOverlap(A, B) || DoesOverlap(B, A); +} + +void IRSimilarityIdentifier::populateMapper( + Module &M, std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping) { + + std::vector<IRInstructionData *> InstrListForModule; + std::vector<unsigned> IntegerMappingForModule; + // Iterate over the functions in the module to map each Instruction in each + // BasicBlock to an unsigned integer. + for (Function &F : M) { + + if (F.empty()) + continue; + + for (BasicBlock &BB : F) { + + if (BB.sizeWithoutDebug() < 2) + continue; + + // BB has potential to have similarity since it has a size greater than 2 + // and can therefore match other regions greater than 2. Map it to a list + // of unsigned integers. + Mapper.convertToUnsignedVec(BB, InstrListForModule, + IntegerMappingForModule); + } + } + + // Insert the InstrListForModule at the end of the overall InstrList so that + // we can have a long InstrList for the entire set of Modules being analyzed. + llvm::append_range(InstrList, InstrListForModule); + // Do the same as above, but for IntegerMapping. + llvm::append_range(IntegerMapping, IntegerMappingForModule); +} + +void IRSimilarityIdentifier::populateMapper( + ArrayRef<std::unique_ptr<Module>> &Modules, + std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping) { + + // Iterate over, and map the instructions in each module. + for (const std::unique_ptr<Module> &M : Modules) + populateMapper(*M, InstrList, IntegerMapping); +} + +/// From a repeated subsequence, find all the different instances of the +/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from +/// the IRInstructionData in subsequence. +/// +/// \param [in] Mapper - The instruction mapper for sanity checks. +/// \param [in] InstrList - The vector that holds the instruction data. 
+/// \param [in] IntegerMapping - The vector that holds the mapped integers. +/// \param [out] CandsForRepSubstring - The vector to store the generated +/// IRSimilarityCandidates. +static void createCandidatesFromSuffixTree( + IRInstructionMapper Mapper, std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, + std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { + + unsigned StringLen = RS.Length; + + // Create an IRSimilarityCandidate for instance of this subsequence \p RS. + for (const unsigned &StartIdx : RS.StartIndices) { + unsigned EndIdx = StartIdx + StringLen - 1; + + // Check that this subsequence does not contain an illegal instruction. + bool ContainsIllegal = false; + for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { + unsigned Key = IntegerMapping[CurrIdx]; + if (Key > Mapper.IllegalInstrNumber) { + ContainsIllegal = true; + break; + } + } + + // If we have an illegal instruction, we should not create an + // IRSimilarityCandidate for this region. + if (ContainsIllegal) + continue; + + // We are getting iterators to the instructions in this region of code + // by advancing the start and end indices from the start of the + // InstrList. + std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); + std::advance(StartIt, StartIdx); + std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); + std::advance(EndIt, EndIdx); + + CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); + } +} + +/// From the list of IRSimilarityCandidates, perform a comparison between each +/// IRSimilarityCandidate to determine if there are overlapping +/// IRInstructionData, or if they do not have the same structure. +/// +/// \param [in] CandsForRepSubstring - The vector containing the +/// IRSimilarityCandidates. +/// \param [out] StructuralGroups - the mapping of unsigned integers to vector +/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the +/// vector are structurally similar to one another. +static void findCandidateStructures( + std::vector<IRSimilarityCandidate> &CandsForRepSubstring, + DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { + std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, + InnerCandEndIt; + + // IRSimilarityCandidates each have a structure for operand use. It is + // possible that two instances of the same subsequences have different + // structure. Each type of structure found is assigned a number. This + // DenseMap maps an IRSimilarityCandidate to which type of similarity + // discovered it fits within. + DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; + + // Find the compatibility from each candidate to the others to determine + // which candidates overlap and which have the same structure by mapping + // each structure to a different group. + bool SameStructure; + bool Inserted; + unsigned CurrentGroupNum = 0; + unsigned OuterGroupNum; + DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; + DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; + DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; + + // Iterate over the candidates to determine its structural and overlapping + // compatibility with other instructions + for (CandIt = CandsForRepSubstring.begin(), + CandEndIt = CandsForRepSubstring.end(); + CandIt != CandEndIt; CandIt++) { + + // Determine if it has an assigned structural group already. 
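// Note (illustrative sketch, not part of the patch): candidate creation above
// starts from repeated subsequences of the integer mapping. The real
// implementation gets those from a generalized suffix tree; the idea can be
// shown with a naive fixed-length window scan that collects the start indices
// of every window, keeps the windows occurring more than once, and drops any
// window containing an "illegal" number (plain C++, simplifying assumptions
// throughout, not the LLVM SuffixTree):
#include <cstdio>
#include <map>
#include <vector>

int main() {
  const unsigned IllegalThreshold = 1000; // anything above this is "illegal"
  std::vector<unsigned> Mapping = {0, 1, 2, 4294967295u, 0, 1, 2};
  const size_t Len = 3;

  std::map<std::vector<unsigned>, std::vector<size_t>> Starts;
  for (size_t I = 0; I + Len <= Mapping.size(); ++I) {
    std::vector<unsigned> Window(Mapping.begin() + I, Mapping.begin() + I + Len);
    bool ContainsIllegal = false;
    for (unsigned Key : Window)
      if (Key > IllegalThreshold)
        ContainsIllegal = true;
    if (!ContainsIllegal)
      Starts[Window].push_back(I);
  }

  for (const auto &Entry : Starts)
    if (Entry.second.size() > 1)
      std::printf("repeated window of length %zu at %zu and %zu\n", Len,
                  Entry.second[0], Entry.second[1]);
  // Prints the window {0, 1, 2} found at indices 0 and 4.
  return 0;
}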
+ CandToGroupIt = CandToGroup.find(&*CandIt); + if (CandToGroupIt == CandToGroup.end()) { + // If not, we assign it one, and add it to our mapping. + std::tie(CandToGroupIt, Inserted) = + CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); + } + + // Get the structural group number from the iterator. + OuterGroupNum = CandToGroupIt->second; + + // Check if we already have a list of IRSimilarityCandidates for the current + // structural group. Create one if one does not exist. + CurrentGroupPair = StructuralGroups.find(OuterGroupNum); + if (CurrentGroupPair == StructuralGroups.end()) + std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( + std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); + + // Iterate over the IRSimilarityCandidates following the current + // IRSimilarityCandidate in the list to determine whether the two + // IRSimilarityCandidates are compatible. This is so we do not repeat pairs + // of IRSimilarityCandidates. + for (InnerCandIt = std::next(CandIt), + InnerCandEndIt = CandsForRepSubstring.end(); + InnerCandIt != InnerCandEndIt; InnerCandIt++) { + + // We check if the inner item has a group already, if it does, we skip it. + CandToGroupItInner = CandToGroup.find(&*InnerCandIt); + if (CandToGroupItInner != CandToGroup.end()) + continue; + + // Otherwise we determine if they have the same structure and add it to + // vector if they match. + SameStructure = + IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); + if (!SameStructure) + continue; + + CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); + CurrentGroupPair->second.push_back(*InnerCandIt); + } + } +} + +void IRSimilarityIdentifier::findCandidates( + std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping) { + SuffixTree ST(IntegerMapping); + + std::vector<IRSimilarityCandidate> CandsForRepSubstring; + std::vector<SimilarityGroup> NewCandidateGroups; + + DenseMap<unsigned, SimilarityGroup> StructuralGroups; + + // Iterate over the subsequences found by the Suffix Tree to create + // IRSimilarityCandidates for each repeated subsequence and determine which + // instances are structurally similar to one another. + for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { + createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, *It, + CandsForRepSubstring); + + if (CandsForRepSubstring.size() < 2) + continue; + + findCandidateStructures(CandsForRepSubstring, StructuralGroups); + for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) + // We only add the group if it contains more than one + // IRSimilarityCandidate. If there is only one, that means there is no + // other repeated subsequence with the same structure. 
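// Note (illustrative sketch, not part of the patch): the grouping above hands
// each still-ungrouped candidate a fresh group number and then pulls every
// later candidate with the same structure into that group, so each pair is
// compared at most once. A toy standalone version of that greedy grouping,
// using sorted characters as a stand-in for compareStructure (plain C++; the
// "structure" predicate is purely an assumption for illustration):
#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

static bool sameStructure(const std::string &A, const std::string &B) {
  std::string SA = A, SB = B;
  std::sort(SA.begin(), SA.end());
  std::sort(SB.begin(), SB.end());
  return SA == SB; // anagrams play the role of structurally similar candidates
}

int main() {
  std::vector<std::string> Cands = {"abc", "bca", "xyz", "cab", "zyx"};
  std::map<size_t, unsigned> Group; // candidate index -> group number
  unsigned NextGroup = 0;

  for (size_t I = 0; I < Cands.size(); ++I) {
    if (!Group.count(I))
      Group[I] = NextGroup++;
    for (size_t J = I + 1; J < Cands.size(); ++J)
      if (!Group.count(J) && sameStructure(Cands[I], Cands[J]))
        Group[J] = Group[I];
  }

  for (size_t I = 0; I < Cands.size(); ++I)
    std::printf("%s -> group %u\n", Cands[I].c_str(), Group[I]);
  // abc, bca, cab share one group; xyz and zyx share another.
  return 0;
}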
+ if (Group.second.size() > 1) + SimilarityCandidates->push_back(Group.second); + + CandsForRepSubstring.clear(); + StructuralGroups.clear(); + NewCandidateGroups.clear(); + } +} + +SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( + ArrayRef<std::unique_ptr<Module>> Modules) { + resetSimilarityCandidates(); + + std::vector<IRInstructionData *> InstrList; + std::vector<unsigned> IntegerMapping; + + populateMapper(Modules, InstrList, IntegerMapping); + findCandidates(InstrList, IntegerMapping); + + return SimilarityCandidates.getValue(); +} + +SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { + resetSimilarityCandidates(); + + std::vector<IRInstructionData *> InstrList; + std::vector<unsigned> IntegerMapping; + + populateMapper(M, InstrList, IntegerMapping); + findCandidates(InstrList, IntegerMapping); + + return SimilarityCandidates.getValue(); +} + +INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", + "ir-similarity-identifier", false, true) + +IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() + : ModulePass(ID) { + initializeIRSimilarityIdentifierWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + +bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { + IRSI.reset(new IRSimilarityIdentifier(M)); + return false; +} + +bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { + IRSI.reset(); + return false; +} + +bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { + // All the real work is done in the constructor for the pass. + IRSI.reset(new IRSimilarityIdentifier(M)); + return false; +} + +AnalysisKey IRSimilarityAnalysis::Key; +IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, + ModuleAnalysisManager &) { + + return IRSimilarityIdentifier(M); +} + +PreservedAnalyses +IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { + IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); + Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); + + for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { + OS << CandVec.size() << " candidates of length " + << CandVec.begin()->getLength() << ". 
Found in: \n"; + for (IRSimilarityCandidate &Cand : CandVec) { + OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() + << ", Basic Block: "; + if (Cand.front()->Inst->getParent()->getName().str() == "") + OS << "(unnamed)\n"; + else + OS << Cand.front()->Inst->getParent()->getName().str() << "\n"; + } + } + + return PreservedAnalyses::all(); +} + +char IRSimilarityIdentifierWrapperPass::ID = 0; diff --git a/contrib/libs/llvm12/lib/Analysis/IVDescriptors.cpp b/contrib/libs/llvm12/lib/Analysis/IVDescriptors.cpp index 94a24ccf21..9902184bb0 100644 --- a/contrib/libs/llvm12/lib/Analysis/IVDescriptors.cpp +++ b/contrib/libs/llvm12/lib/Analysis/IVDescriptors.cpp @@ -47,36 +47,36 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, return true; } -bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) { +bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) { switch (Kind) { default: break; - case RecurKind::Add: - case RecurKind::Mul: - case RecurKind::Or: - case RecurKind::And: - case RecurKind::Xor: - case RecurKind::SMax: - case RecurKind::SMin: - case RecurKind::UMax: - case RecurKind::UMin: + case RecurKind::Add: + case RecurKind::Mul: + case RecurKind::Or: + case RecurKind::And: + case RecurKind::Xor: + case RecurKind::SMax: + case RecurKind::SMin: + case RecurKind::UMax: + case RecurKind::UMin: return true; } return false; } -bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) { - return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind); +bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) { + return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind); } -bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) { +bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) { switch (Kind) { default: break; - case RecurKind::Add: - case RecurKind::Mul: - case RecurKind::FAdd: - case RecurKind::FMul: + case RecurKind::Add: + case RecurKind::Mul: + case RecurKind::FAdd: + case RecurKind::FMul: return true; } return false; @@ -189,7 +189,7 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, } } -bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, +bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, RecurrenceDescriptor &RedDes, DemandedBits *DB, @@ -243,14 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, if (RecurrenceType->isFloatingPointTy()) { if (!isFloatingPointRecurrenceKind(Kind)) return false; - } else if (RecurrenceType->isIntegerTy()) { + } else if (RecurrenceType->isIntegerTy()) { if (!isIntegerRecurrenceKind(Kind)) return false; if (isArithmeticRecurrenceKind(Kind)) Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); - } else { - // Pointer min/max may exist, but it is not supported as a reduction op. - return false; + } else { + // Pointer min/max may exist, but it is not supported as a reduction op. + return false; } Worklist.push_back(Start); @@ -276,7 +276,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // * An instruction type other than PHI or the reduction operation. // * A PHI in the header other than the initial PHI. while (!Worklist.empty()) { - Instruction *Cur = Worklist.pop_back_val(); + Instruction *Cur = Worklist.pop_back_val(); // No Users. 
// If the instruction has no users then this is a broken chain and can't be @@ -307,35 +307,35 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // FIXME: FMF is allowed on phi, but propagation is not handled correctly. if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) FMF &= ReduxDesc.getPatternInst()->getFastMathFlags(); - // Update this reduction kind if we matched a new instruction. - // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind' - // state accurate while processing the worklist? - if (ReduxDesc.getRecKind() != RecurKind::None) - Kind = ReduxDesc.getRecKind(); + // Update this reduction kind if we matched a new instruction. + // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind' + // state accurate while processing the worklist? + if (ReduxDesc.getRecKind() != RecurKind::None) + Kind = ReduxDesc.getRecKind(); } bool IsASelect = isa<SelectInst>(Cur); // A conditional reduction operation must only have 2 or less uses in // VisitedInsts. - if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) && + if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) && hasMultipleUsesOf(Cur, VisitedInsts, 2)) return false; // A reduction operation must only have one use of the reduction value. - if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) && - hasMultipleUsesOf(Cur, VisitedInsts, 1)) + if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) && + hasMultipleUsesOf(Cur, VisitedInsts, 1)) return false; // All inputs to a PHI node must be a reduction value. if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) return false; - if (isIntMinMaxRecurrenceKind(Kind) && + if (isIntMinMaxRecurrenceKind(Kind) && (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) ++NumCmpSelectPatternInst; - if (isFPMinMaxRecurrenceKind(Kind) && - (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) + if (isFPMinMaxRecurrenceKind(Kind) && + (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) ++NumCmpSelectPatternInst; // Check whether we found a reduction operator. @@ -400,7 +400,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // This means we have seen one but not the other instruction of the // pattern or more than just a select and cmp. - if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2) + if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2) return false; if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) @@ -418,7 +418,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // can be ignore in the cost model. If we compute a different type than we // did when evaluating the 'and', the 'and' will not be eliminated, and we // will end up with different kinds of operations in the recurrence - // expression (e.g., IntegerAND, IntegerADD). We give up if this is + // expression (e.g., IntegerAND, IntegerADD). We give up if this is // the case. // // The vectorizer relies on InstCombine to perform the actual @@ -446,8 +446,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // instructions that are a part of the reduction. The vectorizer cost // model could then apply the recurrence type to these instructions, // without needing a white list of instructions to ignore. - // This may also be useful for the inloop reductions, if it can be - // kept simple enough. + // This may also be useful for the inloop reductions, if it can be + // kept simple enough. 
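// Note (illustrative sketch, not part of the patch): the traversal above is
// what lets a phi be recognized as a reduction; once recognized, the
// vectorizer can keep several partial accumulators, each seeded with the
// reduction's identity value, and combine them after the loop. A standalone
// sketch of that transformation for a 4-lane integer add reduction (plain
// C++; the lane count and data are assumptions for illustration):
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> A = {1, 2, 3, 4, 5, 6, 7, 8, 9};

  // Scalar form: one accumulator chained through every iteration.
  int Scalar = 0;
  for (int V : A)
    Scalar += V;

  // "Vectorized" form: four independent partial sums, each starting at the
  // identity for add (0), then a final horizontal reduction.
  int Lanes[4] = {0, 0, 0, 0};
  size_t I = 0;
  for (; I + 4 <= A.size(); I += 4)
    for (size_t L = 0; L < 4; ++L)
      Lanes[L] += A[I + L];
  int Vectorized = Lanes[0] + Lanes[1] + Lanes[2] + Lanes[3];
  for (; I < A.size(); ++I) // scalar epilogue for the leftover elements
    Vectorized += A[I];

  std::printf("scalar %d == vectorized %d\n", Scalar, Vectorized); // both 45
  return 0;
}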
collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); } @@ -458,50 +458,50 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // is saved as part of the RecurrenceDescriptor. // Save the description of this reduction variable. - RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, - ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, - IsSigned, CastInsts); + RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, + ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, + IsSigned, CastInsts); RedDes = RD; return true; } RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, - const InstDesc &Prev) { - assert((isa<CmpInst>(I) || isa<SelectInst>(I)) && - "Expected a cmp or select instruction"); +RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, + const InstDesc &Prev) { + assert((isa<CmpInst>(I) || isa<SelectInst>(I)) && + "Expected a cmp or select instruction"); // We must handle the select(cmp()) as a single instruction. Advance to the // select. - CmpInst::Predicate Pred; - if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) { - if (auto *Select = dyn_cast<SelectInst>(*I->user_begin())) - return InstDesc(Select, Prev.getRecKind()); + CmpInst::Predicate Pred; + if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) { + if (auto *Select = dyn_cast<SelectInst>(*I->user_begin())) + return InstDesc(Select, Prev.getRecKind()); } - // Only match select with single use cmp condition. - if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), - m_Value()))) + // Only match select with single use cmp condition. + if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), + m_Value()))) return InstDesc(false, I); // Look for a min/max pattern. 
- if (match(I, m_UMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::UMin); - if (match(I, m_UMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::UMax); - if (match(I, m_SMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::SMax); - if (match(I, m_SMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::SMin); - if (match(I, m_OrdFMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMin); - if (match(I, m_OrdFMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMax); - if (match(I, m_UnordFMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMin); - if (match(I, m_UnordFMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMax); + if (match(I, m_UMin(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::UMin); + if (match(I, m_UMax(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::UMax); + if (match(I, m_SMax(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::SMax); + if (match(I, m_SMin(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::SMin); + if (match(I, m_OrdFMin(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::FMin); + if (match(I, m_OrdFMax(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::FMax); + if (match(I, m_UnordFMin(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::FMin); + if (match(I, m_UnordFMax(m_Value(), m_Value()))) + return InstDesc(I, RecurKind::FMax); return InstDesc(false, I); } @@ -516,7 +516,7 @@ RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, /// %add = fadd %0, %sum.1 /// %sum.2 = select %cmp, %add, %sum.1 RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) { +RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) { SelectInst *SI = dyn_cast<SelectInst>(I); if (!SI) return InstDesc(false, I); @@ -544,16 +544,16 @@ RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) { if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) || m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) && I1->isFast()) - return InstDesc(Kind == RecurKind::FAdd, SI); + return InstDesc(Kind == RecurKind::FAdd, SI); if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) - return InstDesc(Kind == RecurKind::FMul, SI); + return InstDesc(Kind == RecurKind::FMul, SI); return InstDesc(false, I); } RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind, +RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { Instruction *UAI = Prev.getUnsafeAlgebraInst(); if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc()) @@ -563,32 +563,32 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind, default: return InstDesc(false, I); case Instruction::PHI: - return InstDesc(I, Prev.getRecKind(), Prev.getUnsafeAlgebraInst()); + return InstDesc(I, Prev.getRecKind(), Prev.getUnsafeAlgebraInst()); case Instruction::Sub: case Instruction::Add: - return InstDesc(Kind == RecurKind::Add, I); + return InstDesc(Kind == RecurKind::Add, I); case Instruction::Mul: - return InstDesc(Kind == RecurKind::Mul, I); + return InstDesc(Kind == RecurKind::Mul, I); case Instruction::And: - return InstDesc(Kind == RecurKind::And, I); + return InstDesc(Kind == RecurKind::And, I); case Instruction::Or: - return InstDesc(Kind == RecurKind::Or, I); + return InstDesc(Kind == RecurKind::Or, I); case Instruction::Xor: - return InstDesc(Kind == RecurKind::Xor, I); - case Instruction::FDiv: + return 
InstDesc(Kind == RecurKind::Xor, I); + case Instruction::FDiv: case Instruction::FMul: - return InstDesc(Kind == RecurKind::FMul, I, UAI); + return InstDesc(Kind == RecurKind::FMul, I, UAI); case Instruction::FSub: case Instruction::FAdd: - return InstDesc(Kind == RecurKind::FAdd, I, UAI); + return InstDesc(Kind == RecurKind::FAdd, I, UAI); case Instruction::Select: - if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) + if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) return isConditionalRdxPattern(Kind, I); LLVM_FALLTHROUGH; case Instruction::FCmp: case Instruction::ICmp: - if (!isIntMinMaxRecurrenceKind(Kind) && - (!HasFunNoNaNAttr || !isFPMinMaxRecurrenceKind(Kind))) + if (!isIntMinMaxRecurrenceKind(Kind) && + (!HasFunNoNaNAttr || !isFPMinMaxRecurrenceKind(Kind))) return InstDesc(false, I); return isMinMaxSelectCmpPattern(I, Prev); } @@ -618,71 +618,71 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, bool HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - if (AddReductionVar(Phi, RecurKind::Add, TheLoop, HasFunNoNaNAttr, RedDes, DB, + if (AddReductionVar(Phi, RecurKind::Add, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, HasFunNoNaNAttr, RedDes, DB, + if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::Or, TheLoop, HasFunNoNaNAttr, RedDes, DB, + if (AddReductionVar(Phi, RecurKind::Or, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::And, TheLoop, HasFunNoNaNAttr, RedDes, DB, + if (AddReductionVar(Phi, RecurKind::And, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, HasFunNoNaNAttr, RedDes, DB, + if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { - LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { - LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { - LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, HasFunNoNaNAttr, RedDes, + if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { + LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." 
<< *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { + LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { + LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { - LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { - LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n"); + if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { + LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n"); return true; } + if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { + LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n"); + return true; + } // Not a reduction of known type. return false; } @@ -764,143 +764,143 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence( /// This function returns the identity element (or neutral element) for /// the operation K. -Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) { +Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) { switch (K) { - case RecurKind::Xor: - case RecurKind::Add: - case RecurKind::Or: + case RecurKind::Xor: + case RecurKind::Add: + case RecurKind::Or: // Adding, Xoring, Oring zero to a number does not change it. return ConstantInt::get(Tp, 0); - case RecurKind::Mul: + case RecurKind::Mul: // Multiplying a number by 1 does not change it. return ConstantInt::get(Tp, 1); - case RecurKind::And: + case RecurKind::And: // AND-ing a number with an all-1 value does not change it. return ConstantInt::get(Tp, -1, true); - case RecurKind::FMul: + case RecurKind::FMul: // Multiplying a number by 1 does not change it. return ConstantFP::get(Tp, 1.0L); - case RecurKind::FAdd: + case RecurKind::FAdd: // Adding zero to a number does not change it. 
return ConstantFP::get(Tp, 0.0L); - case RecurKind::UMin: - return ConstantInt::get(Tp, -1); - case RecurKind::UMax: - return ConstantInt::get(Tp, 0); - case RecurKind::SMin: - return ConstantInt::get(Tp, - APInt::getSignedMaxValue(Tp->getIntegerBitWidth())); - case RecurKind::SMax: - return ConstantInt::get(Tp, - APInt::getSignedMinValue(Tp->getIntegerBitWidth())); - case RecurKind::FMin: - return ConstantFP::getInfinity(Tp, true); - case RecurKind::FMax: - return ConstantFP::getInfinity(Tp, false); + case RecurKind::UMin: + return ConstantInt::get(Tp, -1); + case RecurKind::UMax: + return ConstantInt::get(Tp, 0); + case RecurKind::SMin: + return ConstantInt::get(Tp, + APInt::getSignedMaxValue(Tp->getIntegerBitWidth())); + case RecurKind::SMax: + return ConstantInt::get(Tp, + APInt::getSignedMinValue(Tp->getIntegerBitWidth())); + case RecurKind::FMin: + return ConstantFP::getInfinity(Tp, true); + case RecurKind::FMax: + return ConstantFP::getInfinity(Tp, false); default: llvm_unreachable("Unknown recurrence kind"); } } -unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { +unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { switch (Kind) { - case RecurKind::Add: + case RecurKind::Add: return Instruction::Add; - case RecurKind::Mul: + case RecurKind::Mul: return Instruction::Mul; - case RecurKind::Or: + case RecurKind::Or: return Instruction::Or; - case RecurKind::And: + case RecurKind::And: return Instruction::And; - case RecurKind::Xor: + case RecurKind::Xor: return Instruction::Xor; - case RecurKind::FMul: + case RecurKind::FMul: return Instruction::FMul; - case RecurKind::FAdd: + case RecurKind::FAdd: return Instruction::FAdd; - case RecurKind::SMax: - case RecurKind::SMin: - case RecurKind::UMax: - case RecurKind::UMin: + case RecurKind::SMax: + case RecurKind::SMin: + case RecurKind::UMax: + case RecurKind::UMin: return Instruction::ICmp; - case RecurKind::FMax: - case RecurKind::FMin: + case RecurKind::FMax: + case RecurKind::FMin: return Instruction::FCmp; default: llvm_unreachable("Unknown recurrence operation"); } } -SmallVector<Instruction *, 4> -RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { - SmallVector<Instruction *, 4> ReductionOperations; - unsigned RedOp = getOpcode(Kind); - - // Search down from the Phi to the LoopExitInstr, looking for instructions - // with a single user of the correct type for the reduction. - - // Note that we check that the type of the operand is correct for each item in - // the chain, including the last (the loop exit value). This can come up from - // sub, which would otherwise be treated as an add reduction. MinMax also need - // to check for a pair of icmp/select, for which we use getNextInstruction and - // isCorrectOpcode functions to step the right number of instruction, and - // check the icmp/select pair. - // FIXME: We also do not attempt to look through Phi/Select's yet, which might - // be part of the reduction chain, or attempt to looks through And's to find a - // smaller bitwidth. Subs are also currently not allowed (which are usually - // treated as part of a add reduction) as they are expected to generally be - // more expensive than out-of-loop reductions, and need to be costed more - // carefully. 
- unsigned ExpectedUses = 1; - if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) - ExpectedUses = 2; - - auto getNextInstruction = [&](Instruction *Cur) { - if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { - // We are expecting a icmp/select pair, which we go to the next select - // instruction if we can. We already know that Cur has 2 uses. - if (isa<SelectInst>(*Cur->user_begin())) - return cast<Instruction>(*Cur->user_begin()); - else - return cast<Instruction>(*std::next(Cur->user_begin())); - } - return cast<Instruction>(*Cur->user_begin()); - }; - auto isCorrectOpcode = [&](Instruction *Cur) { - if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { - Value *LHS, *RHS; - return SelectPatternResult::isMinOrMax( - matchSelectPattern(Cur, LHS, RHS).Flavor); - } - return Cur->getOpcode() == RedOp; - }; - - // The loop exit instruction we check first (as a quick test) but add last. We - // check the opcode is correct (and dont allow them to be Subs) and that they - // have expected to have the expected number of uses. They will have one use - // from the phi and one from a LCSSA value, no matter the type. - if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2)) - return {}; - - // Check that the Phi has one (or two for min/max) uses. - if (!Phi->hasNUses(ExpectedUses)) - return {}; - Instruction *Cur = getNextInstruction(Phi); - - // Each other instruction in the chain should have the expected number of uses - // and be the correct opcode. - while (Cur != LoopExitInstr) { - if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses)) - return {}; - - ReductionOperations.push_back(Cur); - Cur = getNextInstruction(Cur); - } - - ReductionOperations.push_back(Cur); - return ReductionOperations; -} - +SmallVector<Instruction *, 4> +RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { + SmallVector<Instruction *, 4> ReductionOperations; + unsigned RedOp = getOpcode(Kind); + + // Search down from the Phi to the LoopExitInstr, looking for instructions + // with a single user of the correct type for the reduction. + + // Note that we check that the type of the operand is correct for each item in + // the chain, including the last (the loop exit value). This can come up from + // sub, which would otherwise be treated as an add reduction. MinMax also need + // to check for a pair of icmp/select, for which we use getNextInstruction and + // isCorrectOpcode functions to step the right number of instruction, and + // check the icmp/select pair. + // FIXME: We also do not attempt to look through Phi/Select's yet, which might + // be part of the reduction chain, or attempt to looks through And's to find a + // smaller bitwidth. Subs are also currently not allowed (which are usually + // treated as part of a add reduction) as they are expected to generally be + // more expensive than out-of-loop reductions, and need to be costed more + // carefully. + unsigned ExpectedUses = 1; + if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) + ExpectedUses = 2; + + auto getNextInstruction = [&](Instruction *Cur) { + if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { + // We are expecting a icmp/select pair, which we go to the next select + // instruction if we can. We already know that Cur has 2 uses. 
+ if (isa<SelectInst>(*Cur->user_begin())) + return cast<Instruction>(*Cur->user_begin()); + else + return cast<Instruction>(*std::next(Cur->user_begin())); + } + return cast<Instruction>(*Cur->user_begin()); + }; + auto isCorrectOpcode = [&](Instruction *Cur) { + if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { + Value *LHS, *RHS; + return SelectPatternResult::isMinOrMax( + matchSelectPattern(Cur, LHS, RHS).Flavor); + } + return Cur->getOpcode() == RedOp; + }; + + // The loop exit instruction we check first (as a quick test) but add last. We + // check the opcode is correct (and dont allow them to be Subs) and that they + // have expected to have the expected number of uses. They will have one use + // from the phi and one from a LCSSA value, no matter the type. + if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2)) + return {}; + + // Check that the Phi has one (or two for min/max) uses. + if (!Phi->hasNUses(ExpectedUses)) + return {}; + Instruction *Cur = getNextInstruction(Phi); + + // Each other instruction in the chain should have the expected number of uses + // and be the correct opcode. + while (Cur != LoopExitInstr) { + if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses)) + return {}; + + ReductionOperations.push_back(Cur); + Cur = getNextInstruction(Cur); + } + + ReductionOperations.push_back(Cur); + return ReductionOperations; +} + InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *BOp, SmallVectorImpl<Instruction *> *Casts) diff --git a/contrib/libs/llvm12/lib/Analysis/ImportedFunctionsInliningStatistics.cpp b/contrib/libs/llvm12/lib/Analysis/ImportedFunctionsInliningStatistics.cpp index a7b5fda237..d057fe4ba9 100644 --- a/contrib/libs/llvm12/lib/Analysis/ImportedFunctionsInliningStatistics.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ImportedFunctionsInliningStatistics.cpp @@ -1,212 +1,212 @@ -//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Generating inliner statistics for imported functions, mostly useful for -// ThinLTO. 
-//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <iomanip> -#include <sstream> -using namespace llvm; - -cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( - "inliner-function-import-stats", - cl::init(InlinerFunctionImportStatsOpts::No), - cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", - "basic statistics"), - clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", - "printing of statistics for each inlined function")), - cl::Hidden, cl::desc("Enable inliner stats for imported functions")); - -ImportedFunctionsInliningStatistics::InlineGraphNode & -ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { - - auto &ValueLookup = NodesMap[F.getName()]; - if (!ValueLookup) { - ValueLookup = std::make_unique<InlineGraphNode>(); - ValueLookup->Imported = F.hasMetadata("thinlto_src_module"); - } - return *ValueLookup; -} - -void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, - const Function &Callee) { - - InlineGraphNode &CallerNode = createInlineGraphNode(Caller); - InlineGraphNode &CalleeNode = createInlineGraphNode(Callee); - CalleeNode.NumberOfInlines++; - - if (!CallerNode.Imported && !CalleeNode.Imported) { - // Direct inline from not imported callee to not imported caller, so we - // don't have to add this to graph. It might be very helpful if you wanna - // get the inliner statistics in compile step where there are no imported - // functions. In this case the graph would be empty. - CalleeNode.NumberOfRealInlines++; - return; - } - - CallerNode.InlinedCallees.push_back(&CalleeNode); - if (!CallerNode.Imported) { - // We could avoid second lookup, but it would make the code ultra ugly. - auto It = NodesMap.find(Caller.getName()); - assert(It != NodesMap.end() && "The node should be already there."); - // Save Caller as a starting node for traversal. The string has to be one - // from map because Caller can disappear (and function name with it). 
- NonImportedCallers.push_back(It->first()); - } -} - -void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { - ModuleName = M.getName(); - for (const auto &F : M.functions()) { - if (F.isDeclaration()) - continue; - AllFunctions++; - ImportedFunctions += int(F.hasMetadata("thinlto_src_module")); - } -} -static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, - const char *PercentageOfMsg, - bool LineEnd = true) { - double Result = 0; - if (All != 0) - Result = 100 * static_cast<double>(Fraction) / All; - - std::stringstream Str; - Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result - << "% of " << PercentageOfMsg << "]"; - if (LineEnd) - Str << "\n"; - return Str.str(); -} - -void ImportedFunctionsInliningStatistics::dump(const bool Verbose) { - calculateRealInlines(); - NonImportedCallers.clear(); - - int32_t InlinedImportedFunctionsCount = 0; - int32_t InlinedNotImportedFunctionsCount = 0; - - int32_t InlinedImportedFunctionsToImportingModuleCount = 0; - int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0; - - const auto SortedNodes = getSortedNodes(); - std::string Out; - Out.reserve(5000); - raw_string_ostream Ostream(Out); - - Ostream << "------- Dumping inliner stats for [" << ModuleName - << "] -------\n"; - - if (Verbose) - Ostream << "-- List of inlined functions:\n"; - - for (const auto &Node : SortedNodes) { - assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines); - if (Node->second->NumberOfInlines == 0) - continue; - - if (Node->second->Imported) { - InlinedImportedFunctionsCount++; - InlinedImportedFunctionsToImportingModuleCount += - int(Node->second->NumberOfRealInlines > 0); - } else { - InlinedNotImportedFunctionsCount++; - InlinedNotImportedFunctionsToImportingModuleCount += - int(Node->second->NumberOfRealInlines > 0); - } - - if (Verbose) - Ostream << "Inlined " - << (Node->second->Imported ? 
"imported " : "not imported ") - << "function [" << Node->first() << "]" - << ": #inlines = " << Node->second->NumberOfInlines - << ", #inlines_to_importing_module = " - << Node->second->NumberOfRealInlines << "\n"; - } - - auto InlinedFunctionsCount = - InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount; - auto NotImportedFuncCount = AllFunctions - ImportedFunctions; - auto ImportedNotInlinedIntoModule = - ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount; - - Ostream << "-- Summary:\n" - << "All functions: " << AllFunctions - << ", imported functions: " << ImportedFunctions << "\n" - << getStatString("inlined functions", InlinedFunctionsCount, - AllFunctions, "all functions") - << getStatString("imported functions inlined anywhere", - InlinedImportedFunctionsCount, ImportedFunctions, - "imported functions") - << getStatString("imported functions inlined into importing module", - InlinedImportedFunctionsToImportingModuleCount, - ImportedFunctions, "imported functions", - /*LineEnd=*/false) - << getStatString(", remaining", ImportedNotInlinedIntoModule, - ImportedFunctions, "imported functions") - << getStatString("non-imported functions inlined anywhere", - InlinedNotImportedFunctionsCount, - NotImportedFuncCount, "non-imported functions") - << getStatString( - "non-imported functions inlined into importing module", - InlinedNotImportedFunctionsToImportingModuleCount, - NotImportedFuncCount, "non-imported functions"); - Ostream.flush(); - dbgs() << Out; -} - -void ImportedFunctionsInliningStatistics::calculateRealInlines() { - // Removing duplicated Callers. - llvm::sort(NonImportedCallers); - NonImportedCallers.erase( - std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), - NonImportedCallers.end()); - - for (const auto &Name : NonImportedCallers) { - auto &Node = *NodesMap[Name]; - if (!Node.Visited) - dfs(Node); - } -} - -void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) { - assert(!GraphNode.Visited); - GraphNode.Visited = true; - for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) { - InlinedFunctionNode->NumberOfRealInlines++; - if (!InlinedFunctionNode->Visited) - dfs(*InlinedFunctionNode); - } -} - -ImportedFunctionsInliningStatistics::SortedNodesTy -ImportedFunctionsInliningStatistics::getSortedNodes() { - SortedNodesTy SortedNodes; - SortedNodes.reserve(NodesMap.size()); - for (const NodesMapTy::value_type &Node : NodesMap) - SortedNodes.push_back(&Node); - - llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs, - const SortedNodesTy::value_type &Rhs) { - if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines) - return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines; - if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines) - return Lhs->second->NumberOfRealInlines > - Rhs->second->NumberOfRealInlines; - return Lhs->first() < Rhs->first(); - }); - return SortedNodes; -} +//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Generating inliner statistics for imported functions, mostly useful for +// ThinLTO. 
+//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <iomanip> +#include <sstream> +using namespace llvm; + +cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( + "inliner-function-import-stats", + cl::init(InlinerFunctionImportStatsOpts::No), + cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", + "basic statistics"), + clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", + "printing of statistics for each inlined function")), + cl::Hidden, cl::desc("Enable inliner stats for imported functions")); + +ImportedFunctionsInliningStatistics::InlineGraphNode & +ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { + + auto &ValueLookup = NodesMap[F.getName()]; + if (!ValueLookup) { + ValueLookup = std::make_unique<InlineGraphNode>(); + ValueLookup->Imported = F.hasMetadata("thinlto_src_module"); + } + return *ValueLookup; +} + +void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, + const Function &Callee) { + + InlineGraphNode &CallerNode = createInlineGraphNode(Caller); + InlineGraphNode &CalleeNode = createInlineGraphNode(Callee); + CalleeNode.NumberOfInlines++; + + if (!CallerNode.Imported && !CalleeNode.Imported) { + // Direct inline from not imported callee to not imported caller, so we + // don't have to add this to graph. It might be very helpful if you wanna + // get the inliner statistics in compile step where there are no imported + // functions. In this case the graph would be empty. + CalleeNode.NumberOfRealInlines++; + return; + } + + CallerNode.InlinedCallees.push_back(&CalleeNode); + if (!CallerNode.Imported) { + // We could avoid second lookup, but it would make the code ultra ugly. + auto It = NodesMap.find(Caller.getName()); + assert(It != NodesMap.end() && "The node should be already there."); + // Save Caller as a starting node for traversal. The string has to be one + // from map because Caller can disappear (and function name with it). 
+ NonImportedCallers.push_back(It->first()); + } +} + +void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { + ModuleName = M.getName(); + for (const auto &F : M.functions()) { + if (F.isDeclaration()) + continue; + AllFunctions++; + ImportedFunctions += int(F.hasMetadata("thinlto_src_module")); + } +} +static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, + const char *PercentageOfMsg, + bool LineEnd = true) { + double Result = 0; + if (All != 0) + Result = 100 * static_cast<double>(Fraction) / All; + + std::stringstream Str; + Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result + << "% of " << PercentageOfMsg << "]"; + if (LineEnd) + Str << "\n"; + return Str.str(); +} + +void ImportedFunctionsInliningStatistics::dump(const bool Verbose) { + calculateRealInlines(); + NonImportedCallers.clear(); + + int32_t InlinedImportedFunctionsCount = 0; + int32_t InlinedNotImportedFunctionsCount = 0; + + int32_t InlinedImportedFunctionsToImportingModuleCount = 0; + int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0; + + const auto SortedNodes = getSortedNodes(); + std::string Out; + Out.reserve(5000); + raw_string_ostream Ostream(Out); + + Ostream << "------- Dumping inliner stats for [" << ModuleName + << "] -------\n"; + + if (Verbose) + Ostream << "-- List of inlined functions:\n"; + + for (const auto &Node : SortedNodes) { + assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines); + if (Node->second->NumberOfInlines == 0) + continue; + + if (Node->second->Imported) { + InlinedImportedFunctionsCount++; + InlinedImportedFunctionsToImportingModuleCount += + int(Node->second->NumberOfRealInlines > 0); + } else { + InlinedNotImportedFunctionsCount++; + InlinedNotImportedFunctionsToImportingModuleCount += + int(Node->second->NumberOfRealInlines > 0); + } + + if (Verbose) + Ostream << "Inlined " + << (Node->second->Imported ? 
"imported " : "not imported ") + << "function [" << Node->first() << "]" + << ": #inlines = " << Node->second->NumberOfInlines + << ", #inlines_to_importing_module = " + << Node->second->NumberOfRealInlines << "\n"; + } + + auto InlinedFunctionsCount = + InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount; + auto NotImportedFuncCount = AllFunctions - ImportedFunctions; + auto ImportedNotInlinedIntoModule = + ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount; + + Ostream << "-- Summary:\n" + << "All functions: " << AllFunctions + << ", imported functions: " << ImportedFunctions << "\n" + << getStatString("inlined functions", InlinedFunctionsCount, + AllFunctions, "all functions") + << getStatString("imported functions inlined anywhere", + InlinedImportedFunctionsCount, ImportedFunctions, + "imported functions") + << getStatString("imported functions inlined into importing module", + InlinedImportedFunctionsToImportingModuleCount, + ImportedFunctions, "imported functions", + /*LineEnd=*/false) + << getStatString(", remaining", ImportedNotInlinedIntoModule, + ImportedFunctions, "imported functions") + << getStatString("non-imported functions inlined anywhere", + InlinedNotImportedFunctionsCount, + NotImportedFuncCount, "non-imported functions") + << getStatString( + "non-imported functions inlined into importing module", + InlinedNotImportedFunctionsToImportingModuleCount, + NotImportedFuncCount, "non-imported functions"); + Ostream.flush(); + dbgs() << Out; +} + +void ImportedFunctionsInliningStatistics::calculateRealInlines() { + // Removing duplicated Callers. + llvm::sort(NonImportedCallers); + NonImportedCallers.erase( + std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), + NonImportedCallers.end()); + + for (const auto &Name : NonImportedCallers) { + auto &Node = *NodesMap[Name]; + if (!Node.Visited) + dfs(Node); + } +} + +void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) { + assert(!GraphNode.Visited); + GraphNode.Visited = true; + for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) { + InlinedFunctionNode->NumberOfRealInlines++; + if (!InlinedFunctionNode->Visited) + dfs(*InlinedFunctionNode); + } +} + +ImportedFunctionsInliningStatistics::SortedNodesTy +ImportedFunctionsInliningStatistics::getSortedNodes() { + SortedNodesTy SortedNodes; + SortedNodes.reserve(NodesMap.size()); + for (const NodesMapTy::value_type &Node : NodesMap) + SortedNodes.push_back(&Node); + + llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs, + const SortedNodesTy::value_type &Rhs) { + if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines) + return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines; + if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines) + return Lhs->second->NumberOfRealInlines > + Rhs->second->NumberOfRealInlines; + return Lhs->first() < Rhs->first(); + }); + return SortedNodes; +} diff --git a/contrib/libs/llvm12/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/IndirectCallPromotionAnalysis.cpp index b112ed2e44..db764559c5 100644 --- a/contrib/libs/llvm12/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -22,7 +22,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include <memory> +#include <memory> using namespace llvm; diff --git a/contrib/libs/llvm12/lib/Analysis/InlineAdvisor.cpp 
b/contrib/libs/llvm12/lib/Analysis/InlineAdvisor.cpp index 9a2276a161..e1330d5809 100644 --- a/contrib/libs/llvm12/lib/Analysis/InlineAdvisor.cpp +++ b/contrib/libs/llvm12/lib/Analysis/InlineAdvisor.cpp @@ -16,7 +16,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -49,33 +49,33 @@ static cl::opt<int> cl::desc("Scale to limit the cost of inline deferral"), cl::init(2), cl::Hidden); -extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats; - -void DefaultInlineAdvice::recordUnsuccessfulInliningImpl( - const InlineResult &Result) { - using namespace ore; - llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) + - "; " + inlineCostStr(*OIC)); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", Caller) << ": " - << NV("Reason", Result.getFailureReason()); - }); -} - -void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() { - if (EmitRemarks) +extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats; + +void DefaultInlineAdvice::recordUnsuccessfulInliningImpl( + const InlineResult &Result) { + using namespace ore; + llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) + + "; " + inlineCostStr(*OIC)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller) << ": " + << NV("Reason", Result.getFailureReason()); + }); +} + +void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() { + if (EmitRemarks) emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); -} +} -void DefaultInlineAdvice::recordInliningImpl() { - if (EmitRemarks) +void DefaultInlineAdvice::recordInliningImpl() { + if (EmitRemarks) emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); -} +} -llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice( - CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params) { +llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice( + CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params) { Function &Caller = *CB.getCaller(); ProfileSummaryInfo *PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(Caller) @@ -103,11 +103,11 @@ llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice( GetBFI, PSI, RemarksEnabled ? 
&ORE : nullptr); }; return llvm::shouldInline(CB, GetInlineCost, ORE, - Params.EnableDeferral.getValueOr(false)); + Params.EnableDeferral.getValueOr(false)); } -std::unique_ptr<InlineAdvice> -DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) { +std::unique_ptr<InlineAdvice> +DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) { auto OIC = getDefaultInlineAdvice(CB, FAM, Params); return std::make_unique<DefaultInlineAdvice>( this, CB, OIC, @@ -133,20 +133,20 @@ void InlineAdvisor::freeDeletedFunctions() { DeletedFunctions.clear(); } -void InlineAdvice::recordInlineStatsIfNeeded() { - if (Advisor->ImportedFunctionsStats) - Advisor->ImportedFunctionsStats->recordInline(*Caller, *Callee); -} - -void InlineAdvice::recordInlining() { - markRecorded(); - recordInlineStatsIfNeeded(); - recordInliningImpl(); -} - +void InlineAdvice::recordInlineStatsIfNeeded() { + if (Advisor->ImportedFunctionsStats) + Advisor->ImportedFunctionsStats->recordInline(*Caller, *Callee); +} + +void InlineAdvice::recordInlining() { + markRecorded(); + recordInlineStatsIfNeeded(); + recordInliningImpl(); +} + void InlineAdvice::recordInliningWithCalleeDeleted() { markRecorded(); - recordInlineStatsIfNeeded(); + recordInlineStatsIfNeeded(); Advisor->markFunctionAsDeleted(Callee); recordInliningWithCalleeDeletedImpl(); } @@ -154,28 +154,28 @@ void InlineAdvice::recordInliningWithCalleeDeleted() { AnalysisKey InlineAdvisorAnalysis::Key; bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params, - InliningAdvisorMode Mode, - StringRef ReplayFile) { + InliningAdvisorMode Mode, + StringRef ReplayFile) { auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); switch (Mode) { case InliningAdvisorMode::Default: - Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params)); - // Restrict replay to default advisor, ML advisors are stateful so - // replay will need augmentations to interleave with them correctly. - if (!ReplayFile.empty()) { - Advisor = std::make_unique<ReplayInlineAdvisor>( - M, FAM, M.getContext(), std::move(Advisor), ReplayFile, - /* EmitRemarks =*/true); - } + Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params)); + // Restrict replay to default advisor, ML advisors are stateful so + // replay will need augmentations to interleave with them correctly. 
+ if (!ReplayFile.empty()) { + Advisor = std::make_unique<ReplayInlineAdvisor>( + M, FAM, M.getContext(), std::move(Advisor), ReplayFile, + /* EmitRemarks =*/true); + } break; case InliningAdvisorMode::Development: -#ifdef LLVM_HAVE_TF_API - Advisor = - llvm::getDevelopmentModeAdvisor(M, MAM, [&FAM, Params](CallBase &CB) { - auto OIC = getDefaultInlineAdvice(CB, FAM, Params); - return OIC.hasValue(); - }); -#endif +#ifdef LLVM_HAVE_TF_API + Advisor = + llvm::getDevelopmentModeAdvisor(M, MAM, [&FAM, Params](CallBase &CB) { + auto OIC = getDefaultInlineAdvice(CB, FAM, Params); + return OIC.hasValue(); + }); +#endif break; case InliningAdvisorMode::Release: #ifdef LLVM_HAVE_TF_AOT @@ -183,7 +183,7 @@ bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params, #endif break; } - + return !!Advisor; } @@ -382,35 +382,35 @@ llvm::shouldInline(CallBase &CB, return IC; } -std::string llvm::getCallSiteLocation(DebugLoc DLoc) { - std::ostringstream CallSiteLoc; - bool First = true; - for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { - if (!First) - CallSiteLoc << " @ "; - // Note that negative line offset is actually possible, but we use - // unsigned int to match line offset representation in remarks so - // it's directly consumable by relay advisor. - uint32_t Offset = - DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); - uint32_t Discriminator = DIL->getBaseDiscriminator(); - StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); - if (Name.empty()) - Name = DIL->getScope()->getSubprogram()->getName(); - CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset) << ":" - << llvm::utostr(DIL->getColumn()); - if (Discriminator) - CallSiteLoc << "." << llvm::utostr(Discriminator); - First = false; - } - - return CallSiteLoc.str(); -} - +std::string llvm::getCallSiteLocation(DebugLoc DLoc) { + std::ostringstream CallSiteLoc; + bool First = true; + for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { + if (!First) + CallSiteLoc << " @ "; + // Note that negative line offset is actually possible, but we use + // unsigned int to match line offset representation in remarks so + // it's directly consumable by relay advisor. + uint32_t Offset = + DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset) << ":" + << llvm::utostr(DIL->getColumn()); + if (Discriminator) + CallSiteLoc << "." << llvm::utostr(Discriminator); + First = false; + } + + return CallSiteLoc.str(); +} + void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { - if (!DLoc.get()) { + if (!DLoc.get()) { return; - } + } bool First = true; Remark << " at callsite "; @@ -423,14 +423,14 @@ void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); if (Name.empty()) Name = DIL->getScope()->getSubprogram()->getName(); - Remark << Name << ":" << ore::NV("Line", Offset) << ":" - << ore::NV("Column", DIL->getColumn()); + Remark << Name << ":" << ore::NV("Line", Offset) << ":" + << ore::NV("Column", DIL->getColumn()); if (Discriminator) Remark << "." 
<< ore::NV("Disc", Discriminator); First = false; } - - Remark << ";"; + + Remark << ";"; } void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, @@ -451,64 +451,64 @@ void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, return Remark; }); } - -InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM) - : M(M), FAM(FAM) { - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { - ImportedFunctionsStats = - std::make_unique<ImportedFunctionsInliningStatistics>(); - ImportedFunctionsStats->setModuleInfo(M); - } -} - -InlineAdvisor::~InlineAdvisor() { - if (ImportedFunctionsStats) { - assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No); - ImportedFunctionsStats->dump(InlinerFunctionImportStats == - InlinerFunctionImportStatsOpts::Verbose); - } - - freeDeletedFunctions(); -} - -std::unique_ptr<InlineAdvice> InlineAdvisor::getMandatoryAdvice(CallBase &CB, - bool Advice) { - return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice); -} - -InlineAdvisor::MandatoryInliningKind -InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM, - OptimizationRemarkEmitter &ORE) { - auto &Callee = *CB.getCalledFunction(); - - auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return FAM.getResult<TargetLibraryAnalysis>(F); - }; - - auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); - - auto TrivialDecision = - llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI); - - if (TrivialDecision.hasValue()) { - if (TrivialDecision->isSuccess()) - return MandatoryInliningKind::Always; - else - return MandatoryInliningKind::Never; - } - return MandatoryInliningKind::NotMandatory; -} - -std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB, - bool MandatoryOnly) { - if (!MandatoryOnly) - return getAdviceImpl(CB); - bool Advice = CB.getCaller() != CB.getCalledFunction() && - MandatoryInliningKind::Always == - getMandatoryKind(CB, FAM, getCallerORE(CB)); - return getMandatoryAdvice(CB, Advice); -} - -OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) { - return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller()); -} + +InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM) + : M(M), FAM(FAM) { + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { + ImportedFunctionsStats = + std::make_unique<ImportedFunctionsInliningStatistics>(); + ImportedFunctionsStats->setModuleInfo(M); + } +} + +InlineAdvisor::~InlineAdvisor() { + if (ImportedFunctionsStats) { + assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No); + ImportedFunctionsStats->dump(InlinerFunctionImportStats == + InlinerFunctionImportStatsOpts::Verbose); + } + + freeDeletedFunctions(); +} + +std::unique_ptr<InlineAdvice> InlineAdvisor::getMandatoryAdvice(CallBase &CB, + bool Advice) { + return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice); +} + +InlineAdvisor::MandatoryInliningKind +InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM, + OptimizationRemarkEmitter &ORE) { + auto &Callee = *CB.getCalledFunction(); + + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + + auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); + + auto TrivialDecision = + llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI); + + if (TrivialDecision.hasValue()) { + if (TrivialDecision->isSuccess()) + return 
MandatoryInliningKind::Always; + else + return MandatoryInliningKind::Never; + } + return MandatoryInliningKind::NotMandatory; +} + +std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB, + bool MandatoryOnly) { + if (!MandatoryOnly) + return getAdviceImpl(CB); + bool Advice = CB.getCaller() != CB.getCalledFunction() && + MandatoryInliningKind::Always == + getMandatoryKind(CB, FAM, getCallerORE(CB)); + return getMandatoryAdvice(CB, Advice); +} + +OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) { + return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller()); +} diff --git a/contrib/libs/llvm12/lib/Analysis/InlineCost.cpp b/contrib/libs/llvm12/lib/Analysis/InlineCost.cpp index a35f5e11f0..d937665464 100644 --- a/contrib/libs/llvm12/lib/Analysis/InlineCost.cpp +++ b/contrib/libs/llvm12/lib/Analysis/InlineCost.cpp @@ -71,20 +71,20 @@ static cl::opt<int> cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining cold callsites")); -static cl::opt<bool> InlineEnableCostBenefitAnalysis( - "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), - cl::desc("Enable the cost-benefit analysis for the inliner")); - -static cl::opt<int> InlineSavingsMultiplier( - "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore, - cl::desc("Multiplier to multiply cycle savings by during inlining")); - -static cl::opt<int> - InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), - cl::ZeroOrMore, - cl::desc("The maximum size of a callee that get's " - "inlined without sufficient cycle savings")); - +static cl::opt<bool> InlineEnableCostBenefitAnalysis( + "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), + cl::desc("Enable the cost-benefit analysis for the inliner")); + +static cl::opt<int> InlineSavingsMultiplier( + "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore, + cl::desc("Multiplier to multiply cycle savings by during inlining")); + +static cl::opt<int> + InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), + cl::ZeroOrMore, + cl::desc("The maximum size of a callee that get's " + "inlined without sufficient cycle savings")); + // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. @@ -197,9 +197,9 @@ protected: CallBase &CandidateCall; /// Extension points for handling callsite features. - // Called before a basic block was analyzed. - virtual void onBlockStart(const BasicBlock *BB) {} - + // Called before a basic block was analyzed. + virtual void onBlockStart(const BasicBlock *BB) {} + /// Called after a basic block was analyzed. virtual void onBlockAnalyzed(const BasicBlock *BB) {} @@ -471,24 +471,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { /// Ignore the threshold when finalizing analysis. const bool IgnoreThreshold; - // True if the cost-benefit-analysis-based inliner is enabled. - const bool CostBenefitAnalysisEnabled; - + // True if the cost-benefit-analysis-based inliner is enabled. + const bool CostBenefitAnalysisEnabled; + /// Inlining cost measured in abstract units, accounts for all the /// instructions expected to be executed for a given function invocation. /// Instructions that are statically proven to be dead based on call-site /// arguments are not counted here. int Cost = 0; - // The cumulative cost at the beginning of the basic block being analyzed. 
At - // the end of analyzing each basic block, "Cost - CostAtBBStart" represents - // the size of that basic block. - int CostAtBBStart = 0; - - // The static size of live but cold basic blocks. This is "static" in the - // sense that it's not weighted by profile counts at all. - int ColdSize = 0; - + // The cumulative cost at the beginning of the basic block being analyzed. At + // the end of analyzing each basic block, "Cost - CostAtBBStart" represents + // the size of that basic block. + int CostAtBBStart = 0; + + // The static size of live but cold basic blocks. This is "static" in the + // sense that it's not weighted by profile counts at all. + int ColdSize = 0; + bool SingleBB = true; unsigned SROACostSavings = 0; @@ -626,21 +626,21 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { SROACostSavings += InlineConstants::InstrCost; } - void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } - + void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } + void onBlockAnalyzed(const BasicBlock *BB) override { - if (CostBenefitAnalysisEnabled) { - // Keep track of the static size of live but cold basic blocks. For now, - // we define a cold basic block to be one that's never executed. - assert(GetBFI && "GetBFI must be available"); - BlockFrequencyInfo *BFI = &(GetBFI(F)); - assert(BFI && "BFI must be available"); - auto ProfileCount = BFI->getBlockProfileCount(BB); - assert(ProfileCount.hasValue()); - if (ProfileCount.getValue() == 0) - ColdSize += Cost - CostAtBBStart; - } - + if (CostBenefitAnalysisEnabled) { + // Keep track of the static size of live but cold basic blocks. For now, + // we define a cold basic block to be one that's never executed. + assert(GetBFI && "GetBFI must be available"); + BlockFrequencyInfo *BFI = &(GetBFI(F)); + assert(BFI && "BFI must be available"); + auto ProfileCount = BFI->getBlockProfileCount(BB); + assert(ProfileCount.hasValue()); + if (ProfileCount.getValue() == 0) + ColdSize += Cost - CostAtBBStart; + } + auto *TI = BB->getTerminator(); // If we had any successors at this point, than post-inlining is likely to // have them as well. Note that we assume any basic blocks which existed @@ -671,131 +671,131 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { InstructionCostDetailMap[I].ThresholdAfter = Threshold; } - bool isCostBenefitAnalysisEnabled() { - if (!InlineEnableCostBenefitAnalysis) - return false; - - if (!PSI || !PSI->hasProfileSummary()) - return false; - - if (!GetBFI) - return false; - - auto *Caller = CandidateCall.getParent()->getParent(); - if (!Caller->getEntryCount()) - return false; - - BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller)); - if (!CallerBFI) - return false; - - // For now, limit to hot call site. - if (!PSI->isHotCallSite(CandidateCall, CallerBFI)) - return false; - - if (!F.getEntryCount()) - return false; - - BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); - if (!CalleeBFI) - return false; - - return true; - } - - // Determine whether we should inline the given call site, taking into account - // both the size cost and the cycle savings. Return None if we don't have - // suficient profiling information to determine. - Optional<bool> costBenefitAnalysis() { - if (!CostBenefitAnalysisEnabled) - return None; - - // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0 - // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by - // falling back to the cost-based metric. - // TODO: Improve this hacky condition. 
- if (Threshold == 0) - return None; - - assert(GetBFI); - BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); - assert(CalleeBFI); - - // The cycle savings expressed as the sum of InlineConstants::InstrCost - // multiplied by the estimated dynamic count of each instruction we can - // avoid. Savings come from the call site cost, such as argument setup and - // the call instruction, as well as the instructions that are folded. - // - // We use 128-bit APInt here to avoid potential overflow. This variable - // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case - // assumes that we can avoid or fold a billion instructions, each with a - // profile count of 10^^15 -- roughly the number of cycles for a 24-hour - // period on a 4GHz machine. - APInt CycleSavings(128, 0); - - for (auto &BB : F) { - APInt CurrentSavings(128, 0); - for (auto &I : BB) { - if (BranchInst *BI = dyn_cast<BranchInst>(&I)) { - // Count a conditional branch as savings if it becomes unconditional. - if (BI->isConditional() && - dyn_cast_or_null<ConstantInt>( - SimplifiedValues.lookup(BI->getCondition()))) { - CurrentSavings += InlineConstants::InstrCost; - } - } else if (Value *V = dyn_cast<Value>(&I)) { - // Count an instruction as savings if we can fold it. - if (SimplifiedValues.count(V)) { - CurrentSavings += InlineConstants::InstrCost; - } - } - // TODO: Consider other forms of savings like switch statements, - // indirect calls becoming direct, SROACostSavings, LoadEliminationCost, - // etc. - } - - auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB); - assert(ProfileCount.hasValue()); - CurrentSavings *= ProfileCount.getValue(); - CycleSavings += CurrentSavings; - } - - // Compute the cycle savings per call. - auto EntryProfileCount = F.getEntryCount(); - assert(EntryProfileCount.hasValue()); - auto EntryCount = EntryProfileCount.getCount(); - CycleSavings += EntryCount / 2; - CycleSavings = CycleSavings.udiv(EntryCount); - - // Compute the total savings for the call site. - auto *CallerBB = CandidateCall.getParent(); - BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent()))); - CycleSavings += getCallsiteCost(this->CandidateCall, DL); - CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue(); - - // Remove the cost of the cold basic blocks. - int Size = Cost - ColdSize; - - // Allow tiny callees to be inlined regardless of whether they meet the - // savings threshold. - Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1; - - // Return true if the savings justify the cost of inlining. Specifically, - // we evaluate the following inequality: - // - // CycleSavings PSI->getOrCompHotCountThreshold() - // -------------- >= ----------------------------------- - // Size InlineSavingsMultiplier - // - // Note that the left hand side is specific to a call site. The right hand - // side is a constant for the entire executable. - APInt LHS = CycleSavings; - LHS *= InlineSavingsMultiplier; - APInt RHS(128, PSI->getOrCompHotCountThreshold()); - RHS *= Size; - return LHS.uge(RHS); - } - + bool isCostBenefitAnalysisEnabled() { + if (!InlineEnableCostBenefitAnalysis) + return false; + + if (!PSI || !PSI->hasProfileSummary()) + return false; + + if (!GetBFI) + return false; + + auto *Caller = CandidateCall.getParent()->getParent(); + if (!Caller->getEntryCount()) + return false; + + BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller)); + if (!CallerBFI) + return false; + + // For now, limit to hot call site. 
+ if (!PSI->isHotCallSite(CandidateCall, CallerBFI)) + return false; + + if (!F.getEntryCount()) + return false; + + BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); + if (!CalleeBFI) + return false; + + return true; + } + + // Determine whether we should inline the given call site, taking into account + // both the size cost and the cycle savings. Return None if we don't have + // suficient profiling information to determine. + Optional<bool> costBenefitAnalysis() { + if (!CostBenefitAnalysisEnabled) + return None; + + // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0 + // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by + // falling back to the cost-based metric. + // TODO: Improve this hacky condition. + if (Threshold == 0) + return None; + + assert(GetBFI); + BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); + assert(CalleeBFI); + + // The cycle savings expressed as the sum of InlineConstants::InstrCost + // multiplied by the estimated dynamic count of each instruction we can + // avoid. Savings come from the call site cost, such as argument setup and + // the call instruction, as well as the instructions that are folded. + // + // We use 128-bit APInt here to avoid potential overflow. This variable + // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case + // assumes that we can avoid or fold a billion instructions, each with a + // profile count of 10^^15 -- roughly the number of cycles for a 24-hour + // period on a 4GHz machine. + APInt CycleSavings(128, 0); + + for (auto &BB : F) { + APInt CurrentSavings(128, 0); + for (auto &I : BB) { + if (BranchInst *BI = dyn_cast<BranchInst>(&I)) { + // Count a conditional branch as savings if it becomes unconditional. + if (BI->isConditional() && + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(BI->getCondition()))) { + CurrentSavings += InlineConstants::InstrCost; + } + } else if (Value *V = dyn_cast<Value>(&I)) { + // Count an instruction as savings if we can fold it. + if (SimplifiedValues.count(V)) { + CurrentSavings += InlineConstants::InstrCost; + } + } + // TODO: Consider other forms of savings like switch statements, + // indirect calls becoming direct, SROACostSavings, LoadEliminationCost, + // etc. + } + + auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB); + assert(ProfileCount.hasValue()); + CurrentSavings *= ProfileCount.getValue(); + CycleSavings += CurrentSavings; + } + + // Compute the cycle savings per call. + auto EntryProfileCount = F.getEntryCount(); + assert(EntryProfileCount.hasValue()); + auto EntryCount = EntryProfileCount.getCount(); + CycleSavings += EntryCount / 2; + CycleSavings = CycleSavings.udiv(EntryCount); + + // Compute the total savings for the call site. + auto *CallerBB = CandidateCall.getParent(); + BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent()))); + CycleSavings += getCallsiteCost(this->CandidateCall, DL); + CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue(); + + // Remove the cost of the cold basic blocks. + int Size = Cost - ColdSize; + + // Allow tiny callees to be inlined regardless of whether they meet the + // savings threshold. + Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1; + + // Return true if the savings justify the cost of inlining. 
Specifically, + // we evaluate the following inequality: + // + // CycleSavings PSI->getOrCompHotCountThreshold() + // -------------- >= ----------------------------------- + // Size InlineSavingsMultiplier + // + // Note that the left hand side is specific to a call site. The right hand + // side is a constant for the entire executable. + APInt LHS = CycleSavings; + LHS *= InlineSavingsMultiplier; + APInt RHS(128, PSI->getOrCompHotCountThreshold()); + RHS *= Size; + return LHS.uge(RHS); + } + InlineResult finalizeAnalysis() override { // Loops generally act a lot like calls in that they act like barriers to // movement, require a certain amount of setup, etc. So when optimising for @@ -824,13 +824,13 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= VectorBonus / 2; - if (auto Result = costBenefitAnalysis()) { - if (Result.getValue()) - return InlineResult::success(); - else - return InlineResult::failure("Cost over threshold."); - } - + if (auto Result = costBenefitAnalysis()) { + if (Result.getValue()) + return InlineResult::success(); + else + return InlineResult::failure("Cost over threshold."); + } + if (IgnoreThreshold || Cost < std::max(1, Threshold)) return InlineResult::success(); return InlineResult::failure("Cost over threshold."); @@ -901,11 +901,11 @@ public: bool IgnoreThreshold = false) : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE), ComputeFullInlineCost(OptComputeFullInlineCost || - Params.ComputeFullInlineCost || ORE || - isCostBenefitAnalysisEnabled()), + Params.ComputeFullInlineCost || ORE || + isCostBenefitAnalysisEnabled()), Params(Params), Threshold(Params.DefaultThreshold), BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), - CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()), + CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()), Writer(this) {} /// Annotation Writer for instruction details @@ -1018,11 +1018,11 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { SmallVector<Value *, 4> Operands; Operands.push_back(GEP.getOperand(0)); - for (const Use &Op : GEP.indices()) - if (Constant *SimpleOp = SimplifiedValues.lookup(Op)) + for (const Use &Op : GEP.indices()) + if (Constant *SimpleOp = SimplifiedValues.lookup(Op)) Operands.push_back(SimpleOp); else - Operands.push_back(Op); + Operands.push_back(Op); return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands, TargetTransformInfo::TCK_SizeAndLatency); @@ -1044,7 +1044,7 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // is needed to track stack usage during inlining. Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getKnownMinSize(), + AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize); if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) { HasDynamicAlloca = true; @@ -1058,7 +1058,7 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); AllocatedSize = - SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize); + SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize); } // We will happily inline static alloca instructions. 
@@ -1194,8 +1194,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Lambda to check whether a GEP's indices are all constant. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { - for (const Use &Op : GEP.indices()) - if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op)) + for (const Use &Op : GEP.indices()) + if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op)) return false; return true; }; @@ -1278,7 +1278,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // integer is large enough to represent the pointer. unsigned IntegerSize = I.getType()->getScalarSizeInBits(); unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace(); - if (IntegerSize == DL.getPointerSizeInBits(AS)) { + if (IntegerSize == DL.getPointerSizeInBits(AS)) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) @@ -1580,7 +1580,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // Finally, take the target-specific inlining threshold multiplier into // account. Threshold *= TTI.getInliningThresholdMultiplier(); - Threshold += TTI.adjustInliningThreshold(&Call); + Threshold += TTI.adjustInliningThreshold(&Call); SingleBBBonus = Threshold * SingleBBBonusPercent / 100; VectorBonus = Threshold * VectorBonusPercent / 100; @@ -2062,8 +2062,8 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { // We found something we don't understand or can't handle. Mark any SROA-able // values in the operand list as no longer viable. - for (const Use &Op : I.operands()) - disableSROA(Op); + for (const Use &Op : I.operands()) + disableSROA(Op); return false; } @@ -2078,7 +2078,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { InlineResult CallAnalyzer::analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues) { - for (Instruction &I : *BB) { + for (Instruction &I : *BB) { // FIXME: Currently, the number of instructions in a function regardless of // our ability to simplify them during inline to constants or dead code, // are actually used by the vector bonus heuristic. As long as that's true, @@ -2089,16 +2089,16 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, if (isa<DbgInfoIntrinsic>(I)) continue; - // Skip pseudo-probes. - if (isa<PseudoProbeInst>(I)) - continue; - + // Skip pseudo-probes. + if (isa<PseudoProbeInst>(I)) + continue; + // Skip ephemeral values. - if (EphValues.count(&I)) + if (EphValues.count(&I)) continue; ++NumInstructions; - if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy()) + if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy()) ++NumVectorInstructions; // If the instruction simplified to a constant, there is no cost to this @@ -2106,14 +2106,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, // all of the per-instruction logic. The visit tree returns true if we // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining. - onInstructionAnalysisStart(&I); + onInstructionAnalysisStart(&I); - if (Base::visit(&I)) + if (Base::visit(&I)) ++NumInstructionsSimplified; else onMissedSimplification(); - onInstructionAnalysisFinish(&I); + onInstructionAnalysisFinish(&I); using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. InlineResult IR = InlineResult::success(); @@ -2274,23 +2274,23 @@ InlineResult CallAnalyzer::analyze() { // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. 
auto CAI = CandidateCall.arg_begin(); - for (Argument &FAI : F.args()) { + for (Argument &FAI : F.args()) { assert(CAI != CandidateCall.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) - SimplifiedValues[&FAI] = C; + SimplifiedValues[&FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { - ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue()); + ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) { - SROAArgValues[&FAI] = SROAArg; + SROAArgValues[&FAI] = SROAArg; onInitializeSROAArg(SROAArg); EnabledSROAAllocas.insert(SROAArg); } } - ++CAI; + ++CAI; } NumConstantArgs = SimplifiedValues.size(); NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); @@ -2324,8 +2324,8 @@ InlineResult CallAnalyzer::analyze() { if (BB->empty()) continue; - onBlockStart(BB); - + onBlockStart(BB); + // Disallow inlining a blockaddress with uses other than strictly callbr. // A blockaddress only has defined behavior for an indirect branch in the // same function, and we do not currently support inlining indirect @@ -2512,13 +2512,13 @@ Optional<InlineResult> llvm::getAttributeBasedInliningDecision( if (!Callee) return InlineResult::failure("indirect call"); - // When callee coroutine function is inlined into caller coroutine function - // before coro-split pass, - // coro-early pass can not handle this quiet well. - // So we won't inline the coroutine function if it have not been unsplited - if (Callee->isPresplitCoroutine()) - return InlineResult::failure("unsplited coroutine call"); - + // When callee coroutine function is inlined into caller coroutine function + // before coro-split pass, + // coro-early pass can not handle this quiet well. + // So we won't inline the coroutine function if it have not been unsplited + if (Callee->isPresplitCoroutine()) + return InlineResult::failure("unsplited coroutine call"); + // Never inline calls with byval arguments that does not have the alloca // address space. Since byval arguments can be replaced with a copy to an // alloca, the inlined code would need to be adjusted to handle that the @@ -2569,15 +2569,15 @@ Optional<InlineResult> llvm::getAttributeBasedInliningDecision( if (Call.isNoInline()) return InlineResult::failure("noinline call site attribute"); - // Don't inline functions if one does not have any stack protector attribute - // but the other does. - if (Caller->hasStackProtectorFnAttr() && !Callee->hasStackProtectorFnAttr()) - return InlineResult::failure( - "stack protected caller but callee requested no stack protector"); - if (Callee->hasStackProtectorFnAttr() && !Caller->hasStackProtectorFnAttr()) - return InlineResult::failure( - "stack protected callee but caller requested no stack protector"); - + // Don't inline functions if one does not have any stack protector attribute + // but the other does. 
+ if (Caller->hasStackProtectorFnAttr() && !Callee->hasStackProtectorFnAttr()) + return InlineResult::failure( + "stack protected caller but callee requested no stack protector"); + if (Callee->hasStackProtectorFnAttr() && !Caller->hasStackProtectorFnAttr()) + return InlineResult::failure( + "stack protected callee but caller requested no stack protector"); + return None; } @@ -2619,26 +2619,26 @@ InlineCost llvm::getInlineCost( InlineResult llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); - for (BasicBlock &BB : F) { + for (BasicBlock &BB : F) { // Disallow inlining of functions which contain indirect branches. - if (isa<IndirectBrInst>(BB.getTerminator())) + if (isa<IndirectBrInst>(BB.getTerminator())) return InlineResult::failure("contains indirect branches"); // Disallow inlining of blockaddresses which are used by non-callbr // instructions. - if (BB.hasAddressTaken()) - for (User *U : BlockAddress::get(&BB)->users()) + if (BB.hasAddressTaken()) + for (User *U : BlockAddress::get(&BB)->users()) if (!isa<CallBrInst>(*U)) return InlineResult::failure("blockaddress used outside of callbr"); - for (auto &II : BB) { + for (auto &II : BB) { CallBase *Call = dyn_cast<CallBase>(&II); if (!Call) continue; // Disallow recursive calls. - Function *Callee = Call->getCalledFunction(); - if (&F == Callee) + Function *Callee = Call->getCalledFunction(); + if (&F == Callee) return InlineResult::failure("recursive call"); // Disallow calls which expose returns-twice to a function not previously @@ -2647,8 +2647,8 @@ InlineResult llvm::isInlineViable(Function &F) { cast<CallInst>(Call)->canReturnTwice()) return InlineResult::failure("exposes returns-twice attribute"); - if (Callee) - switch (Callee->getIntrinsicID()) { + if (Callee) + switch (Callee->getIntrinsicID()) { default: break; case llvm::Intrinsic::icall_branch_funnel: @@ -2775,7 +2775,7 @@ InlineCostAnnotationPrinterPass::run(Function &F, // We can add a flag which determines InlineParams for this run. Right now, // the default InlineParams are used. const InlineParams Params = llvm::getInlineParams(); - for (BasicBlock &BB : F) { + for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (CallInst *CI = dyn_cast<CallInst>(&I)) { Function *CalledFunction = CI->getCalledFunction(); diff --git a/contrib/libs/llvm12/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/InlineSizeEstimatorAnalysis.cpp index 3c90e82fb9..185252749e 100644 --- a/contrib/libs/llvm12/lib/Analysis/InlineSizeEstimatorAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -88,32 +88,32 @@ public: // consecutive instructions (in the IR layout available at inline time) as // features improves the model performance. We want to move away from manual // feature selection. -// The array is given in opcode pairs rather than labels because 1) labels -// weren't readily available, and 2) the successions were hand - extracted. -// -// This array must be sorted. 
-static const std::array<std::pair<size_t, size_t>, 137> - ImportantInstructionSuccessions{ - {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11}, - {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24}, - {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31}, - {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45}, - {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33}, - {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56}, - {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, - {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31}, - {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15}, - {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, - {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32}, - {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2}, - {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, - {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56}, - {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, - {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53}, - {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, - {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34}, - {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, - {64, 1}, {64, 64}, {65, 1}, {65, 65}}}; +// The array is given in opcode pairs rather than labels because 1) labels +// weren't readily available, and 2) the successions were hand - extracted. +// +// This array must be sorted. +static const std::array<std::pair<size_t, size_t>, 137> + ImportantInstructionSuccessions{ + {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11}, + {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24}, + {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31}, + {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45}, + {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33}, + {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56}, + {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, + {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31}, + {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15}, + {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, + {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32}, + {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2}, + {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, + {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56}, + {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, + {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53}, + {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, + {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34}, + {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, + {64, 1}, {64, 64}, {65, 1}, {65, 65}}}; // We have: 9 calculated features (the features here); 1 feature for each // instruction opcode; and 1 feature for each manually-identified sequence. @@ -123,15 +123,15 @@ static const std::array<std::pair<size_t, size_t>, 137> // Note that instruction opcodes start from 1. For convenience, we also have an // always 0 feature for the '0' opcode, hence the extra 1. 
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = - ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + - IRToNativeSizeLearning::NumNamedFeatures; + ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + + IRToNativeSizeLearning::NumNamedFeatures; size_t getSize(Function &F, TargetTransformInfo &TTI) { size_t Ret = 0; - for (const auto &BB : F) - for (const auto &I : BB) - Ret += *(TTI.getInstructionCost( - &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue()); + for (const auto &BB : F) + for (const auto &I : BB) + Ret += *(TTI.getInstructionCost( + &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue()); return Ret; } @@ -143,8 +143,8 @@ size_t getSize(Function &F, FunctionAnalysisManager &FAM) { unsigned getMaxDominatorTreeDepth(const Function &F, const DominatorTree &Tree) { unsigned Ret = 0; - for (const auto &BB : F) - if (const auto *TN = Tree.getNode(&BB)) + for (const auto &BB : F) + if (const auto *TN = Tree.getNode(&BB)) Ret = std::max(Ret, TN->getLevel()); return Ret; } @@ -153,37 +153,37 @@ unsigned getMaxDominatorTreeDepth(const Function &F, IRToNativeSizeLearning::FunctionFeatures IRToNativeSizeLearning::getFunctionFeatures(Function &F, FunctionAnalysisManager &FAM) { - assert(llvm::is_sorted(ImportantInstructionSuccessions) && - "expected function features are sorted"); + assert(llvm::is_sorted(ImportantInstructionSuccessions) && + "expected function features are sorted"); auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); FunctionFeatures FF; size_t InstrCount = getMaxInstructionID() + 1; FF.InstructionHistogram.resize(InstrCount); - FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); + FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); - int StartID = 0; - int LastID = StartID; + int StartID = 0; + int LastID = StartID; auto getPairIndex = [](size_t a, size_t b) { - auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); - if (I == ImportantInstructionSuccessions.end()) + auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); + if (I == ImportantInstructionSuccessions.end()) return -1; - return static_cast<int>( - std::distance(ImportantInstructionSuccessions.begin(), I)); + return static_cast<int>( + std::distance(ImportantInstructionSuccessions.begin(), I)); }; // We don't want debug calls, because they'd just add noise. 
- for (const auto &BB : F) { - for (const auto &I : BB.instructionsWithoutDebug()) { - auto ID = I.getOpcode(); + for (const auto &BB : F) { + for (const auto &I : BB.instructionsWithoutDebug()) { + auto ID = I.getOpcode(); ++FF.InstructionHistogram[ID]; int PairIndex = getPairIndex(LastID, ID); if (PairIndex >= 0) ++FF.InstructionPairHistogram[PairIndex]; LastID = ID; - if (isa<CallBase>(I)) + if (isa<CallBase>(I)) ++FF[NamedFeatureIndex::Calls]; } } @@ -221,14 +221,14 @@ InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { if (!isEvaluatorRequested()) { return; } - std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( - "serving_default_input_1", - {1, static_cast<int64_t>( - IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; - std::vector<TensorSpec> OutputSpecs{ - TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})}; + std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( + "serving_default_input_1", + {1, static_cast<int64_t>( + IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; + std::vector<TensorSpec> OutputSpecs{ + TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})}; Evaluator = std::make_unique<TFModelEvaluator>( - TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); + TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); if (!Evaluator || !Evaluator->isValid()) { Evaluator.reset(); return; @@ -272,12 +272,12 @@ InlineSizeEstimatorAnalysis::run(const Function &F, return None; } bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } -#endif - -PreservedAnalyses -InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() - << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n"; - return PreservedAnalyses::all(); -} +#endif + +PreservedAnalyses +InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() + << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n"; + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Analysis/InstCount.cpp b/contrib/libs/llvm12/lib/Analysis/InstCount.cpp index 8366bee083..f08bc82c82 100644 --- a/contrib/libs/llvm12/lib/Analysis/InstCount.cpp +++ b/contrib/libs/llvm12/lib/Analysis/InstCount.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/InstCount.h" +#include "llvm/Analysis/InstCount.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" @@ -24,71 +24,71 @@ using namespace llvm; #define DEBUG_TYPE "instcount" -STATISTIC(TotalInsts, "Number of instructions (of all types)"); +STATISTIC(TotalInsts, "Number of instructions (of all types)"); STATISTIC(TotalBlocks, "Number of basic blocks"); -STATISTIC(TotalFuncs, "Number of non-external functions"); +STATISTIC(TotalFuncs, "Number of non-external functions"); -#define HANDLE_INST(N, OPCODE, CLASS) \ - STATISTIC(Num##OPCODE##Inst, "Number of " #OPCODE " insts"); +#define HANDLE_INST(N, OPCODE, CLASS) \ + STATISTIC(Num##OPCODE##Inst, "Number of " #OPCODE " insts"); #include "llvm/IR/Instruction.def" namespace { -class InstCount : public InstVisitor<InstCount> { - friend class InstVisitor<InstCount>; +class InstCount : public InstVisitor<InstCount> { + friend class InstVisitor<InstCount>; - void visitFunction(Function &F) { 
++TotalFuncs; } - void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; } + void visitFunction(Function &F) { ++TotalFuncs; } + void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; } -#define HANDLE_INST(N, OPCODE, CLASS) \ - void visit##OPCODE(CLASS &) { \ - ++Num##OPCODE##Inst; \ - ++TotalInsts; \ - } +#define HANDLE_INST(N, OPCODE, CLASS) \ + void visit##OPCODE(CLASS &) { \ + ++Num##OPCODE##Inst; \ + ++TotalInsts; \ + } #include "llvm/IR/Instruction.def" - void visitInstruction(Instruction &I) { - errs() << "Instruction Count does not know about " << I; - llvm_unreachable(nullptr); - } -}; -} // namespace - -PreservedAnalyses InstCountPass::run(Function &F, - FunctionAnalysisManager &FAM) { - LLVM_DEBUG(dbgs() << "INSTCOUNT: running on function " << F.getName() - << "\n"); - InstCount().visit(F); - - return PreservedAnalyses::all(); -} - -namespace { -class InstCountLegacyPass : public FunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - InstCountLegacyPass() : FunctionPass(ID) { - initializeInstCountLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - LLVM_DEBUG(dbgs() << "INSTCOUNT: running on function " << F.getName() - << "\n"); - InstCount().visit(F); - return false; + void visitInstruction(Instruction &I) { + errs() << "Instruction Count does not know about " << I; + llvm_unreachable(nullptr); + } +}; +} // namespace + +PreservedAnalyses InstCountPass::run(Function &F, + FunctionAnalysisManager &FAM) { + LLVM_DEBUG(dbgs() << "INSTCOUNT: running on function " << F.getName() + << "\n"); + InstCount().visit(F); + + return PreservedAnalyses::all(); +} + +namespace { +class InstCountLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + InstCountLegacyPass() : FunctionPass(ID) { + initializeInstCountLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + LLVM_DEBUG(dbgs() << "INSTCOUNT: running on function " << F.getName() + << "\n"); + InstCount().visit(F); + return false; }; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - - void print(raw_ostream &O, const Module *M) const override {} -}; -} // namespace - -char InstCountLegacyPass::ID = 0; -INITIALIZE_PASS(InstCountLegacyPass, "instcount", + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + void print(raw_ostream &O, const Module *M) const override {} +}; +} // namespace + +char InstCountLegacyPass::ID = 0; +INITIALIZE_PASS(InstCountLegacyPass, "instcount", "Counts the various types of Instructions", false, true) -FunctionPass *llvm::createInstCountPass() { return new InstCountLegacyPass(); } +FunctionPass *llvm::createInstCountPass() { return new InstCountLegacyPass(); } diff --git a/contrib/libs/llvm12/lib/Analysis/InstructionSimplify.cpp b/contrib/libs/llvm12/lib/Analysis/InstructionSimplify.cpp index a12816885c..730ff6b4e7 100644 --- a/contrib/libs/llvm12/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/libs/llvm12/lib/Analysis/InstructionSimplify.cpp @@ -228,56 +228,56 @@ static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return false; } -/// Try to simplify a binary operator of form "V op OtherOp" where V is -/// "(B0 opex B1)" by distributing 'op' across 'opex' as -/// "(B0 op OtherOp) opex (B1 op OtherOp)". 
-static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V, - Value *OtherOp, Instruction::BinaryOps OpcodeToExpand, +/// Try to simplify a binary operator of form "V op OtherOp" where V is +/// "(B0 opex B1)" by distributing 'op' across 'opex' as +/// "(B0 op OtherOp) opex (B1 op OtherOp)". +static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V, + Value *OtherOp, Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q, unsigned MaxRecurse) { - auto *B = dyn_cast<BinaryOperator>(V); - if (!B || B->getOpcode() != OpcodeToExpand) - return nullptr; - Value *B0 = B->getOperand(0), *B1 = B->getOperand(1); - Value *L = SimplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(), - MaxRecurse); - if (!L) - return nullptr; - Value *R = SimplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(), - MaxRecurse); - if (!R) - return nullptr; - - // Does the expanded pair of binops simplify to the existing binop? - if ((L == B0 && R == B1) || - (Instruction::isCommutative(OpcodeToExpand) && L == B1 && R == B0)) { - ++NumExpand; - return B; - } - - // Otherwise, return "L op' R" if it simplifies. - Value *S = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse); - if (!S) - return nullptr; - - ++NumExpand; - return S; -} - -/// Try to simplify binops of form "A op (B op' C)" or the commuted variant by -/// distributing op over op'. -static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode, - Value *L, Value *R, - Instruction::BinaryOps OpcodeToExpand, - const SimplifyQuery &Q, - unsigned MaxRecurse) { + auto *B = dyn_cast<BinaryOperator>(V); + if (!B || B->getOpcode() != OpcodeToExpand) + return nullptr; + Value *B0 = B->getOperand(0), *B1 = B->getOperand(1); + Value *L = SimplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(), + MaxRecurse); + if (!L) + return nullptr; + Value *R = SimplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(), + MaxRecurse); + if (!R) + return nullptr; + + // Does the expanded pair of binops simplify to the existing binop? + if ((L == B0 && R == B1) || + (Instruction::isCommutative(OpcodeToExpand) && L == B1 && R == B0)) { + ++NumExpand; + return B; + } + + // Otherwise, return "L op' R" if it simplifies. + Value *S = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse); + if (!S) + return nullptr; + + ++NumExpand; + return S; +} + +/// Try to simplify binops of form "A op (B op' C)" or the commuted variant by +/// distributing op over op'. +static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode, + Value *L, Value *R, + Instruction::BinaryOps OpcodeToExpand, + const SimplifyQuery &Q, + unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; - if (Value *V = expandBinOp(Opcode, L, R, OpcodeToExpand, Q, MaxRecurse)) - return V; - if (Value *V = expandBinOp(Opcode, R, L, OpcodeToExpand, Q, MaxRecurse)) - return V; + if (Value *V = expandBinOp(Opcode, L, R, OpcodeToExpand, Q, MaxRecurse)) + return V; + if (Value *V = expandBinOp(Opcode, R, L, OpcodeToExpand, Q, MaxRecurse)) + return V; return nullptr; } @@ -415,9 +415,9 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, return TV; // If one branch simplified to undef, return the other one. 
- if (TV && Q.isUndefValue(TV)) + if (TV && Q.isUndefValue(TV)) return FV; - if (FV && Q.isUndefValue(FV)) + if (FV && Q.isUndefValue(FV)) return TV; // If applying the operation did not change the true and false select values, @@ -612,7 +612,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, return C; // X + undef -> undef - if (Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op1)) return Op1; // X + 0 -> X @@ -732,7 +732,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // X - undef -> undef // undef - X -> undef - if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) return UndefValue::get(Op0->getType()); // X - 0 -> X @@ -867,7 +867,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // X * undef -> 0 // X * 0 -> 0 - if (Q.isUndefValue(Op1) || match(Op1, m_Zero())) + if (Q.isUndefValue(Op1) || match(Op1, m_Zero())) return Constant::getNullValue(Op0->getType()); // X * 1 -> X @@ -893,8 +893,8 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return V; // Mul distributes over Add. Try some generic simplifications based on this. - if (Value *V = expandCommutativeBinOp(Instruction::Mul, Op0, Op1, - Instruction::Add, Q, MaxRecurse)) + if (Value *V = expandCommutativeBinOp(Instruction::Mul, Op0, Op1, + Instruction::Add, Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether @@ -920,37 +920,37 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { /// Check for common or similar folds of integer division or integer remainder. /// This applies to all 4 opcodes (sdiv/udiv/srem/urem). -static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv, - const SimplifyQuery &Q) { +static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv, + const SimplifyQuery &Q) { Type *Ty = Op0->getType(); - // X / undef -> poison - // X % undef -> poison - if (Q.isUndefValue(Op1)) - return PoisonValue::get(Ty); + // X / undef -> poison + // X % undef -> poison + if (Q.isUndefValue(Op1)) + return PoisonValue::get(Ty); - // X / 0 -> poison - // X % 0 -> poison + // X / 0 -> poison + // X % 0 -> poison // We don't need to preserve faults! if (match(Op1, m_Zero())) - return PoisonValue::get(Ty); + return PoisonValue::get(Ty); - // If any element of a constant divisor fixed width vector is zero or undef - // the behavior is undefined and we can fold the whole op to poison. + // If any element of a constant divisor fixed width vector is zero or undef + // the behavior is undefined and we can fold the whole op to poison. 
auto *Op1C = dyn_cast<Constant>(Op1); auto *VTy = dyn_cast<FixedVectorType>(Ty); if (Op1C && VTy) { unsigned NumElts = VTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = Op1C->getAggregateElement(i); - if (Elt && (Elt->isNullValue() || Q.isUndefValue(Elt))) - return PoisonValue::get(Ty); + if (Elt && (Elt->isNullValue() || Q.isUndefValue(Elt))) + return PoisonValue::get(Ty); } } // undef / X -> 0 // undef % X -> 0 - if (Q.isUndefValue(Op0)) + if (Q.isUndefValue(Op0)) return Constant::getNullValue(Ty); // 0 / X -> 0 @@ -1044,7 +1044,7 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; - if (Value *V = simplifyDivRem(Op0, Op1, true, Q)) + if (Value *V = simplifyDivRem(Op0, Op1, true, Q)) return V; bool IsSigned = Opcode == Instruction::SDiv; @@ -1102,7 +1102,7 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; - if (Value *V = simplifyDivRem(Op0, Op1, false, Q)) + if (Value *V = simplifyDivRem(Op0, Op1, false, Q)) return V; // (X % Y) % Y -> X % Y @@ -1197,14 +1197,14 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); } -/// Returns true if a shift by \c Amount always yields poison. -static bool isPoisonShift(Value *Amount, const SimplifyQuery &Q) { +/// Returns true if a shift by \c Amount always yields poison. +static bool isPoisonShift(Value *Amount, const SimplifyQuery &Q) { Constant *C = dyn_cast<Constant>(Amount); if (!C) return false; - // X shift by undef -> poison because it may shift by the bitwidth. - if (Q.isUndefValue(C)) + // X shift by undef -> poison because it may shift by the bitwidth. + if (Q.isUndefValue(C)) return true; // Shifting by the bitwidth or more is undefined. @@ -1215,10 +1215,10 @@ static bool isPoisonShift(Value *Amount, const SimplifyQuery &Q) { // If all lanes of a vector shift are undefined the whole shift is. if (isa<ConstantVector>(C) || isa<ConstantDataVector>(C)) { - for (unsigned I = 0, - E = cast<FixedVectorType>(C->getType())->getNumElements(); + for (unsigned I = 0, + E = cast<FixedVectorType>(C->getType())->getNumElements(); I != E; ++I) - if (!isPoisonShift(C->getAggregateElement(I), Q)) + if (!isPoisonShift(C->getAggregateElement(I), Q)) return false; return true; } @@ -1246,8 +1246,8 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, return Op0; // Fold undefined shifts. - if (isPoisonShift(Op1, Q)) - return PoisonValue::get(Op0->getType()); + if (isPoisonShift(Op1, Q)) + return PoisonValue::get(Op0->getType()); // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. @@ -1265,7 +1265,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // the number of bits in the type, the shift is undefined. KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (Known.One.getLimitedValue() >= Known.getBitWidth()) - return PoisonValue::get(Op0->getType()); + return PoisonValue::get(Op0->getType()); // If all valid bits in the shift amount are known zero, the first operand is // unchanged. 
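As an aside (not part of the patch), the shift folds in the hunk above rest on one rule: a shift amount that is undef, or that is known to be at least the bit width, always yields poison. A minimal scalar sketch of that test, with hypothetical parameter names:

#include <cstdint>

// Scalar analogue of isPoisonShift above: an undef amount may shift by the
// bit width, and shifting by BitWidth or more is undefined, so either case
// lets the whole shift fold to poison.
bool scalarShiftAlwaysPoison(uint64_t Amount, bool AmountIsUndef,
                             unsigned BitWidth) {
  if (AmountIsUndef)
    return true;                 // e.g. "shl i32 %x, undef"
  return Amount >= BitWidth;     // e.g. "lshr i32 %x, 32"
}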
@@ -1290,7 +1290,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, // undef >> X -> 0 // undef >> X -> undef (if it's exact) - if (Q.isUndefValue(Op0)) + if (Q.isUndefValue(Op0)) return isExact ? Op0 : Constant::getNullValue(Op0->getType()); // The low bit cannot be shifted out of an exact shift if it is set. @@ -1312,7 +1312,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // undef << X -> 0 // undef << X -> undef if (if it's NSW/NUW) - if (Q.isUndefValue(Op0)) + if (Q.isUndefValue(Op0)) return isNSW || isNUW ? Op0 : Constant::getNullValue(Op0->getType()); // (X >> A) << A -> X @@ -1698,24 +1698,24 @@ static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1, if (!Cmp0->isEquality()) return nullptr; - // The non-equality compare must include a common operand (X). Canonicalize - // the common operand as operand 0 (the predicate is swapped if the common - // operand was operand 1). - ICmpInst::Predicate Pred0 = Cmp0->getPredicate(); - Value *X = Cmp0->getOperand(0); - ICmpInst::Predicate Pred1; - bool HasNotOp = match(Cmp1, m_c_ICmp(Pred1, m_Not(m_Specific(X)), m_Value())); - if (!HasNotOp && !match(Cmp1, m_c_ICmp(Pred1, m_Specific(X), m_Value()))) - return nullptr; - if (ICmpInst::isEquality(Pred1)) - return nullptr; - - // The equality compare must be against a constant. Flip bits if we matched - // a bitwise not. Convert a null pointer constant to an integer zero value. + // The non-equality compare must include a common operand (X). Canonicalize + // the common operand as operand 0 (the predicate is swapped if the common + // operand was operand 1). + ICmpInst::Predicate Pred0 = Cmp0->getPredicate(); + Value *X = Cmp0->getOperand(0); + ICmpInst::Predicate Pred1; + bool HasNotOp = match(Cmp1, m_c_ICmp(Pred1, m_Not(m_Specific(X)), m_Value())); + if (!HasNotOp && !match(Cmp1, m_c_ICmp(Pred1, m_Specific(X), m_Value()))) + return nullptr; + if (ICmpInst::isEquality(Pred1)) + return nullptr; + + // The equality compare must be against a constant. Flip bits if we matched + // a bitwise not. Convert a null pointer constant to an integer zero value. APInt MinMaxC; const APInt *C; if (match(Cmp0->getOperand(1), m_APInt(C))) - MinMaxC = HasNotOp ? ~*C : *C; + MinMaxC = HasNotOp ? ~*C : *C; else if (isa<ConstantPointerNull>(Cmp0->getOperand(1))) MinMaxC = APInt::getNullValue(8); else @@ -1999,30 +1999,30 @@ static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0, return NotOp1; } -/// Given a bitwise logic op, check if the operands are add/sub with a common -/// source value and inverted constant (identity: C - X -> ~(X + ~C)). -static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1, - Instruction::BinaryOps Opcode) { - assert(Op0->getType() == Op1->getType() && "Mismatched binop types"); - assert(BinaryOperator::isBitwiseLogicOp(Opcode) && "Expected logic op"); - Value *X; - Constant *C1, *C2; - if ((match(Op0, m_Add(m_Value(X), m_Constant(C1))) && - match(Op1, m_Sub(m_Constant(C2), m_Specific(X)))) || - (match(Op1, m_Add(m_Value(X), m_Constant(C1))) && - match(Op0, m_Sub(m_Constant(C2), m_Specific(X))))) { - if (ConstantExpr::getNot(C1) == C2) { - // (X + C) & (~C - X) --> (X + C) & ~(X + C) --> 0 - // (X + C) | (~C - X) --> (X + C) | ~(X + C) --> -1 - // (X + C) ^ (~C - X) --> (X + C) ^ ~(X + C) --> -1 - Type *Ty = Op0->getType(); - return Opcode == Instruction::And ? 
ConstantInt::getNullValue(Ty) - : ConstantInt::getAllOnesValue(Ty); - } - } - return nullptr; -} - +/// Given a bitwise logic op, check if the operands are add/sub with a common +/// source value and inverted constant (identity: C - X -> ~(X + ~C)). +static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1, + Instruction::BinaryOps Opcode) { + assert(Op0->getType() == Op1->getType() && "Mismatched binop types"); + assert(BinaryOperator::isBitwiseLogicOp(Opcode) && "Expected logic op"); + Value *X; + Constant *C1, *C2; + if ((match(Op0, m_Add(m_Value(X), m_Constant(C1))) && + match(Op1, m_Sub(m_Constant(C2), m_Specific(X)))) || + (match(Op1, m_Add(m_Value(X), m_Constant(C1))) && + match(Op0, m_Sub(m_Constant(C2), m_Specific(X))))) { + if (ConstantExpr::getNot(C1) == C2) { + // (X + C) & (~C - X) --> (X + C) & ~(X + C) --> 0 + // (X + C) | (~C - X) --> (X + C) | ~(X + C) --> -1 + // (X + C) ^ (~C - X) --> (X + C) ^ ~(X + C) --> -1 + Type *Ty = Op0->getType(); + return Opcode == Instruction::And ? ConstantInt::getNullValue(Ty) + : ConstantInt::getAllOnesValue(Ty); + } + } + return nullptr; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -2031,7 +2031,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return C; // X & undef -> 0 - if (Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op1)) return Constant::getNullValue(Op0->getType()); // X & X = X @@ -2059,9 +2059,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (match(Op1, m_c_Or(m_Specific(Op0), m_Value()))) return Op0; - if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And)) - return V; - + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And)) + return V; + // A mask that only clears known zeros of a shifted value is a no-op. Value *X; const APInt *Mask; @@ -2118,30 +2118,30 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return V; // And distributes over Or. Try some generic simplifications based on this. - if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1, - Instruction::Or, Q, MaxRecurse)) + if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1, + Instruction::Or, Q, MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. - if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1, - Instruction::Xor, Q, MaxRecurse)) + if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1, + Instruction::Xor, Q, MaxRecurse)) return V; - if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) { - if (Op0->getType()->isIntOrIntVectorTy(1)) { - // A & (A && B) -> A && B - if (match(Op1, m_Select(m_Specific(Op0), m_Value(), m_Zero()))) - return Op1; - else if (match(Op0, m_Select(m_Specific(Op1), m_Value(), m_Zero()))) - return Op0; - } - // If the operation is with the result of a select instruction, check - // whether operating on either branch of the select always yields the same - // value. 
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) { + if (Op0->getType()->isIntOrIntVectorTy(1)) { + // A & (A && B) -> A && B + if (match(Op1, m_Select(m_Specific(Op0), m_Value(), m_Zero()))) + return Op1; + else if (match(Op0, m_Select(m_Specific(Op1), m_Value(), m_Zero()))) + return Op0; + } + // If the operation is with the result of a select instruction, check + // whether operating on either branch of the select always yields the same + // value. if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; - } + } // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. @@ -2201,7 +2201,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // X | undef -> -1 // X | -1 = -1 // Do not return Op1 because it may contain undef elements if it's a vector. - if (Q.isUndefValue(Op1) || match(Op1, m_AllOnes())) + if (Q.isUndefValue(Op1) || match(Op1, m_AllOnes())) return Constant::getAllOnesValue(Op0->getType()); // X | X = X @@ -2230,10 +2230,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value())))) return Constant::getAllOnesValue(Op0->getType()); - if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or)) - return V; - - Value *A, *B, *NotA; + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or)) + return V; + + Value *A, *B, *NotA; // (A & ~B) | (A ^ B) -> (A ^ B) // (~B & A) | (A ^ B) -> (A ^ B) // (A & ~B) | (B ^ A) -> (B ^ A) @@ -2262,7 +2262,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) return Op1; - // Commute the 'or' operands. + // Commute the 'or' operands. // (~A ^ B) | (A & B) -> (~A ^ B) // (~A ^ B) | (B & A) -> (~A ^ B) // (B ^ ~A) | (A & B) -> (B ^ ~A) @@ -2272,25 +2272,25 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) return Op0; - // (~A & B) | ~(A | B) --> ~A - // (~A & B) | ~(B | A) --> ~A - // (B & ~A) | ~(A | B) --> ~A - // (B & ~A) | ~(B | A) --> ~A - if (match(Op0, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))), - m_Value(B))) && - match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) - return NotA; - - // Commute the 'or' operands. - // ~(A | B) | (~A & B) --> ~A - // ~(B | A) | (~A & B) --> ~A - // ~(A | B) | (B & ~A) --> ~A - // ~(B | A) | (B & ~A) --> ~A - if (match(Op1, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))), - m_Value(B))) && - match(Op0, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) - return NotA; - + // (~A & B) | ~(A | B) --> ~A + // (~A & B) | ~(B | A) --> ~A + // (B & ~A) | ~(A | B) --> ~A + // (B & ~A) | ~(B | A) --> ~A + if (match(Op0, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))), + m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return NotA; + + // Commute the 'or' operands. 
+ // ~(A | B) | (~A & B) --> ~A + // ~(B | A) | (~A & B) --> ~A + // ~(A | B) | (B & ~A) --> ~A + // ~(B | A) | (B & ~A) --> ~A + if (match(Op1, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))), + m_Value(B))) && + match(Op0, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return NotA; + if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false)) return V; @@ -2308,25 +2308,25 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return V; // Or distributes over And. Try some generic simplifications based on this. - if (Value *V = expandCommutativeBinOp(Instruction::Or, Op0, Op1, - Instruction::And, Q, MaxRecurse)) + if (Value *V = expandCommutativeBinOp(Instruction::Or, Op0, Op1, + Instruction::And, Q, MaxRecurse)) return V; - if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) { - if (Op0->getType()->isIntOrIntVectorTy(1)) { - // A | (A || B) -> A || B - if (match(Op1, m_Select(m_Specific(Op0), m_One(), m_Value()))) - return Op1; - else if (match(Op0, m_Select(m_Specific(Op1), m_One(), m_Value()))) - return Op0; - } - // If the operation is with the result of a select instruction, check - // whether operating on either branch of the select always yields the same - // value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) { + if (Op0->getType()->isIntOrIntVectorTy(1)) { + // A | (A || B) -> A || B + if (match(Op1, m_Select(m_Specific(Op0), m_One(), m_Value()))) + return Op1; + else if (match(Op0, m_Select(m_Specific(Op1), m_One(), m_Value()))) + return Op0; + } + // If the operation is with the result of a select instruction, check + // whether operating on either branch of the select always yields the same + // value. if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; - } + } // (A & C1)|(B & C2) const APInt *C1, *C2; @@ -2375,7 +2375,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return C; // A ^ undef -> undef - if (Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op1)) return Op1; // A ^ 0 = A @@ -2391,9 +2391,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Op0->getType()); - if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor)) - return V; - + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor)) + return V; + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse)) @@ -2600,8 +2600,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, // memory within the lifetime of the current function (allocas, byval // arguments, globals), then determine the comparison result here. SmallVector<const Value *, 8> LHSUObjs, RHSUObjs; - getUnderlyingObjects(LHS, LHSUObjs); - getUnderlyingObjects(RHS, RHSUObjs); + getUnderlyingObjects(LHS, LHSUObjs); + getUnderlyingObjects(RHS, RHSUObjs); // Is the set of underlying objects all noalias calls? auto IsNAC = [](ArrayRef<const Value *> Objects) { @@ -2808,7 +2808,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, } const APInt *C; - if (!match(RHS, m_APIntAllowUndef(C))) + if (!match(RHS, m_APIntAllowUndef(C))) return nullptr; // Rule out tautological comparisons (eg., ult 0 or uge 0). 
@@ -2826,160 +2826,160 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, return ConstantInt::getFalse(ITy); } - // (mul nuw/nsw X, MulC) != C --> true (if C is not a multiple of MulC) - // (mul nuw/nsw X, MulC) == C --> false (if C is not a multiple of MulC) - const APInt *MulC; - if (ICmpInst::isEquality(Pred) && - ((match(LHS, m_NUWMul(m_Value(), m_APIntAllowUndef(MulC))) && - *MulC != 0 && C->urem(*MulC) != 0) || - (match(LHS, m_NSWMul(m_Value(), m_APIntAllowUndef(MulC))) && - *MulC != 0 && C->srem(*MulC) != 0))) - return ConstantInt::get(ITy, Pred == ICmpInst::ICMP_NE); - - return nullptr; -} - -static Value *simplifyICmpWithBinOpOnLHS( - CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS, - const SimplifyQuery &Q, unsigned MaxRecurse) { - Type *ITy = GetCompareTy(RHS); // The return type. - - Value *Y = nullptr; - // icmp pred (or X, Y), X - if (match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) { - if (Pred == ICmpInst::ICMP_ULT) - return getFalse(ITy); - if (Pred == ICmpInst::ICMP_UGE) - return getTrue(ITy); - - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { - KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (RHSKnown.isNonNegative() && YKnown.isNegative()) - return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); - if (RHSKnown.isNegative() || YKnown.isNonNegative()) - return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy); - } - } - - // icmp pred (and X, Y), X - if (match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) { - if (Pred == ICmpInst::ICMP_UGT) - return getFalse(ITy); - if (Pred == ICmpInst::ICMP_ULE) - return getTrue(ITy); - } - - // icmp pred (urem X, Y), Y - if (match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { - switch (Pred) { - default: - break; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (!Known.isNonNegative()) - break; - LLVM_FALLTHROUGH; - } - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - return getFalse(ITy); - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (!Known.isNonNegative()) - break; - LLVM_FALLTHROUGH; - } - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - return getTrue(ITy); - } - } - - // icmp pred (urem X, Y), X - if (match(LBO, m_URem(m_Specific(RHS), m_Value()))) { - if (Pred == ICmpInst::ICMP_ULE) - return getTrue(ITy); - if (Pred == ICmpInst::ICMP_UGT) - return getFalse(ITy); - } - - // x >> y <=u x - // x udiv y <=u x. - if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) || - match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { - // icmp pred (X op Y), X - if (Pred == ICmpInst::ICMP_UGT) - return getFalse(ITy); - if (Pred == ICmpInst::ICMP_ULE) - return getTrue(ITy); - } - - // (x*C1)/C2 <= x for C1 <= C2. - // This holds even if the multiplication overflows: Assume that x != 0 and - // arithmetic is modulo M. For overflow to occur we must have C1 >= M/x and - // thus C2 >= M/x. It follows that (x*C1)/C2 <= (M-1)/C2 <= ((M-1)*x)/M < x. - // - // Additionally, either the multiplication and division might be represented - // as shifts: - // (x*C1)>>C2 <= x for C1 < 2**C2. - // (x<<C1)/C2 <= x for 2**C1 < C2. 
- const APInt *C1, *C2; - if ((match(LBO, m_UDiv(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) && - C1->ule(*C2)) || - (match(LBO, m_LShr(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) && - C1->ule(APInt(C2->getBitWidth(), 1) << *C2)) || - (match(LBO, m_UDiv(m_Shl(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) && - (APInt(C1->getBitWidth(), 1) << *C1).ule(*C2))) { - if (Pred == ICmpInst::ICMP_UGT) - return getFalse(ITy); - if (Pred == ICmpInst::ICMP_ULE) - return getTrue(ITy); - } - + // (mul nuw/nsw X, MulC) != C --> true (if C is not a multiple of MulC) + // (mul nuw/nsw X, MulC) == C --> false (if C is not a multiple of MulC) + const APInt *MulC; + if (ICmpInst::isEquality(Pred) && + ((match(LHS, m_NUWMul(m_Value(), m_APIntAllowUndef(MulC))) && + *MulC != 0 && C->urem(*MulC) != 0) || + (match(LHS, m_NSWMul(m_Value(), m_APIntAllowUndef(MulC))) && + *MulC != 0 && C->srem(*MulC) != 0))) + return ConstantInt::get(ITy, Pred == ICmpInst::ICMP_NE); + return nullptr; } - -// If only one of the icmp's operands has NSW flags, try to prove that: -// -// icmp slt (x + C1), (x +nsw C2) -// -// is equivalent to: -// -// icmp slt C1, C2 -// -// which is true if x + C2 has the NSW flags set and: -// *) C1 < C2 && C1 >= 0, or -// *) C2 < C1 && C1 <= 0. -// -static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS, - Value *RHS) { - // TODO: only support icmp slt for now. - if (Pred != CmpInst::ICMP_SLT) - return false; - - // Canonicalize nsw add as RHS. - if (!match(RHS, m_NSWAdd(m_Value(), m_Value()))) - std::swap(LHS, RHS); - if (!match(RHS, m_NSWAdd(m_Value(), m_Value()))) - return false; - - Value *X; - const APInt *C1, *C2; - if (!match(LHS, m_c_Add(m_Value(X), m_APInt(C1))) || - !match(RHS, m_c_Add(m_Specific(X), m_APInt(C2)))) - return false; - - return (C1->slt(*C2) && C1->isNonNegative()) || - (C2->slt(*C1) && C1->isNonPositive()); -} - - +static Value *simplifyICmpWithBinOpOnLHS( + CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { + Type *ITy = GetCompareTy(RHS); // The return type. + + Value *Y = nullptr; + // icmp pred (or X, Y), X + if (match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) { + if (Pred == ICmpInst::ICMP_ULT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_UGE) + return getTrue(ITy); + + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { + KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (RHSKnown.isNonNegative() && YKnown.isNegative()) + return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); + if (RHSKnown.isNegative() || YKnown.isNonNegative()) + return Pred == ICmpInst::ICMP_SLT ? 
getFalse(ITy) : getTrue(ITy); + } + } + + // icmp pred (and X, Y), X + if (match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) { + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + // icmp pred (urem X, Y), Y + if (match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) + break; + LLVM_FALLTHROUGH; + } + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return getFalse(ITy); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) + break; + LLVM_FALLTHROUGH; + } + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return getTrue(ITy); + } + } + + // icmp pred (urem X, Y), X + if (match(LBO, m_URem(m_Specific(RHS), m_Value()))) { + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + } + + // x >> y <=u x + // x udiv y <=u x. + if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) || + match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { + // icmp pred (X op Y), X + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + // (x*C1)/C2 <= x for C1 <= C2. + // This holds even if the multiplication overflows: Assume that x != 0 and + // arithmetic is modulo M. For overflow to occur we must have C1 >= M/x and + // thus C2 >= M/x. It follows that (x*C1)/C2 <= (M-1)/C2 <= ((M-1)*x)/M < x. + // + // Additionally, either the multiplication and division might be represented + // as shifts: + // (x*C1)>>C2 <= x for C1 < 2**C2. + // (x<<C1)/C2 <= x for 2**C1 < C2. + const APInt *C1, *C2; + if ((match(LBO, m_UDiv(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) && + C1->ule(*C2)) || + (match(LBO, m_LShr(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) && + C1->ule(APInt(C2->getBitWidth(), 1) << *C2)) || + (match(LBO, m_UDiv(m_Shl(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) && + (APInt(C1->getBitWidth(), 1) << *C1).ule(*C2))) { + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + return nullptr; +} + + +// If only one of the icmp's operands has NSW flags, try to prove that: +// +// icmp slt (x + C1), (x +nsw C2) +// +// is equivalent to: +// +// icmp slt C1, C2 +// +// which is true if x + C2 has the NSW flags set and: +// *) C1 < C2 && C1 >= 0, or +// *) C2 < C1 && C1 <= 0. +// +static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { + // TODO: only support icmp slt for now. + if (Pred != CmpInst::ICMP_SLT) + return false; + + // Canonicalize nsw add as RHS. + if (!match(RHS, m_NSWAdd(m_Value(), m_Value()))) + std::swap(LHS, RHS); + if (!match(RHS, m_NSWAdd(m_Value(), m_Value()))) + return false; + + Value *X; + const APInt *C1, *C2; + if (!match(LHS, m_c_Add(m_Value(X), m_APInt(C1))) || + !match(RHS, m_c_Add(m_Specific(X), m_APInt(C2)))) + return false; + + return (C1->slt(*C2) && C1->isNonNegative()) || + (C2->slt(*C1) && C1->isNonPositive()); +} + + /// TODO: A large part of this logic is duplicated in InstCombine's /// foldICmpBinOp(). We should be able to share that and avoid the code /// duplication. 
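For illustration only (not part of the patch): the condition computed at the end of trySimplifyICmpWithAdds above, restated as a small standalone predicate with a worked example; the function and parameter names here are hypothetical.

#include <cstdint>

// When does "icmp slt (x + C1), (x +nsw C2)" reduce to "icmp slt C1, C2"?
// Per the comment above: C1 < C2 with C1 non-negative, or C2 < C1 with C1
// non-positive (mirroring C1->slt(*C2) && C1->isNonNegative(), etc.).
bool foldableSignedLessOfAdds(int64_t C1, int64_t C2) {
  return (C1 < C2 && C1 >= 0) || (C2 < C1 && C1 <= 0);
}

// Example: C1 = 3, C2 = 7 satisfies the first clause, so
// "icmp slt (x + 3), (x +nsw 7)" folds to "icmp slt 3, 7", i.e. true.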
@@ -3029,9 +3029,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return V; // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. - bool CanSimplify = (NoLHSWrapProblem && NoRHSWrapProblem) || - trySimplifyICmpWithAdds(Pred, LHS, RHS); - if (A && C && (A == C || A == D || B == C || B == D) && CanSimplify) { + bool CanSimplify = (NoLHSWrapProblem && NoRHSWrapProblem) || + trySimplifyICmpWithAdds(Pred, LHS, RHS); + if (A && C && (A == C || A == D || B == C || B == D) && CanSimplify) { // Determine Y and Z in the form icmp (X+Y), (X+Z). Value *Y, *Z; if (A == C) { @@ -3057,66 +3057,66 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, } } - if (LBO) - if (Value *V = simplifyICmpWithBinOpOnLHS(Pred, LBO, RHS, Q, MaxRecurse)) - return V; + if (LBO) + if (Value *V = simplifyICmpWithBinOpOnLHS(Pred, LBO, RHS, Q, MaxRecurse)) + return V; - if (RBO) - if (Value *V = simplifyICmpWithBinOpOnLHS( - ICmpInst::getSwappedPredicate(Pred), RBO, LHS, Q, MaxRecurse)) - return V; + if (RBO) + if (Value *V = simplifyICmpWithBinOpOnLHS( + ICmpInst::getSwappedPredicate(Pred), RBO, LHS, Q, MaxRecurse)) + return V; // 0 - (zext X) pred C if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) { - const APInt *C; - if (match(RHS, m_APInt(C))) { - if (C->isStrictlyPositive()) { - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(GetCompareTy(RHS)); - if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(GetCompareTy(RHS)); + const APInt *C; + if (match(RHS, m_APInt(C))) { + if (C->isStrictlyPositive()) { + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_NE) + return ConstantInt::getTrue(GetCompareTy(RHS)); + if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_EQ) + return ConstantInt::getFalse(GetCompareTy(RHS)); } - if (C->isNonNegative()) { + if (C->isNonNegative()) { if (Pred == ICmpInst::ICMP_SLE) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(GetCompareTy(RHS)); if (Pred == ICmpInst::ICMP_SGT) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(GetCompareTy(RHS)); } } } - // If C2 is a power-of-2 and C is not: - // (C2 << X) == C --> false - // (C2 << X) != C --> true - const APInt *C; - if (match(LHS, m_Shl(m_Power2(), m_Value())) && - match(RHS, m_APIntAllowUndef(C)) && !C->isPowerOf2()) { - // C2 << X can equal zero in some circumstances. - // This simplification might be unsafe if C is zero. - // - // We know it is safe if: - // - The shift is nsw. We can't shift out the one bit. - // - The shift is nuw. We can't shift out the one bit. - // - C2 is one. - // - C isn't zero. - if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) || - Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) || - match(LHS, m_Shl(m_One(), m_Value())) || !C->isNullValue()) { - if (Pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(GetCompareTy(RHS)); - if (Pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(GetCompareTy(RHS)); + // If C2 is a power-of-2 and C is not: + // (C2 << X) == C --> false + // (C2 << X) != C --> true + const APInt *C; + if (match(LHS, m_Shl(m_Power2(), m_Value())) && + match(RHS, m_APIntAllowUndef(C)) && !C->isPowerOf2()) { + // C2 << X can equal zero in some circumstances. + // This simplification might be unsafe if C is zero. + // + // We know it is safe if: + // - The shift is nsw. We can't shift out the one bit. 
+ // - The shift is nuw. We can't shift out the one bit. + // - C2 is one. + // - C isn't zero. + if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) || + Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) || + match(LHS, m_Shl(m_One(), m_Value())) || !C->isNullValue()) { + if (Pred == ICmpInst::ICMP_EQ) + return ConstantInt::getFalse(GetCompareTy(RHS)); + if (Pred == ICmpInst::ICMP_NE) + return ConstantInt::getTrue(GetCompareTy(RHS)); } } - // TODO: This is overly constrained. LHS can be any power-of-2. - // (1 << X) >u 0x8000 --> false - // (1 << X) <=u 0x8000 --> true - if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) { + // TODO: This is overly constrained. LHS can be any power-of-2. + // (1 << X) >u 0x8000 --> false + // (1 << X) <=u 0x8000 --> true + if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) { if (Pred == ICmpInst::ICMP_UGT) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(GetCompareTy(RHS)); if (Pred == ICmpInst::ICMP_ULE) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(GetCompareTy(RHS)); } if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && @@ -3320,31 +3320,31 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, } } - // Comparing 1 each of min/max with a common operand? - // Canonicalize min operand to RHS. - if (match(LHS, m_UMin(m_Value(), m_Value())) || - match(LHS, m_SMin(m_Value(), m_Value()))) { - std::swap(LHS, RHS); - Pred = ICmpInst::getSwappedPredicate(Pred); - } - + // Comparing 1 each of min/max with a common operand? + // Canonicalize min operand to RHS. + if (match(LHS, m_UMin(m_Value(), m_Value())) || + match(LHS, m_SMin(m_Value(), m_Value()))) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + Value *C, *D; if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && match(RHS, m_SMin(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { - // smax(A, B) >=s smin(A, D) --> true + // smax(A, B) >=s smin(A, D) --> true if (Pred == CmpInst::ICMP_SGE) return getTrue(ITy); - // smax(A, B) <s smin(A, D) --> false + // smax(A, B) <s smin(A, D) --> false if (Pred == CmpInst::ICMP_SLT) return getFalse(ITy); } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && match(RHS, m_UMin(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { - // umax(A, B) >=u umin(A, D) --> true + // umax(A, B) >=u umin(A, D) --> true if (Pred == CmpInst::ICMP_UGE) return getTrue(ITy); - // umax(A, B) <u umin(A, D) --> false + // umax(A, B) <u umin(A, D) --> false if (Pred == CmpInst::ICMP_ULT) return getFalse(ITy); } @@ -3398,12 +3398,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // For EQ and NE, we can always pick a value for the undef to make the // predicate pass or fail, so we can return undef. // Matches behavior in llvm::ConstantFoldCompareInstruction. - if (Q.isUndefValue(RHS) && ICmpInst::isEquality(Pred)) + if (Q.isUndefValue(RHS) && ICmpInst::isEquality(Pred)) return UndefValue::get(ITy); // icmp X, X -> true/false // icmp X, undef -> true/false because undef could be X. 
- if (LHS == RHS || Q.isUndefValue(RHS)) + if (LHS == RHS || Q.isUndefValue(RHS)) return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q)) @@ -3659,7 +3659,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // expression GEP with the same indices and a null base pointer to see // what constant folding can make out of it. Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); - SmallVector<Value *, 4> IndicesLHS(GLHS->indices()); + SmallVector<Value *, 4> IndicesLHS(GLHS->indices()); Constant *NewLHS = ConstantExpr::getGetElementPtr( GLHS->getSourceElementType(), Null, IndicesLHS); @@ -3730,7 +3730,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // fcmp pred x, undef and fcmp pred undef, x // fold to true if unordered, false if ordered - if (Q.isUndefValue(LHS) || Q.isUndefValue(RHS)) { + if (Q.isUndefValue(LHS) || Q.isUndefValue(RHS)) { // Choosing NaN for the undef will always make unordered comparison succeed // and ordered comparison fail. return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred)); @@ -3774,21 +3774,21 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; } } - - // LHS == Inf - if (Pred == FCmpInst::FCMP_OEQ && isKnownNeverInfinity(LHS, Q.TLI)) - return getFalse(RetTy); - // LHS != Inf - if (Pred == FCmpInst::FCMP_UNE && isKnownNeverInfinity(LHS, Q.TLI)) - return getTrue(RetTy); - // LHS == Inf || LHS == NaN - if (Pred == FCmpInst::FCMP_UEQ && isKnownNeverInfinity(LHS, Q.TLI) && - isKnownNeverNaN(LHS, Q.TLI)) - return getFalse(RetTy); - // LHS != Inf && LHS != NaN - if (Pred == FCmpInst::FCMP_ONE && isKnownNeverInfinity(LHS, Q.TLI) && - isKnownNeverNaN(LHS, Q.TLI)) - return getTrue(RetTy); + + // LHS == Inf + if (Pred == FCmpInst::FCMP_OEQ && isKnownNeverInfinity(LHS, Q.TLI)) + return getFalse(RetTy); + // LHS != Inf + if (Pred == FCmpInst::FCMP_UNE && isKnownNeverInfinity(LHS, Q.TLI)) + return getTrue(RetTy); + // LHS == Inf || LHS == NaN + if (Pred == FCmpInst::FCMP_UEQ && isKnownNeverInfinity(LHS, Q.TLI) && + isKnownNeverNaN(LHS, Q.TLI)) + return getFalse(RetTy); + // LHS != Inf && LHS != NaN + if (Pred == FCmpInst::FCMP_ONE && isKnownNeverInfinity(LHS, Q.TLI) && + isKnownNeverNaN(LHS, Q.TLI)) + return getTrue(RetTy); } if (C->isNegative() && !C->isNegZero()) { assert(!C->isNaN() && "Unexpected NaN constant!"); @@ -3920,33 +3920,33 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // We can't replace %sel with %add unless we strip away the flags (which will // be done in InstCombine). // TODO: This is unsound, because it only catches some forms of refinement. - if (!AllowRefinement && canCreatePoison(cast<Operator>(I))) + if (!AllowRefinement && canCreatePoison(cast<Operator>(I))) return nullptr; - // The simplification queries below may return the original value. Consider: - // %div = udiv i32 %arg, %arg2 - // %mul = mul nsw i32 %div, %arg2 - // %cmp = icmp eq i32 %mul, %arg - // %sel = select i1 %cmp, i32 %div, i32 undef - // Replacing %arg by %mul, %div becomes "udiv i32 %mul, %arg2", which - // simplifies back to %arg. This can only happen because %mul does not - // dominate %div. To ensure a consistent return value contract, we make sure - // that this case returns nullptr as well. - auto PreventSelfSimplify = [V](Value *Simplified) { - return Simplified != V ? Simplified : nullptr; - }; - + // The simplification queries below may return the original value. 
Consider: + // %div = udiv i32 %arg, %arg2 + // %mul = mul nsw i32 %div, %arg2 + // %cmp = icmp eq i32 %mul, %arg + // %sel = select i1 %cmp, i32 %div, i32 undef + // Replacing %arg by %mul, %div becomes "udiv i32 %mul, %arg2", which + // simplifies back to %arg. This can only happen because %mul does not + // dominate %div. To ensure a consistent return value contract, we make sure + // that this case returns nullptr as well. + auto PreventSelfSimplify = [V](Value *Simplified) { + return Simplified != V ? Simplified : nullptr; + }; + // If this is a binary operator, try to simplify it with the replaced op. if (auto *B = dyn_cast<BinaryOperator>(I)) { if (MaxRecurse) { if (B->getOperand(0) == Op) - return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), RepOp, - B->getOperand(1), Q, - MaxRecurse - 1)); + return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), RepOp, + B->getOperand(1), Q, + MaxRecurse - 1)); if (B->getOperand(1) == Op) - return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), - B->getOperand(0), RepOp, Q, - MaxRecurse - 1)); + return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), + B->getOperand(0), RepOp, Q, + MaxRecurse - 1)); } } @@ -3954,13 +3954,13 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (CmpInst *C = dyn_cast<CmpInst>(I)) { if (MaxRecurse) { if (C->getOperand(0) == Op) - return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), RepOp, - C->getOperand(1), Q, - MaxRecurse - 1)); + return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), RepOp, + C->getOperand(1), Q, + MaxRecurse - 1)); if (C->getOperand(1) == Op) - return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), - C->getOperand(0), RepOp, Q, - MaxRecurse - 1)); + return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), + C->getOperand(0), RepOp, Q, + MaxRecurse - 1)); } } @@ -3970,8 +3970,8 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, SmallVector<Value *, 8> NewOps(GEP->getNumOperands()); transform(GEP->operands(), NewOps.begin(), [&](Value *V) { return V == Op ? RepOp : V; }); - return PreventSelfSimplify(SimplifyGEPInst(GEP->getSourceElementType(), - NewOps, Q, MaxRecurse - 1)); + return PreventSelfSimplify(SimplifyGEPInst(GEP->getSourceElementType(), + NewOps, Q, MaxRecurse - 1)); } } @@ -4090,8 +4090,8 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, // Test for a bogus zero-shift-guard-op around funnel-shift or rotate. Value *ShAmt; - auto isFsh = m_CombineOr(m_FShl(m_Value(X), m_Value(), m_Value(ShAmt)), - m_FShr(m_Value(), m_Value(X), m_Value(ShAmt))); + auto isFsh = m_CombineOr(m_FShl(m_Value(X), m_Value(), m_Value(ShAmt)), + m_FShr(m_Value(), m_Value(X), m_Value(ShAmt))); // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt) @@ -4102,24 +4102,24 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, // intrinsics do not have that problem. // We do not allow this transform for the general funnel shift case because // that would not preserve the poison safety of the original code. - auto isRotate = - m_CombineOr(m_FShl(m_Value(X), m_Deferred(X), m_Value(ShAmt)), - m_FShr(m_Value(X), m_Deferred(X), m_Value(ShAmt))); + auto isRotate = + m_CombineOr(m_FShl(m_Value(X), m_Deferred(X), m_Value(ShAmt)), + m_FShr(m_Value(X), m_Deferred(X), m_Value(ShAmt))); // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt) // (ShAmt == 0) ? 
X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt) if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt && Pred == ICmpInst::ICMP_EQ) return FalseVal; - - // X == 0 ? abs(X) : -abs(X) --> -abs(X) - // X == 0 ? -abs(X) : abs(X) --> abs(X) - if (match(TrueVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))) && - match(FalseVal, m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))))) - return FalseVal; - if (match(TrueVal, - m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))) && - match(FalseVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))) - return FalseVal; + + // X == 0 ? abs(X) : -abs(X) --> -abs(X) + // X == 0 ? -abs(X) : abs(X) --> abs(X) + if (match(TrueVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))) && + match(FalseVal, m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))))) + return FalseVal; + if (match(TrueVal, + m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))) && + match(FalseVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))) + return FalseVal; } // Check for other compares that behave like bit test. @@ -4127,12 +4127,12 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, TrueVal, FalseVal)) return V; - // If we have a scalar equality comparison, then we know the value in one of - // the arms of the select. See if substituting this value into the arm and + // If we have a scalar equality comparison, then we know the value in one of + // the arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. - // Note that the equivalence/replacement opportunity does not hold for vectors - // because each element of a vector select is chosen independently. - if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) { + // Note that the equivalence/replacement opportunity does not hold for vectors + // because each element of a vector select is chosen independently. + if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) { if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, /* AllowRefinement */ false, MaxRecurse) == TrueVal || @@ -4193,7 +4193,7 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, return ConstantFoldSelectInstruction(CondC, TrueC, FalseC); // select undef, X, Y -> X or Y - if (Q.isUndefValue(CondC)) + if (Q.isUndefValue(CondC)) return isa<Constant>(FalseVal) ? FalseVal : TrueVal; // TODO: Vector constants with undef elements don't simplify. @@ -4219,24 +4219,24 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, if (TrueVal == FalseVal) return TrueVal; - // If the true or false value is undef, we can fold to the other value as - // long as the other value isn't poison. - // select ?, undef, X -> X - if (Q.isUndefValue(TrueVal) && - isGuaranteedNotToBeUndefOrPoison(FalseVal, Q.AC, Q.CxtI, Q.DT)) + // If the true or false value is undef, we can fold to the other value as + // long as the other value isn't poison. 
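Returning to the zero-shift rotate guard handled near the top of this hunk: the select is redundant because rotating by zero is the identity, so both arms agree whenever the guard fires. A short C++20 sketch using std::rotl from <bit> (illustrative only, not the funnel-shift intrinsic itself):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    uint32_t guarded_rotl(uint32_t X, unsigned S) {
      // (S == 0) ? X : rotl(X, S) -- the guard never changes the result.
      return S == 0 ? X : std::rotl(X, static_cast<int>(S));
    }

    int main() {
      const uint32_t X = 0xDEADBEEFu;
      for (unsigned S = 0; S < 32; ++S)
        assert(guarded_rotl(X, S) == std::rotl(X, static_cast<int>(S)));
      return 0;
    }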
+ // select ?, undef, X -> X + if (Q.isUndefValue(TrueVal) && + isGuaranteedNotToBeUndefOrPoison(FalseVal, Q.AC, Q.CxtI, Q.DT)) return FalseVal; - // select ?, X, undef -> X - if (Q.isUndefValue(FalseVal) && - isGuaranteedNotToBeUndefOrPoison(TrueVal, Q.AC, Q.CxtI, Q.DT)) + // select ?, X, undef -> X + if (Q.isUndefValue(FalseVal) && + isGuaranteedNotToBeUndefOrPoison(TrueVal, Q.AC, Q.CxtI, Q.DT)) return TrueVal; // Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC'' Constant *TrueC, *FalseC; - if (isa<FixedVectorType>(TrueVal->getType()) && - match(TrueVal, m_Constant(TrueC)) && + if (isa<FixedVectorType>(TrueVal->getType()) && + match(TrueVal, m_Constant(TrueC)) && match(FalseVal, m_Constant(FalseC))) { - unsigned NumElts = - cast<FixedVectorType>(TrueC->getType())->getNumElements(); + unsigned NumElts = + cast<FixedVectorType>(TrueC->getType())->getNumElements(); SmallVector<Constant *, 16> NewC; for (unsigned i = 0; i != NumElts; ++i) { // Bail out on incomplete vector constants. @@ -4249,11 +4249,11 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, // one element is undef, choose the defined element as the safe result. if (TEltC == FEltC) NewC.push_back(TEltC); - else if (Q.isUndefValue(TEltC) && - isGuaranteedNotToBeUndefOrPoison(FEltC)) + else if (Q.isUndefValue(TEltC) && + isGuaranteedNotToBeUndefOrPoison(FEltC)) NewC.push_back(FEltC); - else if (Q.isUndefValue(FEltC) && - isGuaranteedNotToBeUndefOrPoison(TEltC)) + else if (Q.isUndefValue(FEltC) && + isGuaranteedNotToBeUndefOrPoison(TEltC)) NewC.push_back(TEltC); else break; @@ -4304,12 +4304,12 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType())) GEPTy = VectorType::get(GEPTy, VT->getElementCount()); - // getelementptr poison, idx -> poison - // getelementptr baseptr, poison -> poison - if (any_of(Ops, [](const auto *V) { return isa<PoisonValue>(V); })) - return PoisonValue::get(GEPTy); - - if (Q.isUndefValue(Ops[0])) + // getelementptr poison, idx -> poison + // getelementptr baseptr, poison -> poison + if (any_of(Ops, [](const auto *V) { return isa<PoisonValue>(V); })) + return PoisonValue::get(GEPTy); + + if (Q.isUndefValue(Ops[0])) return UndefValue::get(GEPTy); bool IsScalableVec = isa<ScalableVectorType>(SrcTy); @@ -4332,7 +4332,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, // doesn't truncate the pointers. if (Ops[1]->getType()->getScalarSizeInBits() == Q.DL.getPointerSizeInBits(AS)) { - auto PtrToInt = [GEPTy](Value *P) -> Value * { + auto PtrToInt = [GEPTy](Value *P) -> Value * { Value *Temp; if (match(P, m_PtrToInt(m_Value(Temp)))) if (Temp->getType() == GEPTy) @@ -4340,14 +4340,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, return nullptr; }; - // FIXME: The following transforms are only legal if P and V have the - // same provenance (PR44403). Check whether getUnderlyingObject() is - // the same? - + // FIXME: The following transforms are only legal if P and V have the + // same provenance (PR44403). Check whether getUnderlyingObject() is + // the same? + // getelementptr V, (sub P, V) -> P if P points to a type of size 1. 
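The code that follows implements this size-1 fold; the underlying pointer identity is simply that adding the byte distance (P - V) back to V lands on P. A tiny plain-C++ sanity check (hypothetical buffer, not LLVM IR):

    #include <cassert>

    int main() {
      char Buf[16] = {};
      char *V = Buf + 2;
      char *P = Buf + 11;
      // gep V, (sub P, V) -> P when the element type is one byte wide.
      assert(V + (P - V) == P);
      return 0;
    }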
if (TyAllocSize == 1 && match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))))) - if (Value *R = PtrToInt(P)) + if (Value *R = PtrToInt(P)) return R; // getelementptr V, (ashr (sub P, V), C) -> Q @@ -4356,7 +4356,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), m_ConstantInt(C))) && TyAllocSize == 1ULL << C) - if (Value *R = PtrToInt(P)) + if (Value *R = PtrToInt(P)) return R; // getelementptr V, (sdiv (sub P, V), C) -> Q @@ -4364,7 +4364,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, if (match(Ops[1], m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), m_SpecificInt(TyAllocSize)))) - if (Value *R = PtrToInt(P)) + if (Value *R = PtrToInt(P)) return R; } } @@ -4381,21 +4381,21 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset); - // Avoid creating inttoptr of zero here: While LLVMs treatment of - // inttoptr is generally conservative, this particular case is folded to - // a null pointer, which will have incorrect provenance. - + // Avoid creating inttoptr of zero here: While LLVMs treatment of + // inttoptr is generally conservative, this particular case is folded to + // a null pointer, which will have incorrect provenance. + // gep (gep V, C), (sub 0, V) -> C if (match(Ops.back(), - m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) && - !BasePtrOffset.isNullValue()) { + m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) && + !BasePtrOffset.isNullValue()) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset); return ConstantExpr::getIntToPtr(CI, GEPTy); } // gep (gep V, C), (xor V, -1) -> C-1 if (match(Ops.back(), - m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) && - !BasePtrOffset.isOneValue()) { + m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) && + !BasePtrOffset.isOneValue()) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1); return ConstantExpr::getIntToPtr(CI, GEPTy); } @@ -4426,7 +4426,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); // insertvalue x, undef, n -> x - if (Q.isUndefValue(Val)) + if (Q.isUndefValue(Val)) return Agg; // insertvalue x, (extractvalue y, n), n @@ -4434,7 +4434,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, if (EV->getAggregateOperand()->getType() == Agg->getType() && EV->getIndices() == Idxs) { // insertvalue undef, (extractvalue y, n), n -> y - if (Q.isUndefValue(Agg)) + if (Q.isUndefValue(Agg)) return EV->getAggregateOperand(); // insertvalue y, (extractvalue y, n), n -> y @@ -4458,23 +4458,23 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, auto *ValC = dyn_cast<Constant>(Val); auto *IdxC = dyn_cast<Constant>(Idx); if (VecC && ValC && IdxC) - return ConstantExpr::getInsertElement(VecC, ValC, IdxC); + return ConstantExpr::getInsertElement(VecC, ValC, IdxC); - // For fixed-length vector, fold into poison if index is out of bounds. + // For fixed-length vector, fold into poison if index is out of bounds. 
if (auto *CI = dyn_cast<ConstantInt>(Idx)) { if (isa<FixedVectorType>(Vec->getType()) && CI->uge(cast<FixedVectorType>(Vec->getType())->getNumElements())) - return PoisonValue::get(Vec->getType()); + return PoisonValue::get(Vec->getType()); } // If index is undef, it might be out of bounds (see above case) - if (Q.isUndefValue(Idx)) - return PoisonValue::get(Vec->getType()); + if (Q.isUndefValue(Idx)) + return PoisonValue::get(Vec->getType()); - // If the scalar is poison, or it is undef and there is no risk of - // propagating poison from the vector value, simplify to the vector value. - if (isa<PoisonValue>(Val) || - (Q.isUndefValue(Val) && isGuaranteedNotToBePoison(Vec))) + // If the scalar is poison, or it is undef and there is no risk of + // propagating poison from the vector value, simplify to the vector value. + if (isa<PoisonValue>(Val) || + (Q.isUndefValue(Val) && isGuaranteedNotToBePoison(Vec))) return Vec; // If we are extracting a value from a vector, then inserting it into the same @@ -4518,18 +4518,18 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, /// Given operands for an ExtractElementInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, - const SimplifyQuery &Q, unsigned) { +static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q, unsigned) { auto *VecVTy = cast<VectorType>(Vec->getType()); if (auto *CVec = dyn_cast<Constant>(Vec)) { if (auto *CIdx = dyn_cast<Constant>(Idx)) - return ConstantExpr::getExtractElement(CVec, CIdx); + return ConstantExpr::getExtractElement(CVec, CIdx); // The index is not relevant if our vector is a splat. if (auto *Splat = CVec->getSplatValue()) return Splat; - if (Q.isUndefValue(Vec)) + if (Q.isUndefValue(Vec)) return UndefValue::get(VecVTy->getElementType()); } @@ -4538,16 +4538,16 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) { // For fixed-length vector, fold into undef if index is out of bounds. if (isa<FixedVectorType>(VecVTy) && - IdxC->getValue().uge(cast<FixedVectorType>(VecVTy)->getNumElements())) - return PoisonValue::get(VecVTy->getElementType()); + IdxC->getValue().uge(cast<FixedVectorType>(VecVTy)->getNumElements())) + return PoisonValue::get(VecVTy->getElementType()); if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) return Elt; } // An undef extract index can be arbitrarily chosen to be an out-of-range - // index value, which would result in the instruction being poison. - if (Q.isUndefValue(Idx)) - return PoisonValue::get(VecVTy->getElementType()); + // index value, which would result in the instruction being poison. + if (Q.isUndefValue(Idx)) + return PoisonValue::get(VecVTy->getElementType()); return nullptr; } @@ -4559,10 +4559,10 @@ Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, /// See if we can fold the given phi. If not, returns null. static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { - // WARNING: no matter how worthwhile it may seem, we can not perform PHI CSE - // here, because the PHI we may succeed simplifying to was not - // def-reachable from the original PHI! - + // WARNING: no matter how worthwhile it may seem, we can not perform PHI CSE + // here, because the PHI we may succeed simplifying to was not + // def-reachable from the original PHI! + // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. 
Value *CommonValue = nullptr; @@ -4570,7 +4570,7 @@ static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { for (Value *Incoming : PN->incoming_values()) { // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PN) continue; - if (Q.isUndefValue(Incoming)) { + if (Q.isUndefValue(Incoming)) { // Remember that we saw an undef value, but otherwise ignore them. HasUndefInput = true; continue; @@ -4648,7 +4648,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, return nullptr; // The mask value chooses which source operand we need to look at next. - int InVecNumElts = cast<FixedVectorType>(Op0->getType())->getNumElements(); + int InVecNumElts = cast<FixedVectorType>(Op0->getType())->getNumElements(); int RootElt = MaskVal; Value *SourceOp = Op0; if (MaskVal >= InVecNumElts) { @@ -4695,16 +4695,16 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, unsigned MaskNumElts = Mask.size(); ElementCount InVecEltCount = InVecTy->getElementCount(); - bool Scalable = InVecEltCount.isScalable(); + bool Scalable = InVecEltCount.isScalable(); SmallVector<int, 32> Indices; Indices.assign(Mask.begin(), Mask.end()); // Canonicalization: If mask does not select elements from an input vector, - // replace that input vector with poison. + // replace that input vector with poison. if (!Scalable) { bool MaskSelects0 = false, MaskSelects1 = false; - unsigned InVecNumElts = InVecEltCount.getKnownMinValue(); + unsigned InVecNumElts = InVecEltCount.getKnownMinValue(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (Indices[i] == -1) continue; @@ -4714,9 +4714,9 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, MaskSelects1 = true; } if (!MaskSelects0) - Op0 = PoisonValue::get(InVecTy); + Op0 = PoisonValue::get(InVecTy); if (!MaskSelects1) - Op1 = PoisonValue::get(InVecTy); + Op1 = PoisonValue::get(InVecTy); } auto *Op0Const = dyn_cast<Constant>(Op0); @@ -4725,16 +4725,16 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, // If all operands are constant, constant fold the shuffle. This // transformation depends on the value of the mask which is not known at // compile time for scalable vectors - if (Op0Const && Op1Const) - return ConstantExpr::getShuffleVector(Op0Const, Op1Const, Mask); + if (Op0Const && Op1Const) + return ConstantExpr::getShuffleVector(Op0Const, Op1Const, Mask); // Canonicalization: if only one input vector is constant, it shall be the // second one. This transformation depends on the value of the mask which // is not known at compile time for scalable vectors if (!Scalable && Op0Const && !Op1Const) { std::swap(Op0, Op1); - ShuffleVectorInst::commuteShuffleMask(Indices, - InVecEltCount.getKnownMinValue()); + ShuffleVectorInst::commuteShuffleMask(Indices, + InVecEltCount.getKnownMinValue()); } // A splat of an inserted scalar constant becomes a vector constant: @@ -4766,7 +4766,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, // A shuffle of a splat is always the splat itself. Legal if the shuffle's // value type is same as the input vectors' type. if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0)) - if (Q.isUndefValue(Op1) && RetTy == InVecTy && + if (Q.isUndefValue(Op1) && RetTy == InVecTy && is_splat(OpShuf->getShuffleMask())) return Op0; @@ -4778,7 +4778,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, // Don't fold a shuffle with undef mask elements. This may get folded in a // better way using demanded bits or other analysis. 
// TODO: Should we allow this? - if (is_contained(Indices, -1)) + if (is_contained(Indices, -1)) return nullptr; // Check if every element of this shuffle can be mapped back to the @@ -4847,20 +4847,20 @@ static Constant *propagateNaN(Constant *In) { /// transforms based on undef/NaN because the operation itself makes no /// difference to the result. static Constant *simplifyFPOp(ArrayRef<Value *> Ops, - FastMathFlags FMF, - const SimplifyQuery &Q) { + FastMathFlags FMF, + const SimplifyQuery &Q) { for (Value *V : Ops) { bool IsNan = match(V, m_NaN()); bool IsInf = match(V, m_Inf()); - bool IsUndef = Q.isUndefValue(V); + bool IsUndef = Q.isUndefValue(V); // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand // (an undef operand can be chosen to be Nan/Inf), then the result of - // this operation is poison. + // this operation is poison. if (FMF.noNaNs() && (IsNan || IsUndef)) - return PoisonValue::get(V->getType()); + return PoisonValue::get(V->getType()); if (FMF.noInfs() && (IsInf || IsUndef)) - return PoisonValue::get(V->getType()); + return PoisonValue::get(V->getType()); if (IsUndef || IsNan) return propagateNaN(cast<Constant>(V)); @@ -4875,7 +4875,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) return C; // fadd X, -0 ==> X @@ -4922,7 +4922,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) return C; // fsub X, +0 ==> X @@ -4964,7 +4964,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) return C; // fmul X, 1.0 ==> X @@ -5031,7 +5031,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) return C; // X / 1.0 -> X @@ -5076,7 +5076,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q)) return C; // Unlike fdiv, the result of frem always matches the sign of the dividend. @@ -5321,15 +5321,15 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, // bitreverse(bitreverse(x)) -> x if (match(Op0, m_BitReverse(m_Value(X)))) return X; break; - case Intrinsic::ctpop: { - // If everything but the lowest bit is zero, that bit is the pop-count. Ex: - // ctpop(and X, 1) --> and X, 1 - unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); - if (MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, BitWidth - 1), - Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return Op0; - break; - } + case Intrinsic::ctpop: { + // If everything but the lowest bit is zero, that bit is the pop-count. 
Ex: + // ctpop(and X, 1) --> and X, 1 + unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); + if (MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, BitWidth - 1), + Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + return Op0; + break; + } case Intrinsic::exp: // exp(log(x)) -> x if (Q.CxtI->hasAllowReassoc() && @@ -5382,156 +5382,156 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, return nullptr; } -static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::smax: return Intrinsic::smin; - case Intrinsic::smin: return Intrinsic::smax; - case Intrinsic::umax: return Intrinsic::umin; - case Intrinsic::umin: return Intrinsic::umax; - default: llvm_unreachable("Unexpected intrinsic"); - } -} - -static APInt getMaxMinLimit(Intrinsic::ID IID, unsigned BitWidth) { - switch (IID) { - case Intrinsic::smax: return APInt::getSignedMaxValue(BitWidth); - case Intrinsic::smin: return APInt::getSignedMinValue(BitWidth); - case Intrinsic::umax: return APInt::getMaxValue(BitWidth); - case Intrinsic::umin: return APInt::getMinValue(BitWidth); - default: llvm_unreachable("Unexpected intrinsic"); - } -} - -static ICmpInst::Predicate getMaxMinPredicate(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::smax: return ICmpInst::ICMP_SGE; - case Intrinsic::smin: return ICmpInst::ICMP_SLE; - case Intrinsic::umax: return ICmpInst::ICMP_UGE; - case Intrinsic::umin: return ICmpInst::ICMP_ULE; - default: llvm_unreachable("Unexpected intrinsic"); - } -} - -/// Given a min/max intrinsic, see if it can be removed based on having an -/// operand that is another min/max intrinsic with shared operand(s). The caller -/// is expected to swap the operand arguments to handle commutation. -static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) { - Value *X, *Y; - if (!match(Op0, m_MaxOrMin(m_Value(X), m_Value(Y)))) - return nullptr; - - auto *MM0 = dyn_cast<IntrinsicInst>(Op0); - if (!MM0) - return nullptr; - Intrinsic::ID IID0 = MM0->getIntrinsicID(); - - if (Op1 == X || Op1 == Y || - match(Op1, m_c_MaxOrMin(m_Specific(X), m_Specific(Y)))) { - // max (max X, Y), X --> max X, Y - if (IID0 == IID) - return MM0; - // max (min X, Y), X --> X - if (IID0 == getMaxMinOpposite(IID)) - return Op1; - } - return nullptr; -} - +static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::smax: return Intrinsic::smin; + case Intrinsic::smin: return Intrinsic::smax; + case Intrinsic::umax: return Intrinsic::umin; + case Intrinsic::umin: return Intrinsic::umax; + default: llvm_unreachable("Unexpected intrinsic"); + } +} + +static APInt getMaxMinLimit(Intrinsic::ID IID, unsigned BitWidth) { + switch (IID) { + case Intrinsic::smax: return APInt::getSignedMaxValue(BitWidth); + case Intrinsic::smin: return APInt::getSignedMinValue(BitWidth); + case Intrinsic::umax: return APInt::getMaxValue(BitWidth); + case Intrinsic::umin: return APInt::getMinValue(BitWidth); + default: llvm_unreachable("Unexpected intrinsic"); + } +} + +static ICmpInst::Predicate getMaxMinPredicate(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::smax: return ICmpInst::ICMP_SGE; + case Intrinsic::smin: return ICmpInst::ICMP_SLE; + case Intrinsic::umax: return ICmpInst::ICMP_UGE; + case Intrinsic::umin: return ICmpInst::ICMP_ULE; + default: llvm_unreachable("Unexpected intrinsic"); + } +} + +/// Given a min/max intrinsic, see if it can be removed based on having an +/// operand that is another min/max intrinsic with shared operand(s). 
The caller +/// is expected to swap the operand arguments to handle commutation. +static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) { + Value *X, *Y; + if (!match(Op0, m_MaxOrMin(m_Value(X), m_Value(Y)))) + return nullptr; + + auto *MM0 = dyn_cast<IntrinsicInst>(Op0); + if (!MM0) + return nullptr; + Intrinsic::ID IID0 = MM0->getIntrinsicID(); + + if (Op1 == X || Op1 == Y || + match(Op1, m_c_MaxOrMin(m_Specific(X), m_Specific(Y)))) { + // max (max X, Y), X --> max X, Y + if (IID0 == IID) + return MM0; + // max (min X, Y), X --> X + if (IID0 == getMaxMinOpposite(IID)) + return Op1; + } + return nullptr; +} + static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, const SimplifyQuery &Q) { Intrinsic::ID IID = F->getIntrinsicID(); Type *ReturnType = F->getReturnType(); - unsigned BitWidth = ReturnType->getScalarSizeInBits(); + unsigned BitWidth = ReturnType->getScalarSizeInBits(); switch (IID) { - case Intrinsic::abs: - // abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here. - // It is always ok to pick the earlier abs. We'll just lose nsw if its only - // on the outer abs. - if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(), m_Value()))) - return Op0; - break; - - case Intrinsic::smax: - case Intrinsic::smin: - case Intrinsic::umax: - case Intrinsic::umin: { - // If the arguments are the same, this is a no-op. - if (Op0 == Op1) - return Op0; - - // Canonicalize constant operand as Op1. - if (isa<Constant>(Op0)) - std::swap(Op0, Op1); - - // Assume undef is the limit value. - if (Q.isUndefValue(Op1)) - return ConstantInt::get(ReturnType, getMaxMinLimit(IID, BitWidth)); - - const APInt *C; - if (match(Op1, m_APIntAllowUndef(C))) { - // Clamp to limit value. For example: - // umax(i8 %x, i8 255) --> 255 - if (*C == getMaxMinLimit(IID, BitWidth)) - return ConstantInt::get(ReturnType, *C); - - // If the constant op is the opposite of the limit value, the other must - // be larger/smaller or equal. For example: - // umin(i8 %x, i8 255) --> %x - if (*C == getMaxMinLimit(getMaxMinOpposite(IID), BitWidth)) - return Op0; - - // Remove nested call if constant operands allow it. Example: - // max (max X, 7), 5 -> max X, 7 - auto *MinMax0 = dyn_cast<IntrinsicInst>(Op0); - if (MinMax0 && MinMax0->getIntrinsicID() == IID) { - // TODO: loosen undef/splat restrictions for vector constants. - Value *M00 = MinMax0->getOperand(0), *M01 = MinMax0->getOperand(1); - const APInt *InnerC; - if ((match(M00, m_APInt(InnerC)) || match(M01, m_APInt(InnerC))) && - ((IID == Intrinsic::smax && InnerC->sge(*C)) || - (IID == Intrinsic::smin && InnerC->sle(*C)) || - (IID == Intrinsic::umax && InnerC->uge(*C)) || - (IID == Intrinsic::umin && InnerC->ule(*C)))) - return Op0; - } - } - - if (Value *V = foldMinMaxSharedOp(IID, Op0, Op1)) - return V; - if (Value *V = foldMinMaxSharedOp(IID, Op1, Op0)) - return V; - - ICmpInst::Predicate Pred = getMaxMinPredicate(IID); - if (isICmpTrue(Pred, Op0, Op1, Q.getWithoutUndef(), RecursionLimit)) - return Op0; - if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit)) - return Op1; - - if (Optional<bool> Imp = - isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL)) - return *Imp ? Op0 : Op1; - if (Optional<bool> Imp = - isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL)) - return *Imp ? Op1 : Op0; - - break; - } + case Intrinsic::abs: + // abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here. + // It is always ok to pick the earlier abs. We'll just lose nsw if its only + // on the outer abs. 
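The shared-operand folds in foldMinMaxSharedOp above reduce to two ordering facts: max(max(X, Y), X) is already max(X, Y), and max(min(X, Y), X) is just X because min(X, Y) <= X. A standalone C++ check of both (plain integers, not the intrinsic matchers):

    #include <algorithm>
    #include <cassert>

    int main() {
      const int Vals[] = {-7, -1, 0, 3, 42};
      for (int X : Vals)
        for (int Y : Vals) {
          // max (max X, Y), X --> max X, Y
          assert(std::max(std::max(X, Y), X) == std::max(X, Y));
          // max (min X, Y), X --> X
          assert(std::max(std::min(X, Y), X) == X);
        }
      return 0;
    }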
+ if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(), m_Value()))) + return Op0; + break; + + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::umax: + case Intrinsic::umin: { + // If the arguments are the same, this is a no-op. + if (Op0 == Op1) + return Op0; + + // Canonicalize constant operand as Op1. + if (isa<Constant>(Op0)) + std::swap(Op0, Op1); + + // Assume undef is the limit value. + if (Q.isUndefValue(Op1)) + return ConstantInt::get(ReturnType, getMaxMinLimit(IID, BitWidth)); + + const APInt *C; + if (match(Op1, m_APIntAllowUndef(C))) { + // Clamp to limit value. For example: + // umax(i8 %x, i8 255) --> 255 + if (*C == getMaxMinLimit(IID, BitWidth)) + return ConstantInt::get(ReturnType, *C); + + // If the constant op is the opposite of the limit value, the other must + // be larger/smaller or equal. For example: + // umin(i8 %x, i8 255) --> %x + if (*C == getMaxMinLimit(getMaxMinOpposite(IID), BitWidth)) + return Op0; + + // Remove nested call if constant operands allow it. Example: + // max (max X, 7), 5 -> max X, 7 + auto *MinMax0 = dyn_cast<IntrinsicInst>(Op0); + if (MinMax0 && MinMax0->getIntrinsicID() == IID) { + // TODO: loosen undef/splat restrictions for vector constants. + Value *M00 = MinMax0->getOperand(0), *M01 = MinMax0->getOperand(1); + const APInt *InnerC; + if ((match(M00, m_APInt(InnerC)) || match(M01, m_APInt(InnerC))) && + ((IID == Intrinsic::smax && InnerC->sge(*C)) || + (IID == Intrinsic::smin && InnerC->sle(*C)) || + (IID == Intrinsic::umax && InnerC->uge(*C)) || + (IID == Intrinsic::umin && InnerC->ule(*C)))) + return Op0; + } + } + + if (Value *V = foldMinMaxSharedOp(IID, Op0, Op1)) + return V; + if (Value *V = foldMinMaxSharedOp(IID, Op1, Op0)) + return V; + + ICmpInst::Predicate Pred = getMaxMinPredicate(IID); + if (isICmpTrue(Pred, Op0, Op1, Q.getWithoutUndef(), RecursionLimit)) + return Op0; + if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit)) + return Op1; + + if (Optional<bool> Imp = + isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL)) + return *Imp ? Op0 : Op1; + if (Optional<bool> Imp = + isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL)) + return *Imp ? 
Op1 : Op0; + + break; + } case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: // X - X -> { 0, false } - // X - undef -> { 0, false } - // undef - X -> { 0, false } - if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) + // X - undef -> { 0, false } + // undef - X -> { 0, false } + if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) return Constant::getNullValue(ReturnType); - break; + break; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - // X + undef -> { -1, false } - // undef + x -> { -1, false } - if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) { + // X + undef -> { -1, false } + // undef + x -> { -1, false } + if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) { return ConstantStruct::get( cast<StructType>(ReturnType), - {Constant::getAllOnesValue(ReturnType->getStructElementType(0)), + {Constant::getAllOnesValue(ReturnType->getStructElementType(0)), Constant::getNullValue(ReturnType->getStructElementType(1))}); } break; @@ -5543,7 +5543,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return Constant::getNullValue(ReturnType); // undef * X -> { 0, false } // X * undef -> { 0, false } - if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) return Constant::getNullValue(ReturnType); break; case Intrinsic::uadd_sat: @@ -5557,7 +5557,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, // sat(undef + X) -> -1 // For unsigned: Assume undef is MAX, thus we saturate to MAX (-1). // For signed: Assume undef is ~X, in which case X + ~X = -1. - if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) return Constant::getAllOnesValue(ReturnType); // X + 0 -> X @@ -5574,7 +5574,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, LLVM_FALLTHROUGH; case Intrinsic::ssub_sat: // X - X -> 0, X - undef -> 0, undef - X -> 0 - if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) + if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) return Constant::getNullValue(ReturnType); // X - 0 -> X if (match(Op1, m_Zero())) @@ -5612,44 +5612,44 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, // If the arguments are the same, this is a no-op. if (Op0 == Op1) return Op0; - // Canonicalize constant operand as Op1. - if (isa<Constant>(Op0)) - std::swap(Op0, Op1); - - // If an argument is undef, return the other argument. - if (Q.isUndefValue(Op1)) + // Canonicalize constant operand as Op1. + if (isa<Constant>(Op0)) + std::swap(Op0, Op1); + + // If an argument is undef, return the other argument. + if (Q.isUndefValue(Op1)) return Op0; bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum; - bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum; - - // minnum(X, nan) -> X - // maxnum(X, nan) -> X - // minimum(X, nan) -> nan - // maximum(X, nan) -> nan + bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum; + + // minnum(X, nan) -> X + // maxnum(X, nan) -> X + // minimum(X, nan) -> nan + // maximum(X, nan) -> nan if (match(Op1, m_NaN())) - return PropagateNaN ? propagateNaN(cast<Constant>(Op1)) : Op0; - - // In the following folds, inf can be replaced with the largest finite - // float, if the ninf flag is set. 
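The minnum/maxnum folds in this hunk mirror the usual libm semantics: a NaN operand is ignored, and a -inf operand dominates minnum. A rough C++ illustration using std::fmin/std::fmax, whose NaN handling matches the llvm.minnum/llvm.maxnum description above (illustrative, not the intrinsic lowering):

    #include <cassert>
    #include <cmath>
    #include <limits>

    int main() {
      const double X = 1.5;
      const double NaN = std::numeric_limits<double>::quiet_NaN();
      const double NegInf = -std::numeric_limits<double>::infinity();
      assert(std::fmin(X, NaN) == X);         // minnum(X, nan) -> X
      assert(std::fmax(X, NaN) == X);         // maxnum(X, nan) -> X
      assert(std::fmin(X, NegInf) == NegInf); // minnum(X, -inf) -> -inf
      return 0;
    }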
- const APFloat *C; - if (match(Op1, m_APFloat(C)) && - (C->isInfinity() || (Q.CxtI->hasNoInfs() && C->isLargest()))) { - // minnum(X, -inf) -> -inf - // maxnum(X, +inf) -> +inf - // minimum(X, -inf) -> -inf if nnan - // maximum(X, +inf) -> +inf if nnan - if (C->isNegative() == IsMin && (!PropagateNaN || Q.CxtI->hasNoNaNs())) - return ConstantFP::get(ReturnType, *C); - - // minnum(X, +inf) -> X if nnan - // maxnum(X, -inf) -> X if nnan - // minimum(X, +inf) -> X - // maximum(X, -inf) -> X - if (C->isNegative() != IsMin && (PropagateNaN || Q.CxtI->hasNoNaNs())) - return Op0; - } - + return PropagateNaN ? propagateNaN(cast<Constant>(Op1)) : Op0; + + // In the following folds, inf can be replaced with the largest finite + // float, if the ninf flag is set. + const APFloat *C; + if (match(Op1, m_APFloat(C)) && + (C->isInfinity() || (Q.CxtI->hasNoInfs() && C->isLargest()))) { + // minnum(X, -inf) -> -inf + // maxnum(X, +inf) -> +inf + // minimum(X, -inf) -> -inf if nnan + // maximum(X, +inf) -> +inf if nnan + if (C->isNegative() == IsMin && (!PropagateNaN || Q.CxtI->hasNoNaNs())) + return ConstantFP::get(ReturnType, *C); + + // minnum(X, +inf) -> X if nnan + // maxnum(X, -inf) -> X if nnan + // minimum(X, +inf) -> X + // maximum(X, -inf) -> X + if (C->isNegative() != IsMin && (PropagateNaN || Q.CxtI->hasNoNaNs())) + return Op0; + } + // Min/max of the same operation with common operand: // m(m(X, Y)), X --> m(X, Y) (4 commuted variants) if (auto *M0 = dyn_cast<IntrinsicInst>(Op0)) @@ -5703,11 +5703,11 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { *ShAmtArg = Call->getArgOperand(2); // If both operands are undef, the result is undef. - if (Q.isUndefValue(Op0) && Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op0) && Q.isUndefValue(Op1)) return UndefValue::get(F->getReturnType()); // If shift amount is undef, assume it is zero. - if (Q.isUndefValue(ShAmtArg)) + if (Q.isUndefValue(ShAmtArg)) return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1); const APInt *ShAmtC; @@ -5724,7 +5724,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { Value *Op0 = Call->getArgOperand(0); Value *Op1 = Call->getArgOperand(1); Value *Op2 = Call->getArgOperand(2); - if (Value *V = simplifyFPOp({ Op0, Op1, Op2 }, {}, Q)) + if (Value *V = simplifyFPOp({ Op0, Op1, Op2 }, {}, Q)) return V; return nullptr; } @@ -5733,9 +5733,9 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } } -static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { - auto *F = dyn_cast<Function>(Call->getCalledOperand()); - if (!F || !canConstantFoldCallTo(Call, F)) +static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { + auto *F = dyn_cast<Function>(Call->getCalledOperand()); + if (!F || !canConstantFoldCallTo(Call, F)) return nullptr; SmallVector<Constant *, 4> ConstantArgs; @@ -5754,33 +5754,33 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI); } -Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { - // musttail calls can only be simplified if they are also DCEd. - // As we can't guarantee this here, don't simplify them. 
- if (Call->isMustTailCall()) - return nullptr; - - // call undef -> poison - // call null -> poison - Value *Callee = Call->getCalledOperand(); - if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee)) - return PoisonValue::get(Call->getType()); - - if (Value *V = tryConstantFoldCall(Call, Q)) - return V; - - auto *F = dyn_cast<Function>(Callee); - if (F && F->isIntrinsic()) - if (Value *Ret = simplifyIntrinsic(Call, Q)) - return Ret; - - return nullptr; -} - +Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { + // musttail calls can only be simplified if they are also DCEd. + // As we can't guarantee this here, don't simplify them. + if (Call->isMustTailCall()) + return nullptr; + + // call undef -> poison + // call null -> poison + Value *Callee = Call->getCalledOperand(); + if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee)) + return PoisonValue::get(Call->getType()); + + if (Value *V = tryConstantFoldCall(Call, Q)) + return V; + + auto *F = dyn_cast<Function>(Callee); + if (F && F->isIntrinsic()) + if (Value *Ret = simplifyIntrinsic(Call, Q)) + return Ret; + + return nullptr; +} + /// Given operands for a Freeze, see if we can fold the result. static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { // Use a utility function defined in ValueTracking. - if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT)) + if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT)) return Op0; // We have room for improvement. return nullptr; @@ -5889,7 +5889,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, I->getOperand(2), Q); break; case Instruction::GetElementPtr: { - SmallVector<Value *, 8> Ops(I->operands()); + SmallVector<Value *, 8> Ops(I->operands()); Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(), Ops, Q); break; diff --git a/contrib/libs/llvm12/lib/Analysis/LazyCallGraph.cpp b/contrib/libs/llvm12/lib/Analysis/LazyCallGraph.cpp index f2c85a69f1..3d2bc6cb01 100644 --- a/contrib/libs/llvm12/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LazyCallGraph.cpp @@ -19,7 +19,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -256,24 +256,24 @@ void LazyCallGraph::SCC::verify() { "Must set low link to -1 when adding a node to an SCC!"); for (Edge &E : **N) assert(E.getNode().isPopulated() && "Can't have an unpopulated node!"); - -#ifdef EXPENSIVE_CHECKS - // Verify that all nodes in this SCC can reach all other nodes. - SmallVector<Node *, 4> Worklist; - SmallPtrSet<Node *, 4> Visited; - Worklist.push_back(N); - while (!Worklist.empty()) { - Node *VisitingNode = Worklist.pop_back_val(); - if (!Visited.insert(VisitingNode).second) - continue; - for (Edge &E : (*VisitingNode)->calls()) - Worklist.push_back(&E.getNode()); - } - for (Node *NodeToVisit : Nodes) { - assert(Visited.contains(NodeToVisit) && - "Cannot reach all nodes within SCC"); - } -#endif + +#ifdef EXPENSIVE_CHECKS + // Verify that all nodes in this SCC can reach all other nodes. 
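The EXPENSIVE_CHECKS verification in the surrounding LazyCallGraph hunk is a plain worklist reachability walk: start from one node, expand successors, and require that every node of the SCC was visited. A toy sketch with an assumed adjacency-list graph (not the real SCC/Node classes):

    #include <cassert>
    #include <unordered_set>
    #include <vector>

    int main() {
      // 0 -> 1 -> 2 -> 0 is a cycle, so each node reaches every other node.
      const std::vector<std::vector<int>> Succs = {{1}, {2}, {0}};
      for (int Start = 0; Start < 3; ++Start) {
        std::vector<int> Worklist = {Start};
        std::unordered_set<int> Visited;
        while (!Worklist.empty()) {
          int N = Worklist.back();
          Worklist.pop_back();
          if (!Visited.insert(N).second)
            continue;
          for (int S : Succs[N])
            Worklist.push_back(S);
        }
        for (int M = 0; M < 3; ++M)
          assert(Visited.count(M) && "every node should be reachable in an SCC");
      }
      return 0;
    }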
+ SmallVector<Node *, 4> Worklist; + SmallPtrSet<Node *, 4> Visited; + Worklist.push_back(N); + while (!Worklist.empty()) { + Node *VisitingNode = Worklist.pop_back_val(); + if (!Visited.insert(VisitingNode).second) + continue; + for (Edge &E : (*VisitingNode)->calls()) + Worklist.push_back(&E.getNode()); + } + for (Node *NodeToVisit : Nodes) { + assert(Visited.contains(NodeToVisit) && + "Cannot reach all nodes within SCC"); + } +#endif } } #endif @@ -376,31 +376,31 @@ void LazyCallGraph::RefSCC::verify() { } } } - -#ifdef EXPENSIVE_CHECKS - // Verify that all nodes in this RefSCC can reach all other nodes. - SmallVector<Node *> Nodes; - for (SCC *C : SCCs) { - for (Node &N : *C) - Nodes.push_back(&N); - } - for (Node *N : Nodes) { - SmallVector<Node *, 4> Worklist; - SmallPtrSet<Node *, 4> Visited; - Worklist.push_back(N); - while (!Worklist.empty()) { - Node *VisitingNode = Worklist.pop_back_val(); - if (!Visited.insert(VisitingNode).second) - continue; - for (Edge &E : **VisitingNode) - Worklist.push_back(&E.getNode()); - } - for (Node *NodeToVisit : Nodes) { - assert(Visited.contains(NodeToVisit) && - "Cannot reach all nodes within RefSCC"); - } - } -#endif + +#ifdef EXPENSIVE_CHECKS + // Verify that all nodes in this RefSCC can reach all other nodes. + SmallVector<Node *> Nodes; + for (SCC *C : SCCs) { + for (Node &N : *C) + Nodes.push_back(&N); + } + for (Node *N : Nodes) { + SmallVector<Node *, 4> Worklist; + SmallPtrSet<Node *, 4> Visited; + Worklist.push_back(N); + while (!Worklist.empty()) { + Node *VisitingNode = Worklist.pop_back_val(); + if (!Visited.insert(VisitingNode).second) + continue; + for (Edge &E : **VisitingNode) + Worklist.push_back(&E.getNode()); + } + for (Node *NodeToVisit : Nodes) { + assert(Visited.contains(NodeToVisit) && + "Cannot reach all nodes within RefSCC"); + } + } +#endif } #endif @@ -866,7 +866,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { PendingSCCStack.clear(); while (!DFSStack.empty()) OldSCC.Nodes.push_back(DFSStack.pop_back_val().first); - for (Node &N : drop_begin(OldSCC, OldSize)) { + for (Node &N : drop_begin(OldSCC, OldSize)) { N.DFSNumber = N.LowLink = -1; G->SCCMap[&N] = &OldSCC; } @@ -1586,215 +1586,215 @@ void LazyCallGraph::removeDeadFunction(Function &F) { // allocators. } -// Gets the Edge::Kind from one function to another by looking at the function's -// instructions. Asserts if there is no edge. -// Useful for determining what type of edge should exist between functions when -// the edge hasn't been created yet. -static LazyCallGraph::Edge::Kind getEdgeKind(Function &OriginalFunction, - Function &NewFunction) { - // In release builds, assume that if there are no direct calls to the new - // function, then there is a ref edge. In debug builds, keep track of - // references to assert that there is actually a ref edge if there is no call - // edge. 
-#ifndef NDEBUG - SmallVector<Constant *, 16> Worklist; - SmallPtrSet<Constant *, 16> Visited; -#endif - - for (Instruction &I : instructions(OriginalFunction)) { - if (auto *CB = dyn_cast<CallBase>(&I)) { - if (Function *Callee = CB->getCalledFunction()) { - if (Callee == &NewFunction) - return LazyCallGraph::Edge::Kind::Call; - } - } -#ifndef NDEBUG - for (Value *Op : I.operand_values()) { - if (Constant *C = dyn_cast<Constant>(Op)) { - if (Visited.insert(C).second) - Worklist.push_back(C); - } - } -#endif - } - -#ifndef NDEBUG - bool FoundNewFunction = false; - LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &F) { - if (&F == &NewFunction) - FoundNewFunction = true; - }); - assert(FoundNewFunction && "No edge from original function to new function"); -#endif - - return LazyCallGraph::Edge::Kind::Ref; -} - -void LazyCallGraph::addSplitFunction(Function &OriginalFunction, - Function &NewFunction) { - assert(lookup(OriginalFunction) && - "Original function's node should already exist"); - Node &OriginalN = get(OriginalFunction); - SCC *OriginalC = lookupSCC(OriginalN); - RefSCC *OriginalRC = lookupRefSCC(OriginalN); - -#ifndef NDEBUG - OriginalRC->verify(); - auto VerifyOnExit = make_scope_exit([&]() { OriginalRC->verify(); }); -#endif - - assert(!lookup(NewFunction) && - "New function's node should not already exist"); - Node &NewN = initNode(NewFunction); - - Edge::Kind EK = getEdgeKind(OriginalFunction, NewFunction); - - SCC *NewC = nullptr; - for (Edge &E : *NewN) { - Node &EN = E.getNode(); - if (EK == Edge::Kind::Call && E.isCall() && lookupSCC(EN) == OriginalC) { - // If the edge to the new function is a call edge and there is a call edge - // from the new function to any function in the original function's SCC, - // it is in the same SCC (and RefSCC) as the original function. - NewC = OriginalC; - NewC->Nodes.push_back(&NewN); - break; - } - } - - if (!NewC) { - for (Edge &E : *NewN) { - Node &EN = E.getNode(); - if (lookupRefSCC(EN) == OriginalRC) { - // If there is any edge from the new function to any function in the - // original function's RefSCC, it is in the same RefSCC as the original - // function but a new SCC. - RefSCC *NewRC = OriginalRC; - NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN})); - - // The new function's SCC is not the same as the original function's - // SCC, since that case was handled earlier. If the edge from the - // original function to the new function was a call edge, then we need - // to insert the newly created function's SCC before the original - // function's SCC. Otherwise either the new SCC comes after the original - // function's SCC, or it doesn't matter, and in both cases we can add it - // to the very end. - int InsertIndex = EK == Edge::Kind::Call ? NewRC->SCCIndices[OriginalC] - : NewRC->SCCIndices.size(); - NewRC->SCCs.insert(NewRC->SCCs.begin() + InsertIndex, NewC); - for (int I = InsertIndex, Size = NewRC->SCCs.size(); I < Size; ++I) - NewRC->SCCIndices[NewRC->SCCs[I]] = I; - - break; - } - } - } - - if (!NewC) { - // We didn't find any edges back to the original function's RefSCC, so the - // new function belongs in a new RefSCC. The new RefSCC goes before the - // original function's RefSCC. 
- RefSCC *NewRC = createRefSCC(*this); - NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN})); - NewRC->SCCIndices[NewC] = 0; - NewRC->SCCs.push_back(NewC); - auto OriginalRCIndex = RefSCCIndices.find(OriginalRC)->second; - PostOrderRefSCCs.insert(PostOrderRefSCCs.begin() + OriginalRCIndex, NewRC); - for (int I = OriginalRCIndex, Size = PostOrderRefSCCs.size(); I < Size; ++I) - RefSCCIndices[PostOrderRefSCCs[I]] = I; - } - - SCCMap[&NewN] = NewC; - - OriginalN->insertEdgeInternal(NewN, EK); +// Gets the Edge::Kind from one function to another by looking at the function's +// instructions. Asserts if there is no edge. +// Useful for determining what type of edge should exist between functions when +// the edge hasn't been created yet. +static LazyCallGraph::Edge::Kind getEdgeKind(Function &OriginalFunction, + Function &NewFunction) { + // In release builds, assume that if there are no direct calls to the new + // function, then there is a ref edge. In debug builds, keep track of + // references to assert that there is actually a ref edge if there is no call + // edge. +#ifndef NDEBUG + SmallVector<Constant *, 16> Worklist; + SmallPtrSet<Constant *, 16> Visited; +#endif + + for (Instruction &I : instructions(OriginalFunction)) { + if (auto *CB = dyn_cast<CallBase>(&I)) { + if (Function *Callee = CB->getCalledFunction()) { + if (Callee == &NewFunction) + return LazyCallGraph::Edge::Kind::Call; + } + } +#ifndef NDEBUG + for (Value *Op : I.operand_values()) { + if (Constant *C = dyn_cast<Constant>(Op)) { + if (Visited.insert(C).second) + Worklist.push_back(C); + } + } +#endif + } + +#ifndef NDEBUG + bool FoundNewFunction = false; + LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &F) { + if (&F == &NewFunction) + FoundNewFunction = true; + }); + assert(FoundNewFunction && "No edge from original function to new function"); +#endif + + return LazyCallGraph::Edge::Kind::Ref; } -void LazyCallGraph::addSplitRefRecursiveFunctions( - Function &OriginalFunction, ArrayRef<Function *> NewFunctions) { - assert(!NewFunctions.empty() && "Can't add zero functions"); - assert(lookup(OriginalFunction) && - "Original function's node should already exist"); - Node &OriginalN = get(OriginalFunction); - RefSCC *OriginalRC = lookupRefSCC(OriginalN); - -#ifndef NDEBUG - OriginalRC->verify(); - auto VerifyOnExit = make_scope_exit([&]() { - OriginalRC->verify(); -#ifdef EXPENSIVE_CHECKS - for (Function *NewFunction : NewFunctions) - lookupRefSCC(get(*NewFunction))->verify(); -#endif - }); -#endif - - bool ExistsRefToOriginalRefSCC = false; - - for (Function *NewFunction : NewFunctions) { - Node &NewN = initNode(*NewFunction); - - OriginalN->insertEdgeInternal(NewN, Edge::Kind::Ref); - - // Check if there is any edge from any new function back to any function in - // the original function's RefSCC. - for (Edge &E : *NewN) { - if (lookupRefSCC(E.getNode()) == OriginalRC) { - ExistsRefToOriginalRefSCC = true; - break; - } - } - } - - RefSCC *NewRC; - if (ExistsRefToOriginalRefSCC) { - // If there is any edge from any new function to any function in the - // original function's RefSCC, all new functions will be in the same RefSCC - // as the original function. - NewRC = OriginalRC; - } else { - // Otherwise the new functions are in their own RefSCC. - NewRC = createRefSCC(*this); - // The new RefSCC goes before the original function's RefSCC in postorder - // since there are only edges from the original function's RefSCC to the new - // RefSCC. 
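Both split-function hunks keep PostOrderRefSCCs and RefSCCIndices in sync with the same idiom: insert the new RefSCC at its postorder position, then renumber every entry from that position onward. A generic sketch with toy types (std::vector plus a side index map, not the real LazyCallGraph containers):

    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <vector>

    int main() {
      std::vector<std::string> PostOrder = {"A", "B", "C"};
      std::unordered_map<std::string, int> Indices = {{"A", 0}, {"B", 1}, {"C", 2}};

      const int Idx = 1;                       // the new element goes before "B"
      PostOrder.insert(PostOrder.begin() + Idx, "New");
      for (int I = Idx, Size = (int)PostOrder.size(); I < Size; ++I)
        Indices[PostOrder[I]] = I;             // refresh the shifted indices

      assert(Indices["New"] == 1 && Indices["B"] == 2 && Indices["C"] == 3);
      return 0;
    }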
- auto OriginalRCIndex = RefSCCIndices.find(OriginalRC)->second; - PostOrderRefSCCs.insert(PostOrderRefSCCs.begin() + OriginalRCIndex, NewRC); - for (int I = OriginalRCIndex, Size = PostOrderRefSCCs.size(); I < Size; ++I) - RefSCCIndices[PostOrderRefSCCs[I]] = I; - } - - for (Function *NewFunction : NewFunctions) { - Node &NewN = get(*NewFunction); - // Each new function is in its own new SCC. The original function can only - // have a ref edge to new functions, and no other existing functions can - // have references to new functions. Each new function only has a ref edge - // to the other new functions. - SCC *NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN})); - // The new SCCs are either sibling SCCs or parent SCCs to all other existing - // SCCs in the RefSCC. Either way, they can go at the back of the postorder - // SCC list. - auto Index = NewRC->SCCIndices.size(); - NewRC->SCCIndices[NewC] = Index; - NewRC->SCCs.push_back(NewC); - SCCMap[&NewN] = NewC; - } - -#ifndef NDEBUG - for (Function *F1 : NewFunctions) { - assert(getEdgeKind(OriginalFunction, *F1) == Edge::Kind::Ref && - "Expected ref edges from original function to every new function"); - Node &N1 = get(*F1); - for (Function *F2 : NewFunctions) { - if (F1 == F2) - continue; - Node &N2 = get(*F2); - assert(!N1->lookup(N2)->isCall() && - "Edges between new functions must be ref edges"); - } - } -#endif +void LazyCallGraph::addSplitFunction(Function &OriginalFunction, + Function &NewFunction) { + assert(lookup(OriginalFunction) && + "Original function's node should already exist"); + Node &OriginalN = get(OriginalFunction); + SCC *OriginalC = lookupSCC(OriginalN); + RefSCC *OriginalRC = lookupRefSCC(OriginalN); + +#ifndef NDEBUG + OriginalRC->verify(); + auto VerifyOnExit = make_scope_exit([&]() { OriginalRC->verify(); }); +#endif + + assert(!lookup(NewFunction) && + "New function's node should not already exist"); + Node &NewN = initNode(NewFunction); + + Edge::Kind EK = getEdgeKind(OriginalFunction, NewFunction); + + SCC *NewC = nullptr; + for (Edge &E : *NewN) { + Node &EN = E.getNode(); + if (EK == Edge::Kind::Call && E.isCall() && lookupSCC(EN) == OriginalC) { + // If the edge to the new function is a call edge and there is a call edge + // from the new function to any function in the original function's SCC, + // it is in the same SCC (and RefSCC) as the original function. + NewC = OriginalC; + NewC->Nodes.push_back(&NewN); + break; + } + } + + if (!NewC) { + for (Edge &E : *NewN) { + Node &EN = E.getNode(); + if (lookupRefSCC(EN) == OriginalRC) { + // If there is any edge from the new function to any function in the + // original function's RefSCC, it is in the same RefSCC as the original + // function but a new SCC. + RefSCC *NewRC = OriginalRC; + NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN})); + + // The new function's SCC is not the same as the original function's + // SCC, since that case was handled earlier. If the edge from the + // original function to the new function was a call edge, then we need + // to insert the newly created function's SCC before the original + // function's SCC. Otherwise either the new SCC comes after the original + // function's SCC, or it doesn't matter, and in both cases we can add it + // to the very end. + int InsertIndex = EK == Edge::Kind::Call ? 
NewRC->SCCIndices[OriginalC] + : NewRC->SCCIndices.size(); + NewRC->SCCs.insert(NewRC->SCCs.begin() + InsertIndex, NewC); + for (int I = InsertIndex, Size = NewRC->SCCs.size(); I < Size; ++I) + NewRC->SCCIndices[NewRC->SCCs[I]] = I; + + break; + } + } + } + + if (!NewC) { + // We didn't find any edges back to the original function's RefSCC, so the + // new function belongs in a new RefSCC. The new RefSCC goes before the + // original function's RefSCC. + RefSCC *NewRC = createRefSCC(*this); + NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN})); + NewRC->SCCIndices[NewC] = 0; + NewRC->SCCs.push_back(NewC); + auto OriginalRCIndex = RefSCCIndices.find(OriginalRC)->second; + PostOrderRefSCCs.insert(PostOrderRefSCCs.begin() + OriginalRCIndex, NewRC); + for (int I = OriginalRCIndex, Size = PostOrderRefSCCs.size(); I < Size; ++I) + RefSCCIndices[PostOrderRefSCCs[I]] = I; + } + + SCCMap[&NewN] = NewC; + + OriginalN->insertEdgeInternal(NewN, EK); } +void LazyCallGraph::addSplitRefRecursiveFunctions( + Function &OriginalFunction, ArrayRef<Function *> NewFunctions) { + assert(!NewFunctions.empty() && "Can't add zero functions"); + assert(lookup(OriginalFunction) && + "Original function's node should already exist"); + Node &OriginalN = get(OriginalFunction); + RefSCC *OriginalRC = lookupRefSCC(OriginalN); + +#ifndef NDEBUG + OriginalRC->verify(); + auto VerifyOnExit = make_scope_exit([&]() { + OriginalRC->verify(); +#ifdef EXPENSIVE_CHECKS + for (Function *NewFunction : NewFunctions) + lookupRefSCC(get(*NewFunction))->verify(); +#endif + }); +#endif + + bool ExistsRefToOriginalRefSCC = false; + + for (Function *NewFunction : NewFunctions) { + Node &NewN = initNode(*NewFunction); + + OriginalN->insertEdgeInternal(NewN, Edge::Kind::Ref); + + // Check if there is any edge from any new function back to any function in + // the original function's RefSCC. + for (Edge &E : *NewN) { + if (lookupRefSCC(E.getNode()) == OriginalRC) { + ExistsRefToOriginalRefSCC = true; + break; + } + } + } + + RefSCC *NewRC; + if (ExistsRefToOriginalRefSCC) { + // If there is any edge from any new function to any function in the + // original function's RefSCC, all new functions will be in the same RefSCC + // as the original function. + NewRC = OriginalRC; + } else { + // Otherwise the new functions are in their own RefSCC. + NewRC = createRefSCC(*this); + // The new RefSCC goes before the original function's RefSCC in postorder + // since there are only edges from the original function's RefSCC to the new + // RefSCC. + auto OriginalRCIndex = RefSCCIndices.find(OriginalRC)->second; + PostOrderRefSCCs.insert(PostOrderRefSCCs.begin() + OriginalRCIndex, NewRC); + for (int I = OriginalRCIndex, Size = PostOrderRefSCCs.size(); I < Size; ++I) + RefSCCIndices[PostOrderRefSCCs[I]] = I; + } + + for (Function *NewFunction : NewFunctions) { + Node &NewN = get(*NewFunction); + // Each new function is in its own new SCC. The original function can only + // have a ref edge to new functions, and no other existing functions can + // have references to new functions. Each new function only has a ref edge + // to the other new functions. + SCC *NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN})); + // The new SCCs are either sibling SCCs or parent SCCs to all other existing + // SCCs in the RefSCC. Either way, they can go at the back of the postorder + // SCC list. 
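The two update hooks added above (addSplitFunction and addSplitRefRecursiveFunctions) are easiest to read with a caller in mind. The following is a minimal sketch, not part of this patch, of how a function-splitting transform might keep an existing LazyCallGraph in sync; the helper name and variables are invented for illustration.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Hypothetical post-split update: OriginalF has just been split and now calls
// or references the freshly created functions, so the graph is told about the
// new nodes instead of being rebuilt from scratch.
static void updateGraphAfterSplit(LazyCallGraph &CG, Function &OriginalF,
                                  Function &NewF,
                                  ArrayRef<Function *> RefRecursiveFns) {
  // A single outlined function that is only reached from OriginalF.
  CG.addSplitFunction(OriginalF, NewF);

  // A batch of outlined functions that reference one another; they must only
  // be referenced from OriginalF and from each other.
  if (!RefRecursiveFns.empty())
    CG.addSplitRefRecursiveFunctions(OriginalF, RefRecursiveFns);
}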
+ auto Index = NewRC->SCCIndices.size(); + NewRC->SCCIndices[NewC] = Index; + NewRC->SCCs.push_back(NewC); + SCCMap[&NewN] = NewC; + } + +#ifndef NDEBUG + for (Function *F1 : NewFunctions) { + assert(getEdgeKind(OriginalFunction, *F1) == Edge::Kind::Ref && + "Expected ref edges from original function to every new function"); + Node &N1 = get(*F1); + for (Function *F2 : NewFunctions) { + if (F1 == F2) + continue; + Node &N2 = get(*F2); + assert(!N1->lookup(N2)->isCall() && + "Edges between new functions must be ref edges"); + } + } +#endif +} + LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) { return *new (MappedN = BPA.Allocate()) Node(*this, F); } @@ -1809,11 +1809,11 @@ void LazyCallGraph::updateGraphPtrs() { RC->G = this; } -LazyCallGraph::Node &LazyCallGraph::initNode(Function &F) { +LazyCallGraph::Node &LazyCallGraph::initNode(Function &F) { Node &N = get(F); N.DFSNumber = N.LowLink = -1; N.populate(); - NodeMap[&F] = &N; + NodeMap[&F] = &N; return N; } @@ -1958,7 +1958,7 @@ void LazyCallGraph::buildRefSCCs() { for (Edge &E : *this) Roots.push_back(&E.getNode()); - // The roots will be iterated in order. + // The roots will be iterated in order. buildGenericSCCs( Roots, [](Node &N) { diff --git a/contrib/libs/llvm12/lib/Analysis/LazyValueInfo.cpp b/contrib/libs/llvm12/lib/Analysis/LazyValueInfo.cpp index ba2b6fe94c..03bd788ae6 100644 --- a/contrib/libs/llvm12/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LazyValueInfo.cpp @@ -36,7 +36,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/KnownBits.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include <map> using namespace llvm; @@ -151,21 +151,21 @@ namespace { } // end anonymous namespace namespace { - using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>; - + using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>; + /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { /// This is all of the cached information for one basic block. It contains /// the per-value lattice elements, as well as a separate set for - /// overdefined values to reduce memory usage. Additionally pointers - /// dereferenced in the block are cached for nullability queries. + /// overdefined values to reduce memory usage. Additionally pointers + /// dereferenced in the block are cached for nullability queries. struct BlockCacheEntry { SmallDenseMap<AssertingVH<Value>, ValueLatticeElement, 4> LatticeElements; SmallDenseSet<AssertingVH<Value>, 4> OverDefined; - // None indicates that the nonnull pointers for this basic block - // block have not been computed yet. - Optional<NonNullPointerSet> NonNullPointers; + // None indicates that the nonnull pointers for this basic block + // block have not been computed yet. + Optional<NonNullPointerSet> NonNullPointers; }; /// Cached information per basic block. 
@@ -227,19 +227,19 @@ namespace { return LatticeIt->second; } - bool isNonNullAtEndOfBlock( - Value *V, BasicBlock *BB, - function_ref<NonNullPointerSet(BasicBlock *)> InitFn) { - BlockCacheEntry *Entry = getOrCreateBlockEntry(BB); - if (!Entry->NonNullPointers) { - Entry->NonNullPointers = InitFn(BB); - for (Value *V : *Entry->NonNullPointers) - addValueHandle(V); - } - - return Entry->NonNullPointers->count(V); - } - + bool isNonNullAtEndOfBlock( + Value *V, BasicBlock *BB, + function_ref<NonNullPointerSet(BasicBlock *)> InitFn) { + BlockCacheEntry *Entry = getOrCreateBlockEntry(BB); + if (!Entry->NonNullPointers) { + Entry->NonNullPointers = InitFn(BB); + for (Value *V : *Entry->NonNullPointers) + addValueHandle(V); + } + + return Entry->NonNullPointers->count(V); + } + /// clear - Empty the cache. void clear() { BlockCache.clear(); @@ -264,8 +264,8 @@ void LazyValueInfoCache::eraseValue(Value *V) { for (auto &Pair : BlockCache) { Pair.second->LatticeElements.erase(V); Pair.second->OverDefined.erase(V); - if (Pair.second->NonNullPointers) - Pair.second->NonNullPointers->erase(V); + if (Pair.second->NonNullPointers) + Pair.second->NonNullPointers->erase(V); } auto HandleIt = ValueHandles.find_as(V); @@ -333,7 +333,7 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, if (!changed) continue; - llvm::append_range(worklist, successors(ToUpdate)); + llvm::append_range(worklist, successors(ToUpdate)); } } @@ -410,8 +410,8 @@ class LazyValueInfoImpl { BasicBlock *BB); Optional<ValueLatticeElement> solveBlockValueSelect(SelectInst *S, BasicBlock *BB); - Optional<ConstantRange> getRangeFor(Value *V, Instruction *CxtI, - BasicBlock *BB); + Optional<ConstantRange> getRangeFor(Value *V, Instruction *CxtI, + BasicBlock *BB); Optional<ValueLatticeElement> solveBlockValueBinaryOpImpl( Instruction *I, BasicBlock *BB, std::function<ConstantRange(const ConstantRange &, @@ -426,7 +426,7 @@ class LazyValueInfoImpl { BasicBlock *BB); Optional<ValueLatticeElement> solveBlockValueExtractValue( ExtractValueInst *EVI, BasicBlock *BB); - bool isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB); + bool isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, ValueLatticeElement &BBLV, Instruction *BBI); @@ -434,16 +434,16 @@ class LazyValueInfoImpl { void solve(); public: - /// This is the query interface to determine the lattice value for the - /// specified Value* at the context instruction (if specified) or at the - /// start of the block. + /// This is the query interface to determine the lattice value for the + /// specified Value* at the context instruction (if specified) or at the + /// start of the block. ValueLatticeElement getValueInBlock(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); - /// This is the query interface to determine the lattice value for the - /// specified Value* at the specified instruction using only information - /// from assumes/guards and range metadata. Unlike getValueInBlock(), no - /// recursive query is performed. + /// This is the query interface to determine the lattice value for the + /// specified Value* at the specified instruction using only information + /// from assumes/guards and range metadata. Unlike getValueInBlock(), no + /// recursive query is performed. 
ValueLatticeElement getValueAt(Value *V, Instruction *CxtI); /// This is the query interface to determine the lattice @@ -628,43 +628,43 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueImpl( return getFromRangeMetadata(BBI); } -static void AddNonNullPointer(Value *Ptr, NonNullPointerSet &PtrSet) { - // TODO: Use NullPointerIsDefined instead. - if (Ptr->getType()->getPointerAddressSpace() == 0) - PtrSet.insert(getUnderlyingObject(Ptr)); -} - -static void AddNonNullPointersByInstruction( - Instruction *I, NonNullPointerSet &PtrSet) { +static void AddNonNullPointer(Value *Ptr, NonNullPointerSet &PtrSet) { + // TODO: Use NullPointerIsDefined instead. + if (Ptr->getType()->getPointerAddressSpace() == 0) + PtrSet.insert(getUnderlyingObject(Ptr)); +} + +static void AddNonNullPointersByInstruction( + Instruction *I, NonNullPointerSet &PtrSet) { if (LoadInst *L = dyn_cast<LoadInst>(I)) { - AddNonNullPointer(L->getPointerOperand(), PtrSet); - } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { - AddNonNullPointer(S->getPointerOperand(), PtrSet); - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { - if (MI->isVolatile()) return; + AddNonNullPointer(L->getPointerOperand(), PtrSet); + } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { + AddNonNullPointer(S->getPointerOperand(), PtrSet); + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + if (MI->isVolatile()) return; // FIXME: check whether it has a valuerange that excludes zero? ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength()); - if (!Len || Len->isZero()) return; + if (!Len || Len->isZero()) return; - AddNonNullPointer(MI->getRawDest(), PtrSet); + AddNonNullPointer(MI->getRawDest(), PtrSet); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) - AddNonNullPointer(MTI->getRawSource(), PtrSet); + AddNonNullPointer(MTI->getRawSource(), PtrSet); } } -bool LazyValueInfoImpl::isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB) { - if (NullPointerIsDefined(BB->getParent(), - Val->getType()->getPointerAddressSpace())) - return false; +bool LazyValueInfoImpl::isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB) { + if (NullPointerIsDefined(BB->getParent(), + Val->getType()->getPointerAddressSpace())) + return false; - Val = getUnderlyingObject(Val); - return TheCache.isNonNullAtEndOfBlock(Val, BB, [](BasicBlock *BB) { - NonNullPointerSet NonNullPointers; + Val = getUnderlyingObject(Val); + return TheCache.isNonNullAtEndOfBlock(Val, BB, [](BasicBlock *BB) { + NonNullPointerSet NonNullPointers; for (Instruction &I : *BB) - AddNonNullPointersByInstruction(&I, NonNullPointers); - return NonNullPointers; - }); + AddNonNullPointersByInstruction(&I, NonNullPointers); + return NonNullPointers; + }); } Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueNonLocal( @@ -675,7 +675,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueNonLocal( // value is overdefined. 
if (BB == &BB->getParent()->getEntryBlock()) { assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); - return ValueLatticeElement::getOverdefined(); + return ValueLatticeElement::getOverdefined(); } // Loop over all of our predecessors, merging what we know from them into @@ -772,23 +772,23 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( } // If guards are not used in the module, don't spend time looking for them - if (GuardDecl && !GuardDecl->use_empty() && - BBI->getIterator() != BB->begin()) { - for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()), - BB->rend())) { - Value *Cond = nullptr; - if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond)))) - BBLV = intersect(BBLV, getValueFromCondition(Val, Cond)); - } - } - - if (BBLV.isOverdefined()) { - // Check whether we're checking at the terminator, and the pointer has - // been dereferenced in this block. - PointerType *PTy = dyn_cast<PointerType>(Val->getType()); - if (PTy && BB->getTerminator() == BBI && - isNonNullAtEndOfBlock(Val, BB)) - BBLV = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); + if (GuardDecl && !GuardDecl->use_empty() && + BBI->getIterator() != BB->begin()) { + for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()), + BB->rend())) { + Value *Cond = nullptr; + if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond)))) + BBLV = intersect(BBLV, getValueFromCondition(Val, Cond)); + } + } + + if (BBLV.isOverdefined()) { + // Check whether we're checking at the terminator, and the pointer has + // been dereferenced in this block. + PointerType *PTy = dyn_cast<PointerType>(Val->getType()); + if (PTy && BB->getTerminator() == BBI && + isNonNullAtEndOfBlock(Val, BB)) + BBLV = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); } } @@ -922,19 +922,19 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect( return Result; } -Optional<ConstantRange> LazyValueInfoImpl::getRangeFor(Value *V, - Instruction *CxtI, - BasicBlock *BB) { - Optional<ValueLatticeElement> OptVal = getBlockValue(V, BB); +Optional<ConstantRange> LazyValueInfoImpl::getRangeFor(Value *V, + Instruction *CxtI, + BasicBlock *BB) { + Optional<ValueLatticeElement> OptVal = getBlockValue(V, BB); if (!OptVal) return None; ValueLatticeElement &Val = *OptVal; - intersectAssumeOrGuardBlockValueConstantRange(V, Val, CxtI); + intersectAssumeOrGuardBlockValueConstantRange(V, Val, CxtI); if (Val.isConstantRange()) return Val.getConstantRange(); - const unsigned OperandBitWidth = DL.getTypeSizeInBits(V->getType()); + const unsigned OperandBitWidth = DL.getTypeSizeInBits(V->getType()); return ConstantRange::getFull(OperandBitWidth); } @@ -964,7 +964,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueCast( // Figure out the range of the LHS. If that fails, we still apply the // transfer rule on the full set since we may be able to locally infer // interesting facts. - Optional<ConstantRange> LHSRes = getRangeFor(CI->getOperand(0), CI, BB); + Optional<ConstantRange> LHSRes = getRangeFor(CI->getOperand(0), CI, BB); if (!LHSRes.hasValue()) // More work to do before applying this transfer rule. return None; @@ -987,8 +987,8 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOpImpl( // conservative range, but apply the transfer rule anyways. 
This // lets us pick up facts from expressions like "and i32 (call i32 // @foo()), 32" - Optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB); - Optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB); + Optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB); + Optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB); if (!LHSRes.hasValue() || !RHSRes.hasValue()) // More work to do before applying this transfer rule. return None; @@ -1040,22 +1040,22 @@ LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(WithOverflowInst *WO, Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueIntrinsic( IntrinsicInst *II, BasicBlock *BB) { - if (!ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { - LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined (unknown intrinsic).\n"); - return ValueLatticeElement::getOverdefined(); - } - - SmallVector<ConstantRange, 2> OpRanges; - for (Value *Op : II->args()) { - Optional<ConstantRange> Range = getRangeFor(Op, II, BB); - if (!Range) - return None; - OpRanges.push_back(*Range); - } - - return ValueLatticeElement::getRange( - ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges)); + if (!ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown intrinsic).\n"); + return ValueLatticeElement::getOverdefined(); + } + + SmallVector<ConstantRange, 2> OpRanges; + for (Value *Op : II->args()) { + Optional<ConstantRange> Range = getRangeFor(Op, II, BB); + if (!Range) + return None; + OpRanges.push_back(*Range); + } + + return ValueLatticeElement::getRange( + ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges)); } Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueExtractValue( @@ -1099,26 +1099,26 @@ static bool matchICmpOperand(const APInt *&Offset, Value *LHS, Value *Val, return false; } -/// Get value range for a "(Val + Offset) Pred RHS" condition. -static ValueLatticeElement getValueFromSimpleICmpCondition( - CmpInst::Predicate Pred, Value *RHS, const APInt *Offset) { - ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(), - /*isFullSet=*/true); - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) - RHSRange = ConstantRange(CI->getValue()); - else if (Instruction *I = dyn_cast<Instruction>(RHS)) - if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) - RHSRange = getConstantRangeFromMetadata(*Ranges); - - ConstantRange TrueValues = - ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); - - if (Offset) - TrueValues = TrueValues.subtract(*Offset); - - return ValueLatticeElement::getRange(std::move(TrueValues)); -} - +/// Get value range for a "(Val + Offset) Pred RHS" condition. 
+static ValueLatticeElement getValueFromSimpleICmpCondition( + CmpInst::Predicate Pred, Value *RHS, const APInt *Offset) { + ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(), + /*isFullSet=*/true); + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) + RHSRange = ConstantRange(CI->getValue()); + else if (Instruction *I = dyn_cast<Instruction>(RHS)) + if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) + RHSRange = getConstantRangeFromMetadata(*Ranges); + + ConstantRange TrueValues = + ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); + + if (Offset) + TrueValues = TrueValues.subtract(*Offset); + + return ValueLatticeElement::getRange(std::move(TrueValues)); +} + static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, bool isTrueDest) { Value *LHS = ICI->getOperand(0); @@ -1141,27 +1141,27 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, return ValueLatticeElement::getOverdefined(); const APInt *Offset = nullptr; - if (matchICmpOperand(Offset, LHS, Val, EdgePred)) - return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset); - - CmpInst::Predicate SwappedPred = CmpInst::getSwappedPredicate(EdgePred); - if (matchICmpOperand(Offset, RHS, Val, SwappedPred)) - return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset); - - // If (Val & Mask) == C then all the masked bits are known and we can compute - // a value range based on that. - const APInt *Mask, *C; - if (EdgePred == ICmpInst::ICMP_EQ && - match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) && - match(RHS, m_APInt(C))) { - KnownBits Known; - Known.Zero = ~*C & *Mask; - Known.One = *C & *Mask; - return ValueLatticeElement::getRange( - ConstantRange::fromKnownBits(Known, /*IsSigned*/ false)); - } + if (matchICmpOperand(Offset, LHS, Val, EdgePred)) + return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset); - return ValueLatticeElement::getOverdefined(); + CmpInst::Predicate SwappedPred = CmpInst::getSwappedPredicate(EdgePred); + if (matchICmpOperand(Offset, RHS, Val, SwappedPred)) + return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset); + + // If (Val & Mask) == C then all the masked bits are known and we can compute + // a value range based on that. 
+ const APInt *Mask, *C; + if (EdgePred == ICmpInst::ICMP_EQ && + match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) && + match(RHS, m_APInt(C))) { + KnownBits Known; + Known.Zero = ~*C & *Mask; + Known.One = *C & *Mask; + return ValueLatticeElement::getRange( + ConstantRange::fromKnownBits(Known, /*IsSigned*/ false)); + } + + return ValueLatticeElement::getOverdefined(); } // Handle conditions of the form @@ -1201,36 +1201,36 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1) return getValueFromOverflowCondition(Val, WO, isTrueDest); - Value *L, *R; - bool IsAnd; - if (match(Cond, m_LogicalAnd(m_Value(L), m_Value(R)))) - IsAnd = true; - else if (match(Cond, m_LogicalOr(m_Value(L), m_Value(R)))) - IsAnd = false; - else + Value *L, *R; + bool IsAnd; + if (match(Cond, m_LogicalAnd(m_Value(L), m_Value(R)))) + IsAnd = true; + else if (match(Cond, m_LogicalOr(m_Value(L), m_Value(R)))) + IsAnd = false; + else return ValueLatticeElement::getOverdefined(); // Prevent infinite recursion if Cond references itself as in this example: // Cond: "%tmp4 = and i1 %tmp4, undef" // BL: "%tmp4 = and i1 %tmp4, undef" // BR: "i1 undef" - if (L == Cond || R == Cond) + if (L == Cond || R == Cond) return ValueLatticeElement::getOverdefined(); - // if (L && R) -> intersect L and R - // if (!(L || R)) -> intersect L and R - // if (L || R) -> union L and R - // if (!(L && R)) -> union L and R - if (isTrueDest ^ IsAnd) { - ValueLatticeElement V = getValueFromCondition(Val, L, isTrueDest, Visited); - if (V.isOverdefined()) - return V; - V.mergeIn(getValueFromCondition(Val, R, isTrueDest, Visited)); - return V; - } - - return intersect(getValueFromCondition(Val, L, isTrueDest, Visited), - getValueFromCondition(Val, R, isTrueDest, Visited)); + // if (L && R) -> intersect L and R + // if (!(L || R)) -> intersect L and R + // if (L || R) -> union L and R + // if (!(L && R)) -> union L and R + if (isTrueDest ^ IsAnd) { + ValueLatticeElement V = getValueFromCondition(Val, L, isTrueDest, Visited); + if (V.isOverdefined()) + return V; + V.mergeIn(getValueFromCondition(Val, R, isTrueDest, Visited)); + return V; + } + + return intersect(getValueFromCondition(Val, L, isTrueDest, Visited), + getValueFromCondition(Val, R, isTrueDest, Visited)); } static ValueLatticeElement @@ -1254,15 +1254,15 @@ ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, // Return true if Usr has Op as an operand, otherwise false. static bool usesOperand(User *Usr, Value *Op) { - return is_contained(Usr->operands(), Op); + return is_contained(Usr->operands(), Op); } // Return true if the instruction type of Val is supported by -// constantFoldUser(). Currently CastInst, BinaryOperator and FreezeInst only. -// Call this before calling constantFoldUser() to find out if it's even worth -// attempting to call it. +// constantFoldUser(). Currently CastInst, BinaryOperator and FreezeInst only. +// Call this before calling constantFoldUser() to find out if it's even worth +// attempting to call it. 
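To make the two condition forms handled above concrete, here is a small, self-contained illustration (not from this patch) of the ranges they produce; the bit width and constants are arbitrary.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // "X u< 16" on an i8 value: the allowed region for X is [0, 16).
  ConstantRange RHS(APInt(8, 16));
  ConstantRange FromICmp =
      ConstantRange::makeAllowedICmpRegion(CmpInst::ICMP_ULT, RHS);

  // "(X & 0xF0) == 0x30": bits 7..4 of X are known to be 0011 and the low
  // nibble is unknown, so X lies in [0x30, 0x40), i.e. [48, 64).
  KnownBits Known(8);
  Known.One = APInt(8, 0x30);  // C & Mask
  Known.Zero = APInt(8, 0xC0); // ~C & Mask
  ConstantRange FromMask =
      ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);

  errs() << FromICmp << " and " << FromMask << "\n"; // [0,16) and [48,64)
  return 0;
}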
static bool isOperationFoldable(User *Usr) { - return isa<CastInst>(Usr) || isa<BinaryOperator>(Usr) || isa<FreezeInst>(Usr); + return isa<CastInst>(Usr) || isa<BinaryOperator>(Usr) || isa<FreezeInst>(Usr); } // Check if Usr can be simplified to an integer constant when the value of one @@ -1293,9 +1293,9 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, SimplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) { return ValueLatticeElement::getRange(ConstantRange(C->getValue())); } - } else if (isa<FreezeInst>(Usr)) { - assert(cast<FreezeInst>(Usr)->getOperand(0) == Op && "Operand 0 isn't Op"); - return ValueLatticeElement::getRange(ConstantRange(OpConstVal)); + } else if (isa<FreezeInst>(Usr)) { + assert(cast<FreezeInst>(Usr)->getOperand(0) == Op && "Operand 0 isn't Op"); + return ValueLatticeElement::getRange(ConstantRange(OpConstVal)); } return ValueLatticeElement::getOverdefined(); } @@ -1598,12 +1598,12 @@ static bool isKnownNonConstant(Value *V) { return false; } -Constant *LazyValueInfo::getConstant(Value *V, Instruction *CxtI) { +Constant *LazyValueInfo::getConstant(Value *V, Instruction *CxtI) { // Bail out early if V is known not to be a Constant. if (isKnownNonConstant(V)) return nullptr; - BasicBlock *BB = CxtI->getParent(); + BasicBlock *BB = CxtI->getParent(); ValueLatticeElement Result = getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI); @@ -1617,11 +1617,11 @@ Constant *LazyValueInfo::getConstant(Value *V, Instruction *CxtI) { return nullptr; } -ConstantRange LazyValueInfo::getConstantRange(Value *V, Instruction *CxtI, +ConstantRange LazyValueInfo::getConstantRange(Value *V, Instruction *CxtI, bool UndefAllowed) { assert(V->getType()->isIntegerTy()); unsigned Width = V->getType()->getIntegerBitWidth(); - BasicBlock *BB = CxtI->getParent(); + BasicBlock *BB = CxtI->getParent(); ValueLatticeElement Result = getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI); if (Result.isUnknown()) @@ -1754,7 +1754,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, - Instruction *CxtI, bool UseBlockValue) { + Instruction *CxtI, bool UseBlockValue) { // Is or is not NonNull are common predicates being queried. If // isKnownNonZero can tell us the result of the predicate, we can // return it quickly. But this is only a fastpath, and falling @@ -1768,10 +1768,10 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, else if (Pred == ICmpInst::ICMP_NE) return LazyValueInfo::True; } - - ValueLatticeElement Result = UseBlockValue - ? getImpl(PImpl, AC, M).getValueInBlock(V, CxtI->getParent(), CxtI) - : getImpl(PImpl, AC, M).getValueAt(V, CxtI); + + ValueLatticeElement Result = UseBlockValue + ? 
getImpl(PImpl, AC, M).getValueInBlock(V, CxtI->getParent(), CxtI) + : getImpl(PImpl, AC, M).getValueAt(V, CxtI); Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI); if (Ret != Unknown) return Ret; diff --git a/contrib/libs/llvm12/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/LegacyDivergenceAnalysis.cpp index 30eec5a611..76706913f9 100644 --- a/contrib/libs/llvm12/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -299,9 +299,9 @@ FunctionPass *llvm::createLegacyDivergenceAnalysisPass() { } void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequiredTransitive<PostDominatorTreeWrapperPass>(); - AU.addRequiredTransitive<LoopInfoWrapperPass>(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequiredTransitive<PostDominatorTreeWrapperPass>(); + AU.addRequiredTransitive<LoopInfoWrapperPass>(); AU.setPreservesAll(); } diff --git a/contrib/libs/llvm12/lib/Analysis/Lint.cpp b/contrib/libs/llvm12/lib/Analysis/Lint.cpp index e188c23cf3..82e95d2276 100644 --- a/contrib/libs/llvm12/lib/Analysis/Lint.cpp +++ b/contrib/libs/llvm12/lib/Analysis/Lint.cpp @@ -63,7 +63,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -81,102 +81,102 @@ using namespace llvm; namespace { -namespace MemRef { -static const unsigned Read = 1; -static const unsigned Write = 2; -static const unsigned Callee = 4; -static const unsigned Branchee = 8; -} // end namespace MemRef - -class Lint : public InstVisitor<Lint> { - friend class InstVisitor<Lint>; - - void visitFunction(Function &F); - - void visitCallBase(CallBase &CB); - void visitMemoryReference(Instruction &I, const MemoryLocation &Loc, - MaybeAlign Alignment, Type *Ty, unsigned Flags); - void visitEHBeginCatch(IntrinsicInst *II); - void visitEHEndCatch(IntrinsicInst *II); - - void visitReturnInst(ReturnInst &I); - void visitLoadInst(LoadInst &I); - void visitStoreInst(StoreInst &I); - void visitXor(BinaryOperator &I); - void visitSub(BinaryOperator &I); - void visitLShr(BinaryOperator &I); - void visitAShr(BinaryOperator &I); - void visitShl(BinaryOperator &I); - void visitSDiv(BinaryOperator &I); - void visitUDiv(BinaryOperator &I); - void visitSRem(BinaryOperator &I); - void visitURem(BinaryOperator &I); - void visitAllocaInst(AllocaInst &I); - void visitVAArgInst(VAArgInst &I); - void visitIndirectBrInst(IndirectBrInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void visitInsertElementInst(InsertElementInst &I); - void visitUnreachableInst(UnreachableInst &I); - - Value *findValue(Value *V, bool OffsetOk) const; - Value *findValueImpl(Value *V, bool OffsetOk, - SmallPtrSetImpl<Value *> &Visited) const; - -public: - Module *Mod; - const DataLayout *DL; - AliasAnalysis *AA; - AssumptionCache *AC; - DominatorTree *DT; - TargetLibraryInfo *TLI; - - std::string Messages; - raw_string_ostream MessagesStr; - - Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, - AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) - : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), - MessagesStr(Messages) {} - - void WriteValues(ArrayRef<const Value *> Vs) { - for (const Value *V : Vs) { - if (!V) - continue; - if (isa<Instruction>(V)) { 
- MessagesStr << *V << '\n'; - } else { - V->printAsOperand(MessagesStr, true, Mod); - MessagesStr << '\n'; +namespace MemRef { +static const unsigned Read = 1; +static const unsigned Write = 2; +static const unsigned Callee = 4; +static const unsigned Branchee = 8; +} // end namespace MemRef + +class Lint : public InstVisitor<Lint> { + friend class InstVisitor<Lint>; + + void visitFunction(Function &F); + + void visitCallBase(CallBase &CB); + void visitMemoryReference(Instruction &I, const MemoryLocation &Loc, + MaybeAlign Alignment, Type *Ty, unsigned Flags); + void visitEHBeginCatch(IntrinsicInst *II); + void visitEHEndCatch(IntrinsicInst *II); + + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSetImpl<Value *> &Visited) const; + +public: + Module *Mod; + const DataLayout *DL; + AliasAnalysis *AA; + AssumptionCache *AC; + DominatorTree *DT; + TargetLibraryInfo *TLI; + + std::string Messages; + raw_string_ostream MessagesStr; + + Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, + AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) + : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), + MessagesStr(Messages) {} + + void WriteValues(ArrayRef<const Value *> Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, true, Mod); + MessagesStr << '\n'; } } - } - - /// A check failed, so printout out the condition and the message. - /// - /// This provides a nice place to put a breakpoint if you want to see why - /// something is not correct. - void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } - - /// A check failed (with values to print). - /// - /// This calls the Message-only version so that the above is easier to set - /// a breakpoint on. - template <typename T1, typename... Ts> - void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { - CheckFailed(Message); - WriteValues({V1, Vs...}); - } -}; + } + + /// A check failed, so printout out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. + void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + /// A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set + /// a breakpoint on. + template <typename T1, typename... Ts> + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { + CheckFailed(Message); + WriteValues({V1, Vs...}); + } +}; } // end anonymous namespace // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, ...) 
\ - do { \ - if (!(C)) { \ - CheckFailed(__VA_ARGS__); \ - return; \ - } \ - } while (false) +#define Assert(C, ...) \ + do { \ + if (!(C)) { \ + CheckFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a @@ -190,7 +190,7 @@ void Lint::visitFunction(Function &F) { void Lint::visitCallBase(CallBase &I) { Value *Callee = I.getCalledOperand(); - visitMemoryReference(I, MemoryLocation::getAfter(Callee), None, nullptr, + visitMemoryReference(I, MemoryLocation::getAfter(Callee), None, nullptr, MemRef::Callee); if (Function *F = dyn_cast<Function>(findValue(Callee, @@ -250,10 +250,10 @@ void Lint::visitCallBase(CallBase &I) { // Check that an sret argument points to valid memory. if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { - Type *Ty = Formal->getParamStructRetType(); - MemoryLocation Loc( - Actual, LocationSize::precise(DL->getTypeStoreSize(Ty))); - visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty, + Type *Ty = Formal->getParamStructRetType(); + MemoryLocation Loc( + Actual, LocationSize::precise(DL->getTypeStoreSize(Ty))); + visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty, MemRef::Read | MemRef::Write); } } @@ -280,22 +280,22 @@ void Lint::visitCallBase(CallBase &I) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) switch (II->getIntrinsicID()) { - default: - break; + default: + break; - // TODO: Check more intrinsics + // TODO: Check more intrinsics case Intrinsic::memcpy: { MemCpyInst *MCI = cast<MemCpyInst>(&I); - visitMemoryReference(I, MemoryLocation::getForDest(MCI), + visitMemoryReference(I, MemoryLocation::getForDest(MCI), MCI->getDestAlign(), nullptr, MemRef::Write); - visitMemoryReference(I, MemoryLocation::getForSource(MCI), + visitMemoryReference(I, MemoryLocation::getForSource(MCI), MCI->getSourceAlign(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial // overlap is not distinguished from the case where nothing is known. - auto Size = LocationSize::afterPointer(); + auto Size = LocationSize::afterPointer(); if (const ConstantInt *Len = dyn_cast<ConstantInt>(findValue(MCI->getLength(), /*OffsetOk=*/false))) @@ -309,10 +309,10 @@ void Lint::visitCallBase(CallBase &I) { case Intrinsic::memcpy_inline: { MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I); const uint64_t Size = MCII->getLength()->getValue().getLimitedValue(); - visitMemoryReference(I, MemoryLocation::getForDest(MCII), - MCII->getDestAlign(), nullptr, MemRef::Write); - visitMemoryReference(I, MemoryLocation::getForSource(MCII), - MCII->getSourceAlign(), nullptr, MemRef::Read); + visitMemoryReference(I, MemoryLocation::getForDest(MCII), + MCII->getDestAlign(), nullptr, MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForSource(MCII), + MCII->getSourceAlign(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. 
Known partial @@ -324,15 +324,15 @@ void Lint::visitCallBase(CallBase &I) { } case Intrinsic::memmove: { MemMoveInst *MMI = cast<MemMoveInst>(&I); - visitMemoryReference(I, MemoryLocation::getForDest(MMI), + visitMemoryReference(I, MemoryLocation::getForDest(MMI), MMI->getDestAlign(), nullptr, MemRef::Write); - visitMemoryReference(I, MemoryLocation::getForSource(MMI), + visitMemoryReference(I, MemoryLocation::getForSource(MMI), MMI->getSourceAlign(), nullptr, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast<MemSetInst>(&I); - visitMemoryReference(I, MemoryLocation::getForDest(MSI), + visitMemoryReference(I, MemoryLocation::getForDest(MSI), MSI->getDestAlign(), nullptr, MemRef::Write); break; } @@ -342,32 +342,32 @@ void Lint::visitCallBase(CallBase &I) { "Undefined behavior: va_start called in a non-varargs function", &I); - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, + nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Write); - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), None, - nullptr, MemRef::Read); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, + nullptr, MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), None, + nullptr, MemRef::Read); break; case Intrinsic::vaend: - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, + nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: // Stackrestore doesn't read or write memory, but it sets the // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Read | MemRef::Write); - break; - case Intrinsic::get_active_lane_mask: - if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1))) - Assert(!TripCount->isZero(), "get_active_lane_mask: operand #2 " - "must be greater than 0", &I); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, + nullptr, MemRef::Read | MemRef::Write); break; + case Intrinsic::get_active_lane_mask: + if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1))) + Assert(!TripCount->isZero(), "get_active_lane_mask: operand #2 " + "must be greater than 0", &I); + break; } } @@ -384,14 +384,14 @@ void Lint::visitReturnInst(ReturnInst &I) { // TODO: Check that the reference is in bounds. // TODO: Check readnone/readonly function attributes. -void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, +void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, MaybeAlign Align, Type *Ty, unsigned Flags) { // If no memory is being referenced, it doesn't matter if the pointer // is valid. 
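The memcpy handling above ultimately reduces to an alias query over the copied size. A self-contained sketch of that idea follows; the helper name is invented here, and the query is phrased as "can AA prove the operands disjoint", which is the property the check wants to establish.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Hypothetical check: returns true if AA proves that the destination and
// source of the memcpy cannot overlap for the number of bytes copied.
static bool memcpyOperandsProvablyDisjoint(AAResults &AA, MemCpyInst &MCI) {
  // Without a constant length, conservatively query everything reachable
  // from the two pointers onward.
  LocationSize Size = LocationSize::afterPointer();
  if (auto *Len = dyn_cast<ConstantInt>(MCI.getLength()))
    Size = LocationSize::precise(Len->getValue().getZExtValue());

  return AA.alias(MemoryLocation(MCI.getRawDest(), Size),
                  MemoryLocation(MCI.getRawSource(), Size)) == NoAlias;
}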
- if (Loc.Size.isZero()) + if (Loc.Size.isZero()) return; - Value *Ptr = const_cast<Value *>(Loc.Ptr); + Value *Ptr = const_cast<Value *>(Loc.Ptr); Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); Assert(!isa<ConstantPointerNull>(UnderlyingObject), "Undefined behavior: Null pointer dereference", &I); @@ -459,8 +459,8 @@ void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, // Accesses from before the start or after the end of the object are not // defined. - Assert(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize || - (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize), + Assert(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize || + (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize), "Undefined behavior: Buffer overflow", &I); // Accesses that say that the memory is more aligned than it is are not @@ -474,13 +474,13 @@ void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, } void Lint::visitLoadInst(LoadInst &I) { - visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(), - MemRef::Read); + visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(), + MemRef::Read); } void Lint::visitStoreInst(StoreInst &I) { - visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), - I.getOperand(0)->getType(), MemRef::Write); + visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), + I.getOperand(0)->getType(), MemRef::Write); } void Lint::visitXor(BinaryOperator &I) { @@ -522,8 +522,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast<VectorType>(V->getType()); if (!VecTy) { - KnownBits Known = - computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT); + KnownBits Known = + computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT); return Known.isZero(); } @@ -537,8 +537,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, // For a vector, KnownZero will only be true if all values are zero, so check // this per component - for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements(); - I != N; ++I) { + for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements(); + I != N; ++I) { Constant *Elem = C->getAggregateElement(I); if (isa<UndefValue>(Elem)) return true; @@ -581,12 +581,12 @@ void Lint::visitAllocaInst(AllocaInst &I) { } void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, MemoryLocation::get(&I), None, nullptr, - MemRef::Read | MemRef::Write); + visitMemoryReference(I, MemoryLocation::get(&I), None, nullptr, + MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), None, + visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), None, nullptr, MemRef::Branchee); Assert(I.getNumDestinations() != 0, @@ -596,17 +596,17 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) { void Lint::visitExtractElementInst(ExtractElementInst &I) { if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), /*OffsetOk=*/false))) - Assert( - CI->getValue().ult( - cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()), - "Undefined result: extractelement index out of range", &I); + Assert( + CI->getValue().ult( + cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()), + "Undefined result: extractelement index out of range", &I); } void Lint::visitInsertElementInst(InsertElementInst &I) { if (ConstantInt *CI = 
dyn_cast<ConstantInt>(findValue(I.getOperand(2), /*OffsetOk=*/false))) - Assert(CI->getValue().ult( - cast<FixedVectorType>(I.getType())->getNumElements()), + Assert(CI->getValue().ult( + cast<FixedVectorType>(I.getType())->getNumElements()), "Undefined result: insertelement index out of range", &I); } @@ -643,7 +643,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // TODO: Look through eliminable cast pairs. // TODO: Look through calls with unique return values. // TODO: Look through vector insert/extract/shuffle. - V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts(); + V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts(); if (LoadInst *L = dyn_cast<LoadInst>(V)) { BasicBlock::iterator BBI = L->getIterator(); BasicBlock *BB = L->getParent(); @@ -652,13 +652,13 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (!VisitedBlocks.insert(BB).second) break; if (Value *U = - FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) + FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) return findValueImpl(U, OffsetOk, Visited); - if (BBI != BB->begin()) - break; + if (BBI != BB->begin()) + break; BB = BB->getUniquePredecessor(); - if (!BB) - break; + if (!BB) + break; BBI = BB->end(); } } else if (PHINode *PN = dyn_cast<PHINode>(V)) { @@ -668,8 +668,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CI->isNoopCast(*DL)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { - if (Value *W = - FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) + if (Value *W = + FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { @@ -700,75 +700,75 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, return V; } -PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *Mod = F.getParent(); - auto *DL = &F.getParent()->getDataLayout(); - auto *AA = &AM.getResult<AAManager>(F); - auto *AC = &AM.getResult<AssumptionAnalysis>(F); - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); - Lint L(Mod, DL, AA, AC, DT, TLI); - L.visit(F); - dbgs() << L.MessagesStr.str(); - return PreservedAnalyses::all(); -} - -class LintLegacyPass : public FunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - LintLegacyPass() : FunctionPass(ID) { - initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - } - void print(raw_ostream &O, const Module *M) const override {} -}; - -char LintLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", - false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", - false, true) - -bool LintLegacyPass::runOnFunction(Function &F) { - auto *Mod = F.getParent(); - auto *DL = &F.getParent()->getDataLayout(); - auto 
*AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - Lint L(Mod, DL, AA, AC, DT, TLI); - L.visit(F); - dbgs() << L.MessagesStr.str(); - return false; -} - +PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &AM.getResult<AAManager>(F); + auto *AC = &AM.getResult<AssumptionAnalysis>(F); + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return PreservedAnalyses::all(); +} + +class LintLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + LintLegacyPass() : FunctionPass(ID) { + initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + } + void print(raw_ostream &O, const Module *M) const override {} +}; + +char LintLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) + +bool LintLegacyPass::runOnFunction(Function &F) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return false; +} + //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... //===----------------------------------------------------------------------===// -FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); } +FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); } /// lintFunction - Check a function for errors, printing messages on stderr. 
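With both the legacy wrapper and the new-PM LintPass defined above, the simplest way to run the checks from ordinary code is through the convenience entry points; the small driver below is a sketch invented for illustration.

#include "llvm/Analysis/Lint.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical driver: lint every function that has a body. Diagnostics are
// printed to stderr by the pass itself; lintModule(M) is the module-wide
// alternative.
static void lintDefinedFunctions(Module &M) {
  for (Function &F : M)
    if (!F.isDeclaration())
      lintFunction(F);
}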
/// void llvm::lintFunction(const Function &f) { - Function &F = const_cast<Function &>(f); + Function &F = const_cast<Function &>(f); assert(!F.isDeclaration() && "Cannot lint external functions"); legacy::FunctionPassManager FPM(F.getParent()); - auto *V = new LintLegacyPass(); + auto *V = new LintLegacyPass(); FPM.add(V); FPM.run(F); } @@ -777,7 +777,7 @@ void llvm::lintFunction(const Function &f) { /// void llvm::lintModule(const Module &M) { legacy::PassManager PM; - auto *V = new LintLegacyPass(); + auto *V = new LintLegacyPass(); PM.add(V); - PM.run(const_cast<Module &>(M)); + PM.run(const_cast<Module &>(M)); } diff --git a/contrib/libs/llvm12/lib/Analysis/Loads.cpp b/contrib/libs/llvm12/lib/Analysis/Loads.cpp index 8f373f70f2..8f1417a552 100644 --- a/contrib/libs/llvm12/lib/Analysis/Loads.cpp +++ b/contrib/libs/llvm12/lib/Analysis/Loads.cpp @@ -12,9 +12,9 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -109,50 +109,50 @@ static bool isDereferenceableAndAlignedPointer( return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment, Size, DL, CtxI, DT, Visited, MaxDepth); - if (const auto *Call = dyn_cast<CallBase>(V)) { + if (const auto *Call = dyn_cast<CallBase>(V)) { if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI, DT, Visited, MaxDepth); - // If we have a call we can't recurse through, check to see if this is an - // allocation function for which we can establish an minimum object size. - // Such a minimum object size is analogous to a deref_or_null attribute in - // that we still need to prove the result non-null at point of use. - // NOTE: We can only use the object size as a base fact as we a) need to - // prove alignment too, and b) don't want the compile time impact of a - // separate recursive walk. - ObjectSizeOpts Opts; - // TODO: It may be okay to round to align, but that would imply that - // accessing slightly out of bounds was legal, and we're currently - // inconsistent about that. For the moment, be conservative. - Opts.RoundToAlign = false; - Opts.NullIsUnknownSize = true; - uint64_t ObjSize; - // TODO: Plumb through TLI so that malloc routines and such working. - if (getObjectSize(V, ObjSize, DL, nullptr, Opts)) { - APInt KnownDerefBytes(Size.getBitWidth(), ObjSize); - if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) && - isKnownNonZero(V, DL, 0, nullptr, CtxI, DT) && - // TODO: We're currently inconsistent about whether deref(N) is a - // global fact or a point in time fact. Once D61652 eventually - // lands, this check will be restricted to the point in time - // variant. For that variant, we need to prove that object hasn't - // been conditionally freed before ontext instruction - if it has, we - // might be hoisting over the inverse conditional and creating a - // dynamic use after free. - !PointerMayBeCapturedBefore(V, true, true, CtxI, DT, true)) { - // As we recursed through GEPs to get here, we've incrementally - // checked that each step advanced by a multiple of the alignment. If - // our base is properly aligned, then the original offset accessed - // must also be. 
- Type *Ty = V->getType(); - assert(Ty->isSized() && "must be sized"); - APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); - return isAligned(V, Offset, Alignment, DL); - } - } - } - + // If we have a call we can't recurse through, check to see if this is an + // allocation function for which we can establish an minimum object size. + // Such a minimum object size is analogous to a deref_or_null attribute in + // that we still need to prove the result non-null at point of use. + // NOTE: We can only use the object size as a base fact as we a) need to + // prove alignment too, and b) don't want the compile time impact of a + // separate recursive walk. + ObjectSizeOpts Opts; + // TODO: It may be okay to round to align, but that would imply that + // accessing slightly out of bounds was legal, and we're currently + // inconsistent about that. For the moment, be conservative. + Opts.RoundToAlign = false; + Opts.NullIsUnknownSize = true; + uint64_t ObjSize; + // TODO: Plumb through TLI so that malloc routines and such working. + if (getObjectSize(V, ObjSize, DL, nullptr, Opts)) { + APInt KnownDerefBytes(Size.getBitWidth(), ObjSize); + if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) && + isKnownNonZero(V, DL, 0, nullptr, CtxI, DT) && + // TODO: We're currently inconsistent about whether deref(N) is a + // global fact or a point in time fact. Once D61652 eventually + // lands, this check will be restricted to the point in time + // variant. For that variant, we need to prove that object hasn't + // been conditionally freed before ontext instruction - if it has, we + // might be hoisting over the inverse conditional and creating a + // dynamic use after free. + !PointerMayBeCapturedBefore(V, true, true, CtxI, DT, true)) { + // As we recursed through GEPs to get here, we've incrementally + // checked that each step advanced by a multiple of the alignment. If + // our base is properly aligned, then the original offset accessed + // must also be. + Type *Ty = V->getType(); + assert(Ty->isSized() && "must be sized"); + APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); + return isAligned(V, Offset, Alignment, DL); + } + } + } + // If we don't know, assume the worst. return false; } @@ -240,7 +240,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, Value *Ptr = LI->getPointerOperand(); APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), - DL.getTypeStoreSize(LI->getType()).getFixedSize()); + DL.getTypeStoreSize(LI->getType()).getFixedSize()); const Align Alignment = LI->getAlign(); Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); @@ -263,7 +263,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, if (Step->getAPInt() != EltSize) return false; - auto TC = SE.getSmallConstantMaxTripCount(L); + auto TC = SE.getSmallConstantMaxTripCount(L); if (!TC) return false; @@ -542,23 +542,23 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, // block. return nullptr; } - -bool llvm::canReplacePointersIfEqual(Value *A, Value *B, const DataLayout &DL, - Instruction *CtxI) { - Type *Ty = A->getType(); - assert(Ty == B->getType() && Ty->isPointerTy() && - "values must have matching pointer types"); - - // NOTE: The checks in the function are incomplete and currently miss illegal - // cases! The current implementation is a starting point and the - // implementation should be made stricter over time. 
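The object-size reasoning added above feeds the public dereferenceability helpers in this file; a typical caller, sketched here with an invented wrapper name, asks whether a load could be speculated without trapping.

#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical query: is the loaded address known dereferenceable and
// sufficiently aligned at the point of the load? Real callers usually also
// pass a DominatorTree so that context-sensitive facts can be used.
static bool canSpeculateLoad(LoadInst &LI, const DataLayout &DL) {
  return isDereferenceableAndAlignedPointer(LI.getPointerOperand(),
                                            LI.getType(), LI.getAlign(), DL,
                                            /*CtxI=*/&LI);
}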
- if (auto *C = dyn_cast<Constant>(B)) { - // Do not allow replacing a pointer with a constant pointer, unless it is - // either null or at least one byte is dereferenceable. - APInt OneByte(DL.getPointerTypeSizeInBits(Ty), 1); - return C->isNullValue() || - isDereferenceableAndAlignedPointer(B, Align(1), OneByte, DL, CtxI); - } - - return true; -} + +bool llvm::canReplacePointersIfEqual(Value *A, Value *B, const DataLayout &DL, + Instruction *CtxI) { + Type *Ty = A->getType(); + assert(Ty == B->getType() && Ty->isPointerTy() && + "values must have matching pointer types"); + + // NOTE: The checks in the function are incomplete and currently miss illegal + // cases! The current implementation is a starting point and the + // implementation should be made stricter over time. + if (auto *C = dyn_cast<Constant>(B)) { + // Do not allow replacing a pointer with a constant pointer, unless it is + // either null or at least one byte is dereferenceable. + APInt OneByte(DL.getPointerTypeSizeInBits(Ty), 1); + return C->isNullValue() || + isDereferenceableAndAlignedPointer(B, Align(1), OneByte, DL, CtxI); + } + + return true; +} diff --git a/contrib/libs/llvm12/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/LoopAccessAnalysis.cpp index e632fe25c2..6e59f1fa12 100644 --- a/contrib/libs/llvm12/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LoopAccessAnalysis.cpp @@ -149,23 +149,23 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, // symbolic stride replaced by one. ValueToValueMap::const_iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr); - if (SI == PtrToStride.end()) - // For a non-symbolic stride, just return the original expression. - return OrigSCEV; + if (SI == PtrToStride.end()) + // For a non-symbolic stride, just return the original expression. + return OrigSCEV; - Value *StrideVal = stripIntegerCast(SI->second); + Value *StrideVal = stripIntegerCast(SI->second); - ScalarEvolution *SE = PSE.getSE(); - const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal)); - const auto *CT = - static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType())); + ScalarEvolution *SE = PSE.getSE(); + const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal)); + const auto *CT = + static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType())); - PSE.addPredicate(*SE->getEqualPredicate(U, CT)); - auto *Expr = PSE.getSCEV(Ptr); + PSE.addPredicate(*SE->getEqualPredicate(U, CT)); + auto *Expr = PSE.getSCEV(Ptr); - LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV - << " by: " << *Expr << "\n"); - return Expr; + LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV + << " by: " << *Expr << "\n"); + return Expr; } RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( @@ -223,10 +223,10 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); } // Add the size of the pointed element to ScEnd. 
- auto &DL = Lp->getHeader()->getModule()->getDataLayout(); - Type *IdxTy = DL.getIndexType(Ptr->getType()); - const SCEV *EltSizeSCEV = - SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType()); + auto &DL = Lp->getHeader()->getModule()->getDataLayout(); + Type *IdxTy = DL.getIndexType(Ptr->getType()); + const SCEV *EltSizeSCEV = + SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType()); ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); } @@ -505,16 +505,16 @@ public: typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; - AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA, + AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, + MemoryDepChecker::DepCandidates &DA, PredicatedScalarEvolution &PSE) - : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), + : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), PSE(PSE) {} /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { Value *Ptr = const_cast<Value*>(Loc.Ptr); - AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); + AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); Accesses.insert(MemAccessInfo(Ptr, false)); if (IsReadOnly) ReadOnlyPtr.insert(Ptr); @@ -523,7 +523,7 @@ public: /// Register a store. void addStore(MemoryLocation &Loc) { Value *Ptr = const_cast<Value*>(Loc.Ptr); - AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); + AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); Accesses.insert(MemAccessInfo(Ptr, true)); } @@ -727,7 +727,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // First, count how many write and read accesses are in the alias set. Also // collect MemAccessInfos for later. SmallVector<MemAccessInfo, 4> AccessInfos; - for (const auto &A : AS) { + for (const auto &A : AS) { Value *Ptr = A.getValue(); bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); @@ -861,7 +861,7 @@ void AccessAnalysis::processMemAccesses() { // compatibility and potential for underlying-object overlap. As a result, we // only need to check for potential pointer dependencies within each alias // set. - for (const auto &AS : AST) { + for (const auto &AS : AST) { // Note that both the alias-set tracker and the alias sets themselves used // linked lists internally and so the iteration order here is deterministic // (matching the original instruction order within each set). @@ -881,12 +881,12 @@ void AccessAnalysis::processMemAccesses() { bool UseDeferred = SetIteration > 0; PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; - for (const auto &AV : AS) { + for (const auto &AV : AS) { Value *Ptr = AV.getValue(); // For a single memory access in AliasSetTracker, Accesses may contain // both read and write, and they both need to be handled for CheckDeps. - for (const auto &AC : S) { + for (const auto &AC : S) { if (AC.getPointer() != Ptr) continue; @@ -933,7 +933,7 @@ void AccessAnalysis::processMemAccesses() { typedef SmallVector<const Value *, 16> ValueVector; ValueVector TempObjects; - getUnderlyingObjects(Ptr, TempObjects, LI); + getUnderlyingObjects(Ptr, TempObjects, LI); LLVM_DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); for (const Value *UnderlyingObj : TempObjects) { @@ -987,7 +987,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, // Make sure there is only one non-const index and analyze that. 
Value *NonConstIndex = nullptr; - for (Value *Index : GEP->indices()) + for (Value *Index : GEP->indices()) if (!isa<ConstantInt>(Index)) { if (NonConstIndex) return false; @@ -1137,7 +1137,7 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, // first pointer in the array. Value *Ptr0 = VL[0]; const SCEV *Scev0 = SE.getSCEV(Ptr0); - Value *Obj0 = getUnderlyingObject(Ptr0); + Value *Obj0 = getUnderlyingObject(Ptr0); llvm::SmallSet<int64_t, 4> Offsets; for (auto *Ptr : VL) { @@ -1148,7 +1148,7 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, return false; // If a pointer refers to a different underlying object, bail - the // pointers are by definition incomparable. - Value *CurrObj = getUnderlyingObject(Ptr); + Value *CurrObj = getUnderlyingObject(Ptr); if (CurrObj != Obj0) return false; @@ -1338,7 +1338,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, // If the number of vector iteration between the store and the load are // small we could incur conflicts. if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) { - MaxVFWithoutSLForwardIssues = (VF >> 1); + MaxVFWithoutSLForwardIssues = (VF >> 1); break; } } @@ -1654,7 +1654,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; - MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits); + MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits); return Dependence::BackwardVectorizable; } @@ -1766,7 +1766,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { << TheLoop->getHeader()->getName() << '\n'); // We can only analyze innermost loops. - if (!TheLoop->isInnermost()) { + if (!TheLoop->isInnermost()) { LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop"; return false; @@ -1783,7 +1783,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { // ScalarEvolution needs to be able to find the exit count. const SCEV *ExitCount = PSE->getBackedgeTakenCount(); - if (isa<SCEVCouldNotCompute>(ExitCount)) { + if (isa<SCEVCouldNotCompute>(ExitCount)) { recordAnalysis("CantComputeNumberOfIterations") << "could not determine number of loop iterations"; LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); @@ -1922,9 +1922,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } MemoryDepChecker::DepCandidates DependentAccesses; - AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); + AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); - // Holds the analyzed pointers. We don't want to call getUnderlyingObjects + // Holds the analyzed pointers. We don't want to call getUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once // for read and once for write, it will only appear once (on the write // list). 
This is okay, since we are going to check for conflicts between @@ -2126,8 +2126,8 @@ bool LoopAccessInfo::isUniform(Value *V) const { } void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { - Value *Ptr = getLoadStorePointerOperand(MemAccess); - if (!Ptr) + Value *Ptr = getLoadStorePointerOperand(MemAccess); + if (!Ptr) return; Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop); diff --git a/contrib/libs/llvm12/lib/Analysis/LoopAnalysisManager.cpp b/contrib/libs/llvm12/lib/Analysis/LoopAnalysisManager.cpp index 4ad5641da1..69a3c8bba9 100644 --- a/contrib/libs/llvm12/lib/Analysis/LoopAnalysisManager.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LoopAnalysisManager.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/contrib/libs/llvm12/lib/Analysis/LoopCacheAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/LoopCacheAnalysis.cpp index cf68596bfb..a08eb3fbe9 100644 --- a/contrib/libs/llvm12/lib/Analysis/LoopCacheAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LoopCacheAnalysis.cpp @@ -29,11 +29,11 @@ #include "llvm/ADT/BreadthFirstIterator.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -149,7 +149,7 @@ IndexedReference::IndexedReference(Instruction &StoreOrLoadInst, Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other, unsigned CLS, - AAResults &AA) const { + AAResults &AA) const { assert(IsValid && "Expecting a valid reference"); if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { @@ -206,7 +206,7 @@ Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, - AAResults &AA) const { + AAResults &AA) const { assert(IsValid && "Expecting a valid reference"); if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { @@ -461,7 +461,7 @@ bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript, } bool IndexedReference::isAliased(const IndexedReference &Other, - AAResults &AA) const { + AAResults &AA) const { const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst); const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst); return AA.isMustAlias(Loc1, Loc2); @@ -480,7 +480,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, - AAResults &AA, DependenceInfo &DI, + AAResults &AA, DependenceInfo &DI, Optional<unsigned> TRT) : Loops(Loops), TripCounts(), LoopCosts(), TRT((TRT == None) ? 
Optional<unsigned>(TemporalReuseThreshold) : TRT), @@ -499,13 +499,13 @@ CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, std::unique_ptr<CacheCost> CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional<unsigned> TRT) { - if (!Root.isOutermost()) { + if (!Root.isOutermost()) { LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); return nullptr; } LoopVectorTy Loops; - append_range(Loops, breadth_first(&Root)); + append_range(Loops, breadth_first(&Root)); if (!getInnerMostLoop(Loops)) { LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more " diff --git a/contrib/libs/llvm12/lib/Analysis/LoopInfo.cpp b/contrib/libs/llvm12/lib/Analysis/LoopInfo.cpp index a85869b163..c0b43316d4 100644 --- a/contrib/libs/llvm12/lib/Analysis/LoopInfo.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LoopInfo.cpp @@ -34,7 +34,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/PrintPasses.h" +#include "llvm/IR/PrintPasses.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -432,10 +432,10 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, for (const Use &U : I.uses()) { const Instruction *UI = cast<Instruction>(U.getUser()); const BasicBlock *UserBB = UI->getParent(); - - // For practical purposes, we consider that the use in a PHI - // occurs in the respective predecessor block. For more info, - // see the `phi` doc in LangRef and the LCSSA doc. + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. if (const PHINode *P = dyn_cast<PHINode>(UI)) UserBB = P->getIncomingBlock(U); @@ -540,22 +540,22 @@ void Loop::setLoopAlreadyUnrolled() { setLoopID(NewLoopID); } -void Loop::setLoopMustProgress() { - LLVMContext &Context = getHeader()->getContext(); - - MDNode *MustProgress = findOptionMDForLoop(this, "llvm.loop.mustprogress"); - - if (MustProgress) - return; - - MDNode *MustProgressMD = - MDNode::get(Context, MDString::get(Context, "llvm.loop.mustprogress")); - MDNode *LoopID = getLoopID(); - MDNode *NewLoopID = - makePostTransformationMetadata(Context, LoopID, {}, {MustProgressMD}); - setLoopID(NewLoopID); -} - +void Loop::setLoopMustProgress() { + LLVMContext &Context = getHeader()->getContext(); + + MDNode *MustProgress = findOptionMDForLoop(this, "llvm.loop.mustprogress"); + + if (MustProgress) + return; + + MDNode *MustProgressMD = + MDNode::get(Context, MDString::get(Context, "llvm.loop.mustprogress")); + MDNode *LoopID = getLoopID(); + MDNode *NewLoopID = + makePostTransformationMetadata(Context, LoopID, {}, {MustProgressMD}); + setLoopID(NewLoopID); +} + bool Loop::isAnnotatedParallel() const { MDNode *DesiredLoopIdMetadata = getLoopID(); @@ -567,7 +567,7 @@ bool Loop::isAnnotatedParallel() const { SmallPtrSet<MDNode *, 4> ParallelAccessGroups; // For scalable 'contains' check. if (ParallelAccesses) { - for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) { + for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) { MDNode *AccGroup = cast<MDNode>(MD.get()); assert(isValidAsAccessGroup(AccGroup) && "List item must be an access group"); @@ -785,7 +785,7 @@ void UnloopUpdater::removeBlocksFromAncestors() { /// Update the parent loop for all subloops directly nested within unloop. 
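The Loop::setLoopMustProgress() helper restored above can be driven from the enclosing function's attribute; a sketch under the assumption that LLVM 12's Attribute::MustProgress is available (propagateMustProgress is a hypothetical name):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// A loop inside a mustprogress function must itself make progress, so it may
// carry the "llvm.loop.mustprogress" metadata added by the helper above.
static void propagateMustProgress(Loop &L) {
  const Function &F = *L.getHeader()->getParent();
  if (F.hasFnAttribute(Attribute::MustProgress))
    L.setLoopMustProgress();
}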
void UnloopUpdater::updateSubloopParents() { - while (!Unloop.isInnermost()) { + while (!Unloop.isInnermost()) { Loop *Subloop = *std::prev(Unloop.end()); Unloop.removeChildLoop(std::prev(Unloop.end())); @@ -883,7 +883,7 @@ void LoopInfo::erase(Loop *Unloop) { auto InvalidateOnExit = make_scope_exit([&]() { destroy(Unloop); }); // First handle the special case of no parent loop to simplify the algorithm. - if (Unloop->isOutermost()) { + if (Unloop->isOutermost()) { // Since BBLoop had no parent, Unloop blocks are no longer in a loop. for (Loop::block_iterator I = Unloop->block_begin(), E = Unloop->block_end(); @@ -908,7 +908,7 @@ void LoopInfo::erase(Loop *Unloop) { } // Move all of the subloops to the top-level. - while (!Unloop->isInnermost()) + while (!Unloop->isInnermost()) addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end()))); return; @@ -1038,7 +1038,7 @@ MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context, SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); + MDs.push_back(nullptr); // Remove metadata for the transformation that has been applied or that became // outdated. diff --git a/contrib/libs/llvm12/lib/Analysis/LoopNestAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/LoopNestAnalysis.cpp index 7133abcc35..9746a415bf 100644 --- a/contrib/libs/llvm12/lib/Analysis/LoopNestAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LoopNestAnalysis.cpp @@ -42,7 +42,7 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, LoopNest::LoopNest(Loop &Root, ScalarEvolution &SE) : MaxPerfectDepth(getMaxPerfectDepth(Root, SE)) { - append_range(Loops, breadth_first(&Root)); + append_range(Loops, breadth_first(&Root)); } std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root, @@ -52,8 +52,8 @@ std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root, bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { - assert(!OuterLoop.isInnermost() && "Outer loop should have subloops"); - assert(!InnerLoop.isOutermost() && "Inner loop should have a parent"); + assert(!OuterLoop.isInnermost() && "Outer loop should have subloops"); + assert(!InnerLoop.isOutermost() && "Inner loop should have a parent"); LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName() << "' and '" << InnerLoop.getName() << "' are perfectly nested.\n"); @@ -205,31 +205,31 @@ unsigned LoopNest::getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE) { return CurrentDepth; } -const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From, - const BasicBlock *End) { - assert(From && "Expecting valid From"); - assert(End && "Expecting valid End"); - - if (From == End || !From->getSingleSuccessor()) - return *From; - - auto IsEmpty = [](const BasicBlock *BB) { - return (BB->getInstList().size() == 1); - }; - - // Visited is used to avoid running into an infinite loop. - SmallPtrSet<const BasicBlock *, 4> Visited; - const BasicBlock *BB = From->getSingleSuccessor(); - const BasicBlock *PredBB = BB; - while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) { - Visited.insert(BB); - PredBB = BB; - BB = BB->getSingleSuccessor(); - } - - return (BB == End) ? 
*End : *PredBB; -} - +const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End) { + assert(From && "Expecting valid From"); + assert(End && "Expecting valid End"); + + if (From == End || !From->getSingleSuccessor()) + return *From; + + auto IsEmpty = [](const BasicBlock *BB) { + return (BB->getInstList().size() == 1); + }; + + // Visited is used to avoid running into an infinite loop. + SmallPtrSet<const BasicBlock *, 4> Visited; + const BasicBlock *BB = From->getSingleSuccessor(); + const BasicBlock *PredBB = BB; + while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) { + Visited.insert(BB); + PredBB = BB; + BB = BB->getSingleSuccessor(); + } + + return (BB == End) ? *End : *PredBB; +} + static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { // The inner loop must be the only outer loop's child. @@ -252,92 +252,92 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, InnerLoop.getExitingBlock() != InnerLoopLatch || !InnerLoopExit) return false; - // Returns whether the block `ExitBlock` contains at least one LCSSA Phi node. - auto ContainsLCSSAPhi = [](const BasicBlock &ExitBlock) { - return any_of(ExitBlock.phis(), [](const PHINode &PN) { - return PN.getNumIncomingValues() == 1; - }); - }; - - // Returns whether the block `BB` qualifies for being an extra Phi block. The - // extra Phi block is the additional block inserted after the exit block of an - // "guarded" inner loop which contains "only" Phi nodes corresponding to the - // LCSSA Phi nodes in the exit block. - auto IsExtraPhiBlock = [&](const BasicBlock &BB) { - return BB.getFirstNonPHI() == BB.getTerminator() && - all_of(BB.phis(), [&](const PHINode &PN) { - return all_of(PN.blocks(), [&](const BasicBlock *IncomingBlock) { - return IncomingBlock == InnerLoopExit || - IncomingBlock == OuterLoopHeader; - }); - }); - }; - - const BasicBlock *ExtraPhiBlock = nullptr; + // Returns whether the block `ExitBlock` contains at least one LCSSA Phi node. + auto ContainsLCSSAPhi = [](const BasicBlock &ExitBlock) { + return any_of(ExitBlock.phis(), [](const PHINode &PN) { + return PN.getNumIncomingValues() == 1; + }); + }; + + // Returns whether the block `BB` qualifies for being an extra Phi block. The + // extra Phi block is the additional block inserted after the exit block of an + // "guarded" inner loop which contains "only" Phi nodes corresponding to the + // LCSSA Phi nodes in the exit block. + auto IsExtraPhiBlock = [&](const BasicBlock &BB) { + return BB.getFirstNonPHI() == BB.getTerminator() && + all_of(BB.phis(), [&](const PHINode &PN) { + return all_of(PN.blocks(), [&](const BasicBlock *IncomingBlock) { + return IncomingBlock == InnerLoopExit || + IncomingBlock == OuterLoopHeader; + }); + }); + }; + + const BasicBlock *ExtraPhiBlock = nullptr; // Ensure the only branch that may exist between the loops is the inner loop // guard. if (OuterLoopHeader != InnerLoopPreHeader) { - const BasicBlock &SingleSucc = - LoopNest::skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader); - - // no conditional branch present - if (&SingleSucc != InnerLoopPreHeader) { - const BranchInst *BI = dyn_cast<BranchInst>(SingleSucc.getTerminator()); - - if (!BI || BI != InnerLoop.getLoopGuardBranch()) - return false; - - bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); - - // The successors of the inner loop guard should be the inner loop - // preheader or the outer loop latch possibly through empty blocks. 
- for (const BasicBlock *Succ : BI->successors()) { - const BasicBlock *PotentialInnerPreHeader = Succ; - const BasicBlock *PotentialOuterLatch = Succ; - - // Ensure the inner loop guard successor is empty before skipping - // blocks. - if (Succ->getInstList().size() == 1) { - PotentialInnerPreHeader = - &LoopNest::skipEmptyBlockUntil(Succ, InnerLoopPreHeader); - PotentialOuterLatch = - &LoopNest::skipEmptyBlockUntil(Succ, OuterLoopLatch); - } - - if (PotentialInnerPreHeader == InnerLoopPreHeader) - continue; - if (PotentialOuterLatch == OuterLoopLatch) - continue; - - // If `InnerLoopExit` contains LCSSA Phi instructions, additional block - // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The - // loops are still considered perfectly nested if the extra block only - // contains Phi instructions from InnerLoopExit and OuterLoopHeader. - if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && - Succ->getSingleSuccessor() == OuterLoopLatch) { - // Points to the extra block so that we can reference it later in the - // final check. We can also conclude that the inner loop is - // guarded and there exists LCSSA Phi node in the exit block later if - // we see a non-null `ExtraPhiBlock`. - ExtraPhiBlock = Succ; - continue; - } - - DEBUG_WITH_TYPE(VerboseDebug, { - dbgs() << "Inner loop guard successor " << Succ->getName() - << " doesn't lead to inner loop preheader or " - "outer loop latch.\n"; - }); - return false; - } + const BasicBlock &SingleSucc = + LoopNest::skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader); + + // no conditional branch present + if (&SingleSucc != InnerLoopPreHeader) { + const BranchInst *BI = dyn_cast<BranchInst>(SingleSucc.getTerminator()); + + if (!BI || BI != InnerLoop.getLoopGuardBranch()) + return false; + + bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); + + // The successors of the inner loop guard should be the inner loop + // preheader or the outer loop latch possibly through empty blocks. + for (const BasicBlock *Succ : BI->successors()) { + const BasicBlock *PotentialInnerPreHeader = Succ; + const BasicBlock *PotentialOuterLatch = Succ; + + // Ensure the inner loop guard successor is empty before skipping + // blocks. + if (Succ->getInstList().size() == 1) { + PotentialInnerPreHeader = + &LoopNest::skipEmptyBlockUntil(Succ, InnerLoopPreHeader); + PotentialOuterLatch = + &LoopNest::skipEmptyBlockUntil(Succ, OuterLoopLatch); + } + + if (PotentialInnerPreHeader == InnerLoopPreHeader) + continue; + if (PotentialOuterLatch == OuterLoopLatch) + continue; + + // If `InnerLoopExit` contains LCSSA Phi instructions, additional block + // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The + // loops are still considered perfectly nested if the extra block only + // contains Phi instructions from InnerLoopExit and OuterLoopHeader. + if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && + Succ->getSingleSuccessor() == OuterLoopLatch) { + // Points to the extra block so that we can reference it later in the + // final check. We can also conclude that the inner loop is + // guarded and there exists LCSSA Phi node in the exit block later if + // we see a non-null `ExtraPhiBlock`. 
+ ExtraPhiBlock = Succ; + continue; + } + + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Inner loop guard successor " << Succ->getName() + << " doesn't lead to inner loop preheader or " + "outer loop latch.\n"; + }); + return false; + } } } - // Ensure the inner loop exit block lead to the outer loop latch possibly - // through empty blocks. - const BasicBlock &SuccInner = - LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch); - if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) { + // Ensure the inner loop exit block lead to the outer loop latch possibly + // through empty blocks. + const BasicBlock &SuccInner = + LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch); + if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) { DEBUG_WITH_TYPE( VerboseDebug, dbgs() << "Inner loop exit block " << *InnerLoopExit @@ -348,8 +348,8 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, return true; } -AnalysisKey LoopNestAnalysis::Key; - +AnalysisKey LoopNestAnalysis::Key; + raw_ostream &llvm::operator<<(raw_ostream &OS, const LoopNest &LN) { OS << "IsPerfect="; if (LN.getMaxPerfectDepth() == LN.getNestDepth()) diff --git a/contrib/libs/llvm12/lib/Analysis/LoopPass.cpp b/contrib/libs/llvm12/lib/Analysis/LoopPass.cpp index 9e470e998e..e7e7a4dd52 100644 --- a/contrib/libs/llvm12/lib/Analysis/LoopPass.cpp +++ b/contrib/libs/llvm12/lib/Analysis/LoopPass.cpp @@ -19,8 +19,8 @@ #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PassTimingInfo.h" -#include "llvm/IR/PrintPasses.h" -#include "llvm/IR/StructuralHash.h" +#include "llvm/IR/PrintPasses.h" +#include "llvm/IR/StructuralHash.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" @@ -77,7 +77,7 @@ LPPassManager::LPPassManager() // Insert loop into loop nest (LoopInfo) and loop queue (LQ). void LPPassManager::addLoop(Loop &L) { - if (L.isOutermost()) { + if (L.isOutermost()) { // This is the top level loop. LQ.push_front(&L); return; @@ -117,7 +117,7 @@ void LPPassManager::markLoopAsDeleted(Loop &L) { // there. However, we have to be careful to not remove the back of the queue // as that is assumed to match the current loop. 
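A usage sketch for the LoopNest pieces restored above: build the nest from its outermost loop and ask whether it is perfect all the way down, the same comparison the stream operator above performs (Root and SE are assumed to come from the caller):

#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// getLoopNest collects the loops breadth-first, as in the constructor above;
// the nest is perfect when the perfect depth reaches the total nest depth.
static bool isPerfectNest(Loop &Root, ScalarEvolution &SE) {
  std::unique_ptr<LoopNest> LN = LoopNest::getLoopNest(Root, SE);
  return LN && LN->getMaxPerfectDepth() == LN->getNestDepth();
}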
assert(LQ.back() == CurrentLoop && "Loop queue back isn't the current loop!"); - llvm::erase_value(LQ, &L); + llvm::erase_value(LQ, &L); if (&L == CurrentLoop) { CurrentLoopDeleted = true; @@ -192,19 +192,19 @@ bool LPPassManager::runOnFunction(Function &F) { { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); -#ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(F); -#endif +#ifdef EXPENSIVE_CHECKS + uint64_t RefHash = StructuralHash(F); +#endif LocalChanged = P->runOnLoop(CurrentLoop, *this); - -#ifdef EXPENSIVE_CHECKS - if (!LocalChanged && (RefHash != StructuralHash(F))) { - llvm::errs() << "Pass modifies its input and doesn't report it: " - << P->getPassName() << "\n"; - llvm_unreachable("Pass modifies its input and doesn't report it"); - } -#endif - + +#ifdef EXPENSIVE_CHECKS + if (!LocalChanged && (RefHash != StructuralHash(F))) { + llvm::errs() << "Pass modifies its input and doesn't report it: " + << P->getPassName() << "\n"; + llvm_unreachable("Pass modifies its input and doesn't report it"); + } +#endif + Changed |= LocalChanged; if (EmitICRemark) { unsigned NewSize = F.getInstructionCount(); @@ -254,8 +254,8 @@ bool LPPassManager::runOnFunction(Function &F) { F.getContext().yield(); } - if (LocalChanged) - removeNotPreservedAnalysis(P); + if (LocalChanged) + removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); removeDeadPasses(P, CurrentLoopDeleted ? "<deleted>" diff --git a/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp b/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp index 89f4ff427d..d152a42069 100644 --- a/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp @@ -11,16 +11,16 @@ // 'release' mode) or a runtime-loaded model (the 'development' case). // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) - +#include "llvm/Config/config.h" +#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) + #include <limits> #include <unordered_map> #include <unordered_set> #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/FunctionPropertiesAnalysis.h" +#include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/Analysis/MLModelRunner.h" @@ -66,8 +66,8 @@ CallBase *getInlinableCS(Instruction &I) { MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, std::unique_ptr<MLModelRunner> Runner) : InlineAdvisor( - M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), - ModelRunner(std::move(Runner)), CG(new CallGraph(M)), + M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), + ModelRunner(std::move(Runner)), CG(new CallGraph(M)), InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { assert(ModelRunner); @@ -118,8 +118,8 @@ void MLInlineAdvisor::onPassEntry() { } int64_t MLInlineAdvisor::getLocalCalls(Function &F) { - return FAM.getResult<FunctionPropertiesAnalysis>(F) - .DirectCallsToDefinedFunctions; + return FAM.getResult<FunctionPropertiesAnalysis>(F) + .DirectCallsToDefinedFunctions; } // Update the internal state of the advisor, and force invalidate feature @@ -134,7 +134,7 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, Function *Callee = Advice.getCallee(); // The caller features aren't valid anymore. 
- FAM.invalidate<FunctionPropertiesAnalysis>(*Caller); + FAM.invalidate<FunctionPropertiesAnalysis>(*Caller); int64_t IRSizeAfter = getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); @@ -147,15 +147,15 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, // For edges, we 'forget' the edges that the caller and callee used to have // before inlining, and add back what they currently have together. int64_t NewCallerAndCalleeEdges = - FAM.getResult<FunctionPropertiesAnalysis>(*Caller) + FAM.getResult<FunctionPropertiesAnalysis>(*Caller) .DirectCallsToDefinedFunctions; if (CalleeWasDeleted) --NodeCount; else - NewCallerAndCalleeEdges += - FAM.getResult<FunctionPropertiesAnalysis>(*Callee) - .DirectCallsToDefinedFunctions; + NewCallerAndCalleeEdges += + FAM.getResult<FunctionPropertiesAnalysis>(*Callee) + .DirectCallsToDefinedFunctions; EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); } @@ -168,7 +168,7 @@ int64_t MLInlineAdvisor::getModuleIRSize() const { return Ret; } -std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { +std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { auto &Caller = *CB.getCaller(); auto &Callee = *CB.getCalledFunction(); @@ -178,17 +178,17 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); - auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); + auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); // If this is a "never inline" case, there won't be any changes to internal // state we need to track, so we can just return the base InlineAdvice, which // will do nothing interesting. // Same thing if this is a recursive case. - if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never || + if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never || &Caller == &Callee) - return getMandatoryAdvice(CB, false); + return getMandatoryAdvice(CB, false); - bool Mandatory = - MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always; + bool Mandatory = + MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always; // If we need to stop, we won't want to track anymore any state changes, so // we just return the base InlineAdvice, which acts as a noop. 
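The FunctionPropertiesAnalysis queries restored above follow the usual new-pass-manager pattern of get-result, then invalidate once the IR changes; a hedged sketch (FAM and Caller are assumed to be supplied by the surrounding advisor, and recountLocalCalls is a hypothetical name):

#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// After inlining into Caller the cached properties are stale: drop them and
// recompute the direct-call count the advisor feeds to the model.
static int64_t recountLocalCalls(FunctionAnalysisManager &FAM,
                                 Function &Caller) {
  FAM.invalidate<FunctionPropertiesAnalysis>(Caller);
  return FAM.getResult<FunctionPropertiesAnalysis>(Caller)
      .DirectCallsToDefinedFunctions;
}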
@@ -214,15 +214,15 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { } if (Mandatory) - return getMandatoryAdvice(CB, true); + return getMandatoryAdvice(CB, true); auto NrCtantParams = 0; for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { NrCtantParams += (isa<Constant>(*I)); } - auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller); - auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee); + auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller); + auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee); ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount, CalleeBefore.BasicBlockCount); @@ -249,22 +249,22 @@ MLInlineAdvisor::getAdviceFromModel(CallBase &CB, return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run()); } -std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, - bool Advice) { - // Make sure we track inlinings in all cases - mandatory or not. - if (Advice && !ForceStop) - return getMandatoryAdviceImpl(CB); - - // If this is a "never inline" case, there won't be any changes to internal - // state we need to track, so we can just return the base InlineAdvice, which - // will do nothing interesting. - // Same if we are forced to stop - we don't track anymore. - return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice); -} - +std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, + bool Advice) { + // Make sure we track inlinings in all cases - mandatory or not. + if (Advice && !ForceStop) + return getMandatoryAdviceImpl(CB); + + // If this is a "never inline" case, there won't be any changes to internal + // state we need to track, so we can just return the base InlineAdvice, which + // will do nothing interesting. + // Same if we are forced to stop - we don't track anymore. 
+ return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice); +} + std::unique_ptr<MLInlineAdvice> -MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { - return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true); +MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { + return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true); } void MLInlineAdvice::reportContextForRemark( @@ -310,5 +310,5 @@ void MLInlineAdvice::recordUnattemptedInliningImpl() { reportContextForRemark(R); return R; }); -} -#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) +} +#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) diff --git a/contrib/libs/llvm12/lib/Analysis/MemDepPrinter.cpp b/contrib/libs/llvm12/lib/Analysis/MemDepPrinter.cpp index 0064234710..cd0965305d 100644 --- a/contrib/libs/llvm12/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MemDepPrinter.cpp @@ -14,7 +14,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/libs/llvm12/lib/Analysis/MemDerefPrinter.cpp b/contrib/libs/llvm12/lib/Analysis/MemDerefPrinter.cpp index 0078ceacba..0a92e558e2 100644 --- a/contrib/libs/llvm12/lib/Analysis/MemDerefPrinter.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MemDerefPrinter.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/MemDerefPrinter.h" +#include "llvm/Analysis/MemDerefPrinter.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DataLayout.h" @@ -18,7 +18,7 @@ #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" - + using namespace llvm; namespace { @@ -78,35 +78,35 @@ void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const { OS << "\n\n"; } } - -PreservedAnalyses MemDerefPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - OS << "Memory Dereferencibility of pointers in function '" << F.getName() - << "'\n"; - - SmallVector<Value *, 4> Deref; - SmallPtrSet<Value *, 4> DerefAndAligned; - - const DataLayout &DL = F.getParent()->getDataLayout(); - for (auto &I : instructions(F)) { - if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - Value *PO = LI->getPointerOperand(); - if (isDereferenceablePointer(PO, LI->getType(), DL)) - Deref.push_back(PO); - if (isDereferenceableAndAlignedPointer( - PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL)) - DerefAndAligned.insert(PO); - } - } - - OS << "The following are dereferenceable:\n"; - for (Value *V : Deref) { - V->print(OS); - if (DerefAndAligned.count(V)) - OS << "\t(aligned)"; - else - OS << "\t(unaligned)"; - OS << "\n\n"; - } - return PreservedAnalyses::all(); -} + +PreservedAnalyses MemDerefPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "Memory Dereferencibility of pointers in function '" << F.getName() + << "'\n"; + + SmallVector<Value *, 4> Deref; + SmallPtrSet<Value *, 4> DerefAndAligned; + + const DataLayout &DL = F.getParent()->getDataLayout(); + for (auto &I : instructions(F)) { + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { + Value *PO = LI->getPointerOperand(); + if (isDereferenceablePointer(PO, LI->getType(), DL)) + Deref.push_back(PO); + if (isDereferenceableAndAlignedPointer( + PO, 
LI->getType(), MaybeAlign(LI->getAlignment()), DL)) + DerefAndAligned.insert(PO); + } + } + + OS << "The following are dereferenceable:\n"; + for (Value *V : Deref) { + V->print(OS); + if (DerefAndAligned.count(V)) + OS << "\t(aligned)"; + else + OS << "\t(unaligned)"; + OS << "\n\n"; + } + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Analysis/MemoryBuiltins.cpp b/contrib/libs/llvm12/lib/Analysis/MemoryBuiltins.cpp index 5dda96a2ca..e07b3c41e6 100644 --- a/contrib/libs/llvm12/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MemoryBuiltins.cpp @@ -72,7 +72,7 @@ struct AllocFnsTy { // know which functions are nounwind, noalias, nocapture parameters, etc. static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = { {LibFunc_malloc, {MallocLike, 1, 0, -1}}, - {LibFunc_vec_malloc, {MallocLike, 1, 0, -1}}, + {LibFunc_vec_malloc, {MallocLike, 1, 0, -1}}, {LibFunc_valloc, {MallocLike, 1, 0, -1}}, {LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int) {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) @@ -104,9 +104,9 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = { {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow) {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1}}, {LibFunc_calloc, {CallocLike, 2, 0, 1}}, - {LibFunc_vec_calloc, {CallocLike, 2, 0, 1}}, + {LibFunc_vec_calloc, {CallocLike, 2, 0, 1}}, {LibFunc_realloc, {ReallocLike, 2, 1, -1}}, - {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1}}, + {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1}}, {LibFunc_reallocf, {ReallocLike, 2, 1, -1}}, {LibFunc_strdup, {StrDupLike, 1, -1, -1}}, {LibFunc_strndup, {StrDupLike, 2, 1, -1}} @@ -381,8 +381,8 @@ PointerType *llvm::getMallocType(const CallInst *CI, unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. - for (const User *U : CI->users()) - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + for (const User *U : CI->users()) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { MallocType = cast<PointerType>(BCI->getDestTy()); NumOfBitCastUses++; } @@ -568,16 +568,16 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, Value *UseZero = Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second); ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType); - Value *Ret = Builder.CreateSelect( - UseZero, ConstantInt::get(ResultType, 0), ResultSize); - - // The non-constant size expression cannot evaluate to -1. - if (!isa<Constant>(SizeOffsetPair.first) || - !isa<Constant>(SizeOffsetPair.second)) - Builder.CreateAssumption( - Builder.CreateICmpNE(Ret, ConstantInt::get(ResultType, -1))); - - return Ret; + Value *Ret = Builder.CreateSelect( + UseZero, ConstantInt::get(ResultType, 0), ResultSize); + + // The non-constant size expression cannot evaluate to -1. + if (!isa<Constant>(SizeOffsetPair.first) || + !isa<Constant>(SizeOffsetPair.second)) + Builder.CreateAssumption( + Builder.CreateICmpNE(Ret, ConstantInt::get(ResultType, -1))); + + return Ret; } } @@ -686,14 +686,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { } SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { - Type *MemoryTy = A.getPointeeInMemoryValueType(); + Type *MemoryTy = A.getPointeeInMemoryValueType(); // No interprocedural analysis is done at the moment. 
- if (!MemoryTy|| !MemoryTy->isSized()) { + if (!MemoryTy|| !MemoryTy->isSized()) { ++ObjectVisitorArgument; return unknown(); } - - APInt Size(IntTyBits, DL.getTypeAllocSize(MemoryTy)); + + APInt Size(IntTyBits, DL.getTypeAllocSize(MemoryTy)); return std::make_pair(align(Size, A.getParamAlignment()), Zero); } diff --git a/contrib/libs/llvm12/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/MemoryDependenceAnalysis.cpp index 886b5bf4ac..3c44207ba7 100644 --- a/contrib/libs/llvm12/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -148,7 +148,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, if (const CallInst *CI = isFreeCall(Inst, &TLI)) { // calls to free() deallocate the entire structure - Loc = MemoryLocation::getAfter(CI->getArgOperand(0)); + Loc = MemoryLocation::getAfter(CI->getArgOperand(0)); return ModRefInfo::Mod; } @@ -166,12 +166,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return ModRefInfo::Mod; - case Intrinsic::masked_load: - Loc = MemoryLocation::getForArgument(II, 0, TLI); - return ModRefInfo::Ref; - case Intrinsic::masked_store: - Loc = MemoryLocation::getForArgument(II, 1, TLI); - return ModRefInfo::Mod; + case Intrinsic::masked_load: + Loc = MemoryLocation::getForArgument(II, 0, TLI); + return ModRefInfo::Ref; + case Intrinsic::masked_store: + Loc = MemoryLocation::getForArgument(II, 1, TLI); + return ModRefInfo::Mod; default: break; } @@ -344,9 +344,9 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // If we hit load/store with the same invariant.group metadata (and the // same pointer operand) we can assume that value pointed by pointer // operand didn't change. - if ((isa<LoadInst>(U) || - (isa<StoreInst>(U) && - cast<StoreInst>(U)->getPointerOperand() == Ptr)) && + if ((isa<LoadInst>(U) || + (isa<StoreInst>(U) && + cast<StoreInst>(U)->getPointerOperand() == Ptr)) && U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } @@ -370,8 +370,8 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { - // We can batch AA queries, because IR does not change during a MemDep query. - BatchAAResults BatchAA(AA); + // We can batch AA queries, because IR does not change during a MemDep query. + BatchAAResults BatchAA(AA); bool isInvariantLoad = false; unsigned DefaultLimit = getDefaultBlockScanLimit(); @@ -450,32 +450,32 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. - Intrinsic::ID ID = II->getIntrinsicID(); - switch (ID) { - case Intrinsic::lifetime_start: { + Intrinsic::ID ID = II->getIntrinsicID(); + switch (ID) { + case Intrinsic::lifetime_start: { // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point (the right approach is to use // GetPointerBaseWithConstantOffset). 
- MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1)); - if (BatchAA.isMustAlias(ArgLoc, MemLoc)) + MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1)); + if (BatchAA.isMustAlias(ArgLoc, MemLoc)) return MemDepResult::getDef(II); continue; } - case Intrinsic::masked_load: - case Intrinsic::masked_store: { - MemoryLocation Loc; - /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI); - AliasResult R = BatchAA.alias(Loc, MemLoc); - if (R == NoAlias) - continue; - if (R == MustAlias) - return MemDepResult::getDef(II); - if (ID == Intrinsic::masked_load) - continue; - return MemDepResult::getClobber(II); - } - } + case Intrinsic::masked_load: + case Intrinsic::masked_store: { + MemoryLocation Loc; + /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI); + AliasResult R = BatchAA.alias(Loc, MemLoc); + if (R == NoAlias) + continue; + if (R == MustAlias) + return MemDepResult::getDef(II); + if (ID == Intrinsic::masked_load) + continue; + return MemDepResult::getClobber(II); + } + } } // Values depend on loads if the pointers are must aliased. This means @@ -512,7 +512,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( MemoryLocation LoadLoc = MemoryLocation::get(LI); // If we found a pointer, check if it could be the same as our pointer. - AliasResult R = BatchAA.alias(LoadLoc, MemLoc); + AliasResult R = BatchAA.alias(LoadLoc, MemLoc); if (isLoad) { if (R == NoAlias) @@ -543,7 +543,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( continue; // Stores don't alias loads from read-only memory. - if (BatchAA.pointsToConstantMemory(LoadLoc)) + if (BatchAA.pointsToConstantMemory(LoadLoc)) continue; // Stores depend on may/must aliased loads. @@ -574,7 +574,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. - if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc))) + if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc))) continue; // Ok, this store might clobber the query pointer. Check to see if it is @@ -583,7 +583,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( MemoryLocation StoreLoc = MemoryLocation::get(SI); // If we found a pointer, check if it could be the same as our pointer. - AliasResult R = BatchAA.alias(StoreLoc, MemLoc); + AliasResult R = BatchAA.alias(StoreLoc, MemLoc); if (R == NoAlias) continue; @@ -601,8 +601,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // looking for a clobber in many cases; that's an alias property and is // handled by BasicAA. if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) { - const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr); - if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr)) + const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr); + if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); } @@ -619,10 +619,10 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( continue; // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc); + ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. if (isModAndRefSet(MR)) - // TODO: Support callCapturesBefore() on BatchAAResults. 
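The BatchAAResults wrapper used throughout this walk caches alias queries that are only valid while the IR is frozen; a minimal sketch of the pattern (the helper name and the two locations are assumptions):

#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

// Batch the queries for the duration of one scan; the cache dies with BatchAA.
static bool mustAliasDuringScan(AAResults &AA, const MemoryLocation &LocA,
                                const MemoryLocation &LocB) {
  BatchAAResults BatchAA(AA);
  return BatchAA.isMustAlias(LocA, LocB);
}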
+ // TODO: Support callCapturesBefore() on BatchAAResults. MR = AA.callCapturesBefore(Inst, MemLoc, &DT); switch (clearMust(MR)) { case ModRefInfo::NoModRef: @@ -754,7 +754,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) { } else { // Seed DirtyBlocks with each of the preds of QueryInst's block. BasicBlock *QueryBB = QueryCall->getParent(); - append_range(DirtyBlocks, PredCache.get(QueryBB)); + append_range(DirtyBlocks, PredCache.get(QueryBB)); ++NumUncacheNonLocal; } @@ -768,7 +768,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) { // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { - BasicBlock *DirtyBB = DirtyBlocks.pop_back_val(); + BasicBlock *DirtyBB = DirtyBlocks.pop_back_val(); // Already processed this block? if (!Visited.insert(DirtyBB).second) @@ -838,7 +838,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) { // If the block *is* completely transparent to the load, we need to check // the predecessors of this block. Add them to our worklist. - append_range(DirtyBlocks, PredCache.get(DirtyBB)); + append_range(DirtyBlocks, PredCache.get(DirtyBB)); } } @@ -1015,7 +1015,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, NonLocalDepEntry Val = Cache.back(); Cache.pop_back(); MemoryDependenceResults::NonLocalDepInfo::iterator Entry = - llvm::upper_bound(Cache, Val); + llvm::upper_bound(Cache, Val); Cache.insert(Entry, Val); } break; diff --git a/contrib/libs/llvm12/lib/Analysis/MemoryLocation.cpp b/contrib/libs/llvm12/lib/Analysis/MemoryLocation.cpp index ef9cda37ce..bcd49d3f61 100644 --- a/contrib/libs/llvm12/lib/Analysis/MemoryLocation.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MemoryLocation.cpp @@ -20,10 +20,10 @@ using namespace llvm; void LocationSize::print(raw_ostream &OS) const { OS << "LocationSize::"; - if (*this == beforeOrAfterPointer()) - OS << "beforeOrAfterPointer"; - else if (*this == afterPointer()) - OS << "afterPointer"; + if (*this == beforeOrAfterPointer()) + OS << "beforeOrAfterPointer"; + else if (*this == afterPointer()) + OS << "afterPointer"; else if (*this == mapEmpty()) OS << "mapEmpty"; else if (*this == mapTombstone()) @@ -59,8 +59,8 @@ MemoryLocation MemoryLocation::get(const VAArgInst *VI) { AAMDNodes AATags; VI->getAAMetadata(AATags); - return MemoryLocation(VI->getPointerOperand(), - LocationSize::afterPointer(), AATags); + return MemoryLocation(VI->getPointerOperand(), + LocationSize::afterPointer(), AATags); } MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) { @@ -111,7 +111,7 @@ MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) { } MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) { - auto Size = LocationSize::afterPointer(); + auto Size = LocationSize::afterPointer(); if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) Size = LocationSize::precise(C->getValue().getZExtValue()); @@ -132,7 +132,7 @@ MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) { } MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) { - auto Size = LocationSize::afterPointer(); + auto Size = LocationSize::afterPointer(); if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength())) Size = LocationSize::precise(C->getValue().getZExtValue()); @@ -160,14 +160,14 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, break; case Intrinsic::memset: case Intrinsic::memcpy: - case Intrinsic::memcpy_inline: + case 
Intrinsic::memcpy_inline: case Intrinsic::memmove: assert((ArgIdx == 0 || ArgIdx == 1) && "Invalid argument index for memory intrinsic"); if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()), AATags); - return MemoryLocation::getAfter(Arg, AATags); + return MemoryLocation::getAfter(Arg, AATags); case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: @@ -179,21 +179,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()), AATags); - case Intrinsic::masked_load: - assert(ArgIdx == 0 && "Invalid argument index"); - return MemoryLocation( - Arg, - LocationSize::upperBound(DL.getTypeStoreSize(II->getType())), - AATags); - - case Intrinsic::masked_store: - assert(ArgIdx == 1 && "Invalid argument index"); - return MemoryLocation( - Arg, - LocationSize::upperBound( - DL.getTypeStoreSize(II->getArgOperand(0)->getType())), - AATags); - + case Intrinsic::masked_load: + assert(ArgIdx == 0 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound(DL.getTypeStoreSize(II->getType())), + AATags); + + case Intrinsic::masked_store: + assert(ArgIdx == 1 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound( + DL.getTypeStoreSize(II->getArgOperand(0)->getType())), + AATags); + case Intrinsic::invariant_end: // The first argument to an invariant.end is a "descriptor" type (e.g. a // pointer to a empty struct) which is never actually dereferenced. @@ -228,48 +228,48 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 // whenever possible. LibFunc F; - if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) { - switch (F) { - case LibFunc_memset_pattern16: - assert((ArgIdx == 0 || ArgIdx == 1) && - "Invalid argument index for memset_pattern16"); - if (ArgIdx == 1) - return MemoryLocation(Arg, LocationSize::precise(16), AATags); - if (const ConstantInt *LenCI = - dyn_cast<ConstantInt>(Call->getArgOperand(2))) - return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()), - AATags); - return MemoryLocation::getAfter(Arg, AATags); - case LibFunc_bcmp: - case LibFunc_memcmp: - assert((ArgIdx == 0 || ArgIdx == 1) && - "Invalid argument index for memcmp/bcmp"); - if (const ConstantInt *LenCI = - dyn_cast<ConstantInt>(Call->getArgOperand(2))) - return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()), - AATags); - return MemoryLocation::getAfter(Arg, AATags); - case LibFunc_memchr: - assert((ArgIdx == 0) && "Invalid argument index for memchr"); - if (const ConstantInt *LenCI = - dyn_cast<ConstantInt>(Call->getArgOperand(2))) - return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()), - AATags); - return MemoryLocation::getAfter(Arg, AATags); - case LibFunc_memccpy: - assert((ArgIdx == 0 || ArgIdx == 1) && - "Invalid argument index for memccpy"); - // We only know an upper bound on the number of bytes read/written. 
- if (const ConstantInt *LenCI = - dyn_cast<ConstantInt>(Call->getArgOperand(3))) - return MemoryLocation( - Arg, LocationSize::upperBound(LenCI->getZExtValue()), AATags); - return MemoryLocation::getAfter(Arg, AATags); - default: - break; - }; + if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) { + switch (F) { + case LibFunc_memset_pattern16: + assert((ArgIdx == 0 || ArgIdx == 1) && + "Invalid argument index for memset_pattern16"); + if (ArgIdx == 1) + return MemoryLocation(Arg, LocationSize::precise(16), AATags); + if (const ConstantInt *LenCI = + dyn_cast<ConstantInt>(Call->getArgOperand(2))) + return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()), + AATags); + return MemoryLocation::getAfter(Arg, AATags); + case LibFunc_bcmp: + case LibFunc_memcmp: + assert((ArgIdx == 0 || ArgIdx == 1) && + "Invalid argument index for memcmp/bcmp"); + if (const ConstantInt *LenCI = + dyn_cast<ConstantInt>(Call->getArgOperand(2))) + return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()), + AATags); + return MemoryLocation::getAfter(Arg, AATags); + case LibFunc_memchr: + assert((ArgIdx == 0) && "Invalid argument index for memchr"); + if (const ConstantInt *LenCI = + dyn_cast<ConstantInt>(Call->getArgOperand(2))) + return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()), + AATags); + return MemoryLocation::getAfter(Arg, AATags); + case LibFunc_memccpy: + assert((ArgIdx == 0 || ArgIdx == 1) && + "Invalid argument index for memccpy"); + // We only know an upper bound on the number of bytes read/written. + if (const ConstantInt *LenCI = + dyn_cast<ConstantInt>(Call->getArgOperand(3))) + return MemoryLocation( + Arg, LocationSize::upperBound(LenCI->getZExtValue()), AATags); + return MemoryLocation::getAfter(Arg, AATags); + default: + break; + }; } // FIXME: Handle memset_pattern4 and memset_pattern8 also. 
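// Illustrative sketch, not part of this diff: one way a TLI-aware client
// might consume the locations produced above. queryPreciseAccessSize is a
// hypothetical name; the APIs used are MemoryLocation::getForArgument and
// LocationSize as defined in this tree.
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/InstrTypes.h"

// Returns the exact byte count accessed through argument ArgIdx of a
// recognized call, or None when only an upper bound (or nothing) is known,
// as in the memccpy case handled above.
static llvm::Optional<uint64_t>
queryPreciseAccessSize(const llvm::CallBase *Call, unsigned ArgIdx,
                       const llvm::TargetLibraryInfo &TLI) {
  llvm::MemoryLocation Loc =
      llvm::MemoryLocation::getForArgument(Call, ArgIdx, TLI);
  if (Loc.Size.hasValue() && Loc.Size.isPrecise())
    return Loc.Size.getValue();
  return llvm::None;
}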
- return MemoryLocation::getBeforeOrAfter(Call->getArgOperand(ArgIdx), AATags); + return MemoryLocation::getBeforeOrAfter(Call->getArgOperand(ArgIdx), AATags); } diff --git a/contrib/libs/llvm12/lib/Analysis/MemorySSA.cpp b/contrib/libs/llvm12/lib/Analysis/MemorySSA.cpp index 4722b68e20..a5ce422acf 100644 --- a/contrib/libs/llvm12/lib/Analysis/MemorySSA.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MemorySSA.cpp @@ -24,7 +24,7 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Config/llvm-config.h" @@ -60,11 +60,11 @@ using namespace llvm; #define DEBUG_TYPE "memoryssa" -static cl::opt<std::string> - DotCFGMSSA("dot-cfg-mssa", - cl::value_desc("file name for generated dot file"), - cl::desc("file name for generated dot file"), cl::init("")); - +static cl::opt<std::string> + DotCFGMSSA("dot-cfg-mssa", + cl::value_desc("file name for generated dot file"), + cl::desc("file name for generated dot file"), cl::init("")); + INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) @@ -284,7 +284,7 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: - case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::experimental_noalias_scope_decl: return {false, NoAlias}; case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: @@ -296,14 +296,14 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, } } - if (auto *CB = dyn_cast_or_null<CallBase>(UseInst)) { - ModRefInfo I = AA.getModRefInfo(DefInst, CB); + if (auto *CB = dyn_cast_or_null<CallBase>(UseInst)) { + ModRefInfo I = AA.getModRefInfo(DefInst, CB); AR = isMustSet(I) ? MustAlias : MayAlias; return {isModOrRefSet(I), AR}; } if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) - if (auto *UseLoad = dyn_cast_or_null<LoadInst>(UseInst)) + if (auto *UseLoad = dyn_cast_or_null<LoadInst>(UseInst)) return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias}; ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc); @@ -362,10 +362,10 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { // If the memory can't be changed, then loads of the memory can't be // clobbered. 
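// Illustrative sketch, not part of this diff: the check below fires for loads
// carrying !invariant.load metadata or loads of provably constant memory.
// markInvariantLoad is a hypothetical helper showing how such metadata is
// attached in the first place.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

static void markInvariantLoad(llvm::LoadInst *LI) {
  // An empty MDNode is the conventional payload for !invariant.load.
  LI->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(LI->getContext(), {}));
}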
- if (auto *LI = dyn_cast<LoadInst>(I)) - return I->hasMetadata(LLVMContext::MD_invariant_load) || - AA.pointsToConstantMemory(MemoryLocation::get(LI)); - return false; + if (auto *LI = dyn_cast<LoadInst>(I)) + return I->hasMetadata(LLVMContext::MD_invariant_load) || + AA.pointsToConstantMemory(MemoryLocation::get(LI)); + return false; } /// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing @@ -452,15 +452,15 @@ checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, } assert(isa<MemoryPhi>(MA)); - - // Add reachable phi predecessors - for (auto ItB = upward_defs_begin( - {const_cast<MemoryAccess *>(MA), MAP.second}, - MSSA.getDomTree()), - ItE = upward_defs_end(); - ItB != ItE; ++ItB) - if (MSSA.getDomTree().isReachableFromEntry(ItB.getPhiArgBlock())) - Worklist.emplace_back(*ItB); + + // Add reachable phi predecessors + for (auto ItB = upward_defs_begin( + {const_cast<MemoryAccess *>(MA), MAP.second}, + MSSA.getDomTree()), + ItE = upward_defs_end(); + ItB != ItE; ++ItB) + if (MSSA.getDomTree().isReachableFromEntry(ItB.getPhiArgBlock())) + Worklist.emplace_back(*ItB); } } @@ -511,16 +511,16 @@ template <class AliasAnalysisType> class ClobberWalker { UpwardsMemoryQuery *Query; unsigned *UpwardWalkLimit; - // Phi optimization bookkeeping: - // List of DefPath to process during the current phi optimization walk. + // Phi optimization bookkeeping: + // List of DefPath to process during the current phi optimization walk. SmallVector<DefPath, 32> Paths; - // List of visited <Access, Location> pairs; we can skip paths already - // visited with the same memory location. + // List of visited <Access, Location> pairs; we can skip paths already + // visited with the same memory location. DenseSet<ConstMemoryAccessPair> VisitedPhis; - // Record if phi translation has been performed during the current phi - // optimization walk, as merging alias results after phi translation can - // yield incorrect results. Context in PR46156. - bool PerformedPhiTranslation = false; + // Record if phi translation has been performed during the current phi + // optimization walk, as merging alias results after phi translation can + // yield incorrect results. Context in PR46156. + bool PerformedPhiTranslation = false; /// Find the nearest def or phi that `From` can legally be optimized to. const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { @@ -595,9 +595,9 @@ template <class AliasAnalysisType> class ClobberWalker { void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, ListIndex PriorNode) { - auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT, - &PerformedPhiTranslation); - auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end()); + auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT, + &PerformedPhiTranslation); + auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end()); for (const MemoryAccessPair &P : UpwardDefs) { PausedSearches.push_back(Paths.size()); Paths.emplace_back(P.second, P.first, PriorNode); @@ -651,16 +651,16 @@ template <class AliasAnalysisType> class ClobberWalker { // - We still cache things for A, so C only needs to walk up a bit. // If this behavior becomes problematic, we can fix without a ton of extra // work. 
- if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) { - if (PerformedPhiTranslation) { - // If visiting this path performed Phi translation, don't continue, - // since it may not be correct to merge results from two paths if one - // relies on the phi translation. - TerminatedPath Term{Node.Last, PathIndex}; - return Term; - } + if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) { + if (PerformedPhiTranslation) { + // If visiting this path performed Phi translation, don't continue, + // since it may not be correct to merge results from two paths if one + // relies on the phi translation. + TerminatedPath Term{Node.Last, PathIndex}; + return Term; + } continue; - } + } const MemoryAccess *SkipStopWhere = nullptr; if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) { @@ -773,7 +773,7 @@ template <class AliasAnalysisType> class ClobberWalker { /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, const MemoryLocation &Loc) { - assert(Paths.empty() && VisitedPhis.empty() && !PerformedPhiTranslation && + assert(Paths.empty() && VisitedPhis.empty() && !PerformedPhiTranslation && "Reset the optimization state."); Paths.emplace_back(Loc, Start, Phi, None); @@ -929,7 +929,7 @@ template <class AliasAnalysisType> class ClobberWalker { void resetPhiOptznState() { Paths.clear(); VisitedPhis.clear(); - PerformedPhiTranslation = false; + PerformedPhiTranslation = false; } public: @@ -1709,11 +1709,11 @@ MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, if (CreationMustSucceed) assert(NewAccess != nullptr && "Tried to create a memory access for a " "non-memory touching instruction"); - if (NewAccess) { - assert((!Definition || !isa<MemoryUse>(Definition)) && - "A use cannot be a defining access"); + if (NewAccess) { + assert((!Definition || !isa<MemoryUse>(Definition)) && + "A use cannot be a defining access"); NewAccess->setDefiningAccess(Definition); - } + } return NewAccess; } @@ -1742,15 +1742,15 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, // dependencies here. // FIXME: Replace this special casing with a more accurate modelling of // assume's control dependency. 
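// Illustrative usage sketch, not part of this diff: a typical MemorySSA
// client asks the walker for a load's clobbering access. Because of the
// special case just below, llvm.assume and
// llvm.experimental.noalias.scope.decl never receive memory accesses and so
// can never appear as such a clobber. isClobberedInFunction is a
// hypothetical helper.
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"

static bool isClobberedInFunction(llvm::MemorySSA &MSSA, llvm::LoadInst *LI) {
  llvm::MemorySSAWalker *Walker = MSSA.getWalker();
  llvm::MemoryAccess *Clobber =
      Walker->getClobberingMemoryAccess(MSSA.getMemoryAccess(LI));
  // liveOnEntry means nothing inside this function clobbers the load.
  return !MSSA.isLiveOnEntryDef(Clobber);
}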
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - default: - break; - case Intrinsic::assume: - case Intrinsic::experimental_noalias_scope_decl: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::assume: + case Intrinsic::experimental_noalias_scope_decl: return nullptr; - } - } + } + } // Using a nonstandard AA pipelines might leave us with unexpected modref // results for I, so add a check to not model instructions that may not read @@ -1760,8 +1760,8 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, bool Def, Use; if (Template) { - Def = isa<MemoryDef>(Template); - Use = isa<MemoryUse>(Template); + Def = isa<MemoryDef>(Template); + Use = isa<MemoryUse>(Template); #if !defined(NDEBUG) ModRefInfo ModRef = AAP->getModRefInfo(I, None); bool DefCheck, UseCheck; @@ -1981,7 +1981,7 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const { "Incomplete MemoryPhi Node"); for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { verifyUseInDefs(Phi->getIncomingValue(I), Phi); - assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) && + assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) && "Incoming phi block not a block predecessor"); } #endif @@ -2228,98 +2228,98 @@ void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MemorySSAWrapperPass>(); } -class DOTFuncMSSAInfo { -private: - const Function &F; - MemorySSAAnnotatedWriter MSSAWriter; - -public: - DOTFuncMSSAInfo(const Function &F, MemorySSA &MSSA) - : F(F), MSSAWriter(&MSSA) {} - - const Function *getFunction() { return &F; } - MemorySSAAnnotatedWriter &getWriter() { return MSSAWriter; } -}; - -namespace llvm { - -template <> -struct GraphTraits<DOTFuncMSSAInfo *> : public GraphTraits<const BasicBlock *> { - static NodeRef getEntryNode(DOTFuncMSSAInfo *CFGInfo) { - return &(CFGInfo->getFunction()->getEntryBlock()); - } - - // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - using nodes_iterator = pointer_iterator<Function::const_iterator>; - - static nodes_iterator nodes_begin(DOTFuncMSSAInfo *CFGInfo) { - return nodes_iterator(CFGInfo->getFunction()->begin()); - } - - static nodes_iterator nodes_end(DOTFuncMSSAInfo *CFGInfo) { - return nodes_iterator(CFGInfo->getFunction()->end()); - } - - static size_t size(DOTFuncMSSAInfo *CFGInfo) { - return CFGInfo->getFunction()->size(); - } -}; - -template <> -struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits { - - DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} - - static std::string getGraphName(DOTFuncMSSAInfo *CFGInfo) { - return "MSSA CFG for '" + CFGInfo->getFunction()->getName().str() + - "' function"; - } - - std::string getNodeLabel(const BasicBlock *Node, DOTFuncMSSAInfo *CFGInfo) { - return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel( - Node, nullptr, - [CFGInfo](raw_string_ostream &OS, const BasicBlock &BB) -> void { - BB.print(OS, &CFGInfo->getWriter(), true, true); - }, - [](std::string &S, unsigned &I, unsigned Idx) -> void { - std::string Str = S.substr(I, Idx - I); - StringRef SR = Str; - if (SR.count(" = MemoryDef(") || SR.count(" = MemoryPhi(") || - SR.count("MemoryUse(")) - return; - DOTGraphTraits<DOTFuncInfo *>::eraseComment(S, I, Idx); - }); - } - - static std::string getEdgeSourceLabel(const BasicBlock *Node, - const_succ_iterator I) { - return DOTGraphTraits<DOTFuncInfo 
*>::getEdgeSourceLabel(Node, I); - } - - /// Display the raw branch weights from PGO. - std::string getEdgeAttributes(const BasicBlock *Node, const_succ_iterator I, - DOTFuncMSSAInfo *CFGInfo) { - return ""; - } - - std::string getNodeAttributes(const BasicBlock *Node, - DOTFuncMSSAInfo *CFGInfo) { - return getNodeLabel(Node, CFGInfo).find(';') != std::string::npos - ? "style=filled, fillcolor=lightpink" - : ""; - } -}; - -} // namespace llvm - +class DOTFuncMSSAInfo { +private: + const Function &F; + MemorySSAAnnotatedWriter MSSAWriter; + +public: + DOTFuncMSSAInfo(const Function &F, MemorySSA &MSSA) + : F(F), MSSAWriter(&MSSA) {} + + const Function *getFunction() { return &F; } + MemorySSAAnnotatedWriter &getWriter() { return MSSAWriter; } +}; + +namespace llvm { + +template <> +struct GraphTraits<DOTFuncMSSAInfo *> : public GraphTraits<const BasicBlock *> { + static NodeRef getEntryNode(DOTFuncMSSAInfo *CFGInfo) { + return &(CFGInfo->getFunction()->getEntryBlock()); + } + + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + using nodes_iterator = pointer_iterator<Function::const_iterator>; + + static nodes_iterator nodes_begin(DOTFuncMSSAInfo *CFGInfo) { + return nodes_iterator(CFGInfo->getFunction()->begin()); + } + + static nodes_iterator nodes_end(DOTFuncMSSAInfo *CFGInfo) { + return nodes_iterator(CFGInfo->getFunction()->end()); + } + + static size_t size(DOTFuncMSSAInfo *CFGInfo) { + return CFGInfo->getFunction()->size(); + } +}; + +template <> +struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits { + + DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} + + static std::string getGraphName(DOTFuncMSSAInfo *CFGInfo) { + return "MSSA CFG for '" + CFGInfo->getFunction()->getName().str() + + "' function"; + } + + std::string getNodeLabel(const BasicBlock *Node, DOTFuncMSSAInfo *CFGInfo) { + return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel( + Node, nullptr, + [CFGInfo](raw_string_ostream &OS, const BasicBlock &BB) -> void { + BB.print(OS, &CFGInfo->getWriter(), true, true); + }, + [](std::string &S, unsigned &I, unsigned Idx) -> void { + std::string Str = S.substr(I, Idx - I); + StringRef SR = Str; + if (SR.count(" = MemoryDef(") || SR.count(" = MemoryPhi(") || + SR.count("MemoryUse(")) + return; + DOTGraphTraits<DOTFuncInfo *>::eraseComment(S, I, Idx); + }); + } + + static std::string getEdgeSourceLabel(const BasicBlock *Node, + const_succ_iterator I) { + return DOTGraphTraits<DOTFuncInfo *>::getEdgeSourceLabel(Node, I); + } + + /// Display the raw branch weights from PGO. + std::string getEdgeAttributes(const BasicBlock *Node, const_succ_iterator I, + DOTFuncMSSAInfo *CFGInfo) { + return ""; + } + + std::string getNodeAttributes(const BasicBlock *Node, + DOTFuncMSSAInfo *CFGInfo) { + return getNodeLabel(Node, CFGInfo).find(';') != std::string::npos + ? 
"style=filled, fillcolor=lightpink" + : ""; + } +}; + +} // namespace llvm + bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); - if (DotCFGMSSA != "") { - DOTFuncMSSAInfo CFGInfo(F, MSSA); - WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); - } else - MSSA.print(dbgs()); - + if (DotCFGMSSA != "") { + DOTFuncMSSAInfo CFGInfo(F, MSSA); + WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); + } else + MSSA.print(dbgs()); + if (VerifyMemorySSA) MSSA.verifyMemorySSA(); return false; @@ -2345,14 +2345,14 @@ bool MemorySSAAnalysis::Result::invalidate( PreservedAnalyses MemorySSAPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); - if (DotCFGMSSA != "") { - DOTFuncMSSAInfo CFGInfo(F, MSSA); - WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); - } else { - OS << "MemorySSA for function: " << F.getName() << "\n"; - MSSA.print(OS); - } + auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); + if (DotCFGMSSA != "") { + DOTFuncMSSAInfo CFGInfo(F, MSSA); + WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); + } else { + OS << "MemorySSA for function: " << F.getName() << "\n"; + MSSA.print(OS); + } return PreservedAnalyses::all(); } @@ -2422,7 +2422,7 @@ MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase( UpwardsMemoryQuery Q; Q.OriginalAccess = StartingUseOrDef; Q.StartingLoc = Loc; - Q.Inst = nullptr; + Q.Inst = nullptr; Q.IsCall = false; // Unlike the other function, do not walk to the def of a def, because we are @@ -2544,19 +2544,19 @@ void MemoryDef::deleteMe(DerivedUser *Self) { void MemoryUse::deleteMe(DerivedUser *Self) { delete static_cast<MemoryUse *>(Self); } - -bool upward_defs_iterator::IsGuaranteedLoopInvariant(Value *Ptr) const { - auto IsGuaranteedLoopInvariantBase = [](Value *Ptr) { - Ptr = Ptr->stripPointerCasts(); - if (!isa<Instruction>(Ptr)) - return true; - return isa<AllocaInst>(Ptr); - }; - - Ptr = Ptr->stripPointerCasts(); - if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { - return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) && - GEP->hasAllConstantIndices(); - } - return IsGuaranteedLoopInvariantBase(Ptr); -} + +bool upward_defs_iterator::IsGuaranteedLoopInvariant(Value *Ptr) const { + auto IsGuaranteedLoopInvariantBase = [](Value *Ptr) { + Ptr = Ptr->stripPointerCasts(); + if (!isa<Instruction>(Ptr)) + return true; + return isa<AllocaInst>(Ptr); + }; + + Ptr = Ptr->stripPointerCasts(); + if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { + return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) && + GEP->hasAllConstantIndices(); + } + return IsGuaranteedLoopInvariantBase(Ptr); +} diff --git a/contrib/libs/llvm12/lib/Analysis/MemorySSAUpdater.cpp b/contrib/libs/llvm12/lib/Analysis/MemorySSAUpdater.cpp index 99fa58b887..dfde63f94c 100644 --- a/contrib/libs/llvm12/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MemorySSAUpdater.cpp @@ -319,7 +319,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { bool DefBeforeSameBlock = false; if (DefBefore->getBlock() == MD->getBlock() && !(isa<MemoryPhi>(DefBefore) && - llvm::is_contained(InsertedPHIs, DefBefore))) + llvm::is_contained(InsertedPHIs, DefBefore))) DefBeforeSameBlock = true; // There is a def before us, which means we can replace any store/phi uses @@ -342,8 +342,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), 
InsertedPHIs.end()); - SmallSet<WeakVH, 8> ExistingPhis; - + SmallSet<WeakVH, 8> ExistingPhis; + // Remember the index where we may insert new phis. unsigned NewPhiIndex = InsertedPHIs.size(); if (!DefBeforeSameBlock) { @@ -384,8 +384,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { if (!MPhi) { MPhi = MSSA->createMemoryPhi(BBIDF); NewInsertedPHIs.push_back(MPhi); - } else { - ExistingPhis.insert(MPhi); + } else { + ExistingPhis.insert(MPhi); } // Add the phis created into the IDF blocks to NonOptPhis, so they are not // optimized out as trivial by the call to getPreviousDefFromEnd below. @@ -431,10 +431,10 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { if (NewPhiSize) tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize)); - // Now that all fixups are done, rename all uses if we are asked. Skip - // renaming for defs in unreachable blocks. - BasicBlock *StartBlock = MD->getBlock(); - if (RenameUses && MSSA->getDomTree().getNode(StartBlock)) { + // Now that all fixups are done, rename all uses if we are asked. Skip + // renaming for defs in unreachable blocks. + BasicBlock *StartBlock = MD->getBlock(); + if (RenameUses && MSSA->getDomTree().getNode(StartBlock)) { SmallPtrSet<BasicBlock *, 16> Visited; // We are guaranteed there is a def in the block, because we just got it // handed to us in this function. @@ -452,13 +452,13 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { if (Phi) MSSA->renamePass(Phi->getBlock(), nullptr, Visited); } - // Existing Phi blocks may need renaming too, if an access was previously - // optimized and the inserted Defs "covers" the Optimized value. - for (auto &MP : ExistingPhis) { - MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP); - if (Phi) - MSSA->renamePass(Phi->getBlock(), nullptr, Visited); - } + // Existing Phi blocks may need renaming too, if an access was previously + // optimized and the inserted Defs "covers" the Optimized value. + for (auto &MP : ExistingPhis) { + MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP); + if (Phi) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } } } @@ -555,20 +555,20 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From, } } -/// If all arguments of a MemoryPHI are defined by the same incoming -/// argument, return that argument. -static MemoryAccess *onlySingleValue(MemoryPhi *MP) { - MemoryAccess *MA = nullptr; - - for (auto &Arg : MP->operands()) { - if (!MA) - MA = cast<MemoryAccess>(Arg); - else if (MA != Arg) - return nullptr; - } - return MA; -} - +/// If all arguments of a MemoryPHI are defined by the same incoming +/// argument, return that argument. 
+static MemoryAccess *onlySingleValue(MemoryPhi *MP) { + MemoryAccess *MA = nullptr; + + for (auto &Arg : MP->operands()) { + if (!MA) + MA = cast<MemoryAccess>(Arg); + else if (MA != Arg) + return nullptr; + } + return MA; +} + static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, @@ -725,10 +725,10 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, NewPhi->addIncoming(IncPhi, IncBB); } } - if (auto *SingleAccess = onlySingleValue(NewPhi)) { - MPhiMap[Phi] = SingleAccess; - removeMemoryAccess(NewPhi); - } + if (auto *SingleAccess = onlySingleValue(NewPhi)) { + MPhiMap[Phi] = SingleAccess; + removeMemoryAccess(NewPhi); + } }; auto ProcessBlock = [&](BasicBlock *BB) { @@ -811,42 +811,42 @@ void MemorySSAUpdater::updateExitBlocksForClonedLoop( } void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates, - DominatorTree &DT, bool UpdateDT) { + DominatorTree &DT, bool UpdateDT) { SmallVector<CFGUpdate, 4> DeleteUpdates; - SmallVector<CFGUpdate, 4> RevDeleteUpdates; + SmallVector<CFGUpdate, 4> RevDeleteUpdates; SmallVector<CFGUpdate, 4> InsertUpdates; for (auto &Update : Updates) { if (Update.getKind() == DT.Insert) InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()}); - else { + else { DeleteUpdates.push_back({DT.Delete, Update.getFrom(), Update.getTo()}); - RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()}); - } + RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()}); + } } if (!DeleteUpdates.empty()) { - if (!UpdateDT) { - SmallVector<CFGUpdate, 0> Empty; - // Deletes are reversed applied, because this CFGView is pretending the - // deletes did not happen yet, hence the edges still exist. - DT.applyUpdates(Empty, RevDeleteUpdates); - } else { - // Apply all updates, with the RevDeleteUpdates as PostCFGView. - DT.applyUpdates(Updates, RevDeleteUpdates); - } - - // Note: the MSSA update below doesn't distinguish between a GD with - // (RevDelete,false) and (Delete, true), but this matters for the DT - // updates above; for "children" purposes they are equivalent; but the - // updates themselves convey the desired update, used inside DT only. - GraphDiff<BasicBlock *> GD(RevDeleteUpdates); - applyInsertUpdates(InsertUpdates, DT, &GD); - // Update DT to redelete edges; this matches the real CFG so we can perform - // the standard update without a postview of the CFG. - DT.applyUpdates(DeleteUpdates); + if (!UpdateDT) { + SmallVector<CFGUpdate, 0> Empty; + // Deletes are reversed applied, because this CFGView is pretending the + // deletes did not happen yet, hence the edges still exist. + DT.applyUpdates(Empty, RevDeleteUpdates); + } else { + // Apply all updates, with the RevDeleteUpdates as PostCFGView. + DT.applyUpdates(Updates, RevDeleteUpdates); + } + + // Note: the MSSA update below doesn't distinguish between a GD with + // (RevDelete,false) and (Delete, true), but this matters for the DT + // updates above; for "children" purposes they are equivalent; but the + // updates themselves convey the desired update, used inside DT only. + GraphDiff<BasicBlock *> GD(RevDeleteUpdates); + applyInsertUpdates(InsertUpdates, DT, &GD); + // Update DT to redelete edges; this matches the real CFG so we can perform + // the standard update without a postview of the CFG. 
+ DT.applyUpdates(DeleteUpdates); } else { - if (UpdateDT) - DT.applyUpdates(Updates); + if (UpdateDT) + DT.applyUpdates(Updates); GraphDiff<BasicBlock *> GD; applyInsertUpdates(InsertUpdates, DT, &GD); } @@ -876,8 +876,8 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, // Check number of predecessors, we only care if there's more than one. unsigned Count = 0; BasicBlock *Pred = nullptr; - for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) { - Pred = Pi; + for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) { + Pred = Pi; Count++; if (Count == 2) break; @@ -970,7 +970,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, auto *BB = BBPredPair.first; const auto &AddedBlockSet = BBPredPair.second.Added; auto &PrevBlockSet = BBPredPair.second.Prev; - for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) { + for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) { if (!AddedBlockSet.count(Pi)) PrevBlockSet.insert(Pi); EdgeCountMap[{Pi, BB}]++; @@ -1121,7 +1121,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I) IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I))); } else { - for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BBIDF)) + for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BBIDF)) IDFPhi->addIncoming(GetLastDef(Pi), Pi); } } @@ -1341,7 +1341,7 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { // Note: We assume MemorySSA is not used in metadata since it's not really // part of the IR. - assert(NewDefTarget != MA && "Going into an infinite loop"); + assert(NewDefTarget != MA && "Going into an infinite loop"); while (!MA->use_empty()) { Use &U = *MA->use_begin(); if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser())) diff --git a/contrib/libs/llvm12/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/libs/llvm12/lib/Analysis/ModuleDebugInfoPrinter.cpp index 64fd5eb1ac..6cbe391b32 100644 --- a/contrib/libs/llvm12/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -14,11 +14,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ModuleDebugInfoPrinter.h" +#include "llvm/Analysis/ModuleDebugInfoPrinter.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" @@ -26,34 +26,34 @@ using namespace llvm; namespace { -class ModuleDebugInfoLegacyPrinter : public ModulePass { - DebugInfoFinder Finder; - -public: - static char ID; // Pass identification, replacement for typeid - ModuleDebugInfoLegacyPrinter() : ModulePass(ID) { - initializeModuleDebugInfoLegacyPrinterPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - void print(raw_ostream &O, const Module *M) const override; -}; +class ModuleDebugInfoLegacyPrinter : public ModulePass { + DebugInfoFinder Finder; + +public: + static char ID; // Pass identification, replacement for typeid + ModuleDebugInfoLegacyPrinter() : ModulePass(ID) { + initializeModuleDebugInfoLegacyPrinterPass( + *PassRegistry::getPassRegistry()); + } + + 
bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + void print(raw_ostream &O, const Module *M) const override; +}; } -char ModuleDebugInfoLegacyPrinter::ID = 0; -INITIALIZE_PASS(ModuleDebugInfoLegacyPrinter, "module-debuginfo", +char ModuleDebugInfoLegacyPrinter::ID = 0; +INITIALIZE_PASS(ModuleDebugInfoLegacyPrinter, "module-debuginfo", "Decodes module-level debug info", false, true) ModulePass *llvm::createModuleDebugInfoPrinterPass() { - return new ModuleDebugInfoLegacyPrinter(); + return new ModuleDebugInfoLegacyPrinter(); } -bool ModuleDebugInfoLegacyPrinter::runOnModule(Module &M) { +bool ModuleDebugInfoLegacyPrinter::runOnModule(Module &M) { Finder.processModule(M); return false; } @@ -71,8 +71,8 @@ static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory, O << ":" << Line; } -static void printModuleDebugInfo(raw_ostream &O, const Module *M, - const DebugInfoFinder &Finder) { +static void printModuleDebugInfo(raw_ostream &O, const Module *M, + const DebugInfoFinder &Finder) { // Printing the nodes directly isn't particularly helpful (since they // reference other nodes that won't be printed, particularly for the // filenames), so just print a few useful things. @@ -131,18 +131,18 @@ static void printModuleDebugInfo(raw_ostream &O, const Module *M, O << '\n'; } } - -void ModuleDebugInfoLegacyPrinter::print(raw_ostream &O, - const Module *M) const { - printModuleDebugInfo(O, M, Finder); -} - -ModuleDebugInfoPrinterPass::ModuleDebugInfoPrinterPass(raw_ostream &OS) - : OS(OS) {} - -PreservedAnalyses ModuleDebugInfoPrinterPass::run(Module &M, - ModuleAnalysisManager &AM) { - Finder.processModule(M); - printModuleDebugInfo(OS, &M, Finder); - return PreservedAnalyses::all(); -} + +void ModuleDebugInfoLegacyPrinter::print(raw_ostream &O, + const Module *M) const { + printModuleDebugInfo(O, M, Finder); +} + +ModuleDebugInfoPrinterPass::ModuleDebugInfoPrinterPass(raw_ostream &OS) + : OS(OS) {} + +PreservedAnalyses ModuleDebugInfoPrinterPass::run(Module &M, + ModuleAnalysisManager &AM) { + Finder.processModule(M); + printModuleDebugInfo(OS, &M, Finder); + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/ModuleSummaryAnalysis.cpp index 5f7746eeed..aa144883f6 100644 --- a/contrib/libs/llvm12/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -145,7 +145,7 @@ static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid, SetVector<FunctionSummary::ConstVCall> &ConstVCalls) { std::vector<uint64_t> Args; // Start from the second argument to skip the "this" pointer. 
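// Illustrative aside, not part of this diff: llvm::drop_begin, used on the
// next line, adapts a range by skipping its first N elements (N defaults to
// 1), which is how the "this" argument is dropped here. sumSkippingFirst is
// a hypothetical standalone example.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static int sumSkippingFirst(const llvm::SmallVectorImpl<int> &Vals) {
  int Sum = 0;
  for (int V : llvm::drop_begin(Vals)) // iterates Vals[1..N-1]
    Sum += V;
  return Sum;
}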
- for (auto &Arg : drop_begin(Call.CB.args())) { + for (auto &Arg : drop_begin(Call.CB.args())) { auto *CI = dyn_cast<ConstantInt>(Arg); if (!CI || CI->getBitWidth() > 64) { VCalls.insert({Guid, Call.Offset}); @@ -472,7 +472,7 @@ static void computeFunctionSummary( F.hasFnAttribute(Attribute::AlwaysInline)}; std::vector<FunctionSummary::ParamAccess> ParamAccesses; if (auto *SSI = GetSSICallback(F)) - ParamAccesses = SSI->getParamAccesses(Index); + ParamAccesses = SSI->getParamAccesses(Index); auto FuncSummary = std::make_unique<FunctionSummary>( Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), CallGraphEdges.takeVector(), TypeTests.takeVector(), diff --git a/contrib/libs/llvm12/lib/Analysis/MustExecute.cpp b/contrib/libs/llvm12/lib/Analysis/MustExecute.cpp index 1e7626013e..2d7343db27 100644 --- a/contrib/libs/llvm12/lib/Analysis/MustExecute.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MustExecute.cpp @@ -16,11 +16,11 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -302,31 +302,31 @@ bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I, } namespace { -struct MustExecutePrinter : public FunctionPass { - - static char ID; // Pass identification, replacement for typeid - MustExecutePrinter() : FunctionPass(ID) { - initializeMustExecutePrinterPass(*PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - } - bool runOnFunction(Function &F) override; -}; -struct MustBeExecutedContextPrinter : public ModulePass { - static char ID; - - MustBeExecutedContextPrinter() : ModulePass(ID) { - initializeMustBeExecutedContextPrinterPass( - *PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - bool runOnModule(Module &M) override; -}; +struct MustExecutePrinter : public FunctionPass { + + static char ID; // Pass identification, replacement for typeid + MustExecutePrinter() : FunctionPass(ID) { + initializeMustExecutePrinterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + } + bool runOnFunction(Function &F) override; +}; +struct MustBeExecutedContextPrinter : public ModulePass { + static char ID; + + MustBeExecutedContextPrinter() : ModulePass(ID) { + initializeMustBeExecutedContextPrinterPass( + *PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnModule(Module &M) override; +}; } char MustExecutePrinter::ID = 0; @@ -342,16 +342,16 @@ FunctionPass *llvm::createMustExecutePrinter() { } char MustBeExecutedContextPrinter::ID = 0; -INITIALIZE_PASS_BEGIN(MustBeExecutedContextPrinter, - "print-must-be-executed-contexts", - "print the must-be-executed-context for all instructions", - false, true) +INITIALIZE_PASS_BEGIN(MustBeExecutedContextPrinter, + "print-must-be-executed-contexts", + "print 
the must-be-executed-context for all instructions", + false, true) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(MustBeExecutedContextPrinter, "print-must-be-executed-contexts", - "print the must-be-executed-context for all instructions", + "print the must-be-executed-context for all instructions", false, true) ModulePass *llvm::createMustBeExecutedContextPrinter() { @@ -631,7 +631,7 @@ MustBeExecutedContextExplorer::findForwardJoinPoint(const BasicBlock *InitBB) { if (!TransfersExecution) return nullptr; - append_range(Worklist, successors(ToBB)); + append_range(Worklist, successors(ToBB)); } } @@ -838,42 +838,42 @@ const Instruction *MustBeExecutedIterator::advance() { Tail = nullptr; return nullptr; } - -PreservedAnalyses MustExecutePrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &LI = AM.getResult<LoopAnalysis>(F); - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - - MustExecuteAnnotatedWriter Writer(F, DT, LI); - F.print(OS, &Writer); - return PreservedAnalyses::all(); -} - -PreservedAnalyses -MustBeExecutedContextPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); - GetterTy<const LoopInfo> LIGetter = [&](const Function &F) { - return &FAM.getResult<LoopAnalysis>(const_cast<Function &>(F)); - }; - GetterTy<const DominatorTree> DTGetter = [&](const Function &F) { - return &FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F)); - }; - GetterTy<const PostDominatorTree> PDTGetter = [&](const Function &F) { - return &FAM.getResult<PostDominatorTreeAnalysis>(const_cast<Function &>(F)); - }; - - MustBeExecutedContextExplorer Explorer( - /* ExploreInterBlock */ true, - /* ExploreCFGForward */ true, - /* ExploreCFGBackward */ true, LIGetter, DTGetter, PDTGetter); - - for (Function &F : M) { - for (Instruction &I : instructions(F)) { - OS << "-- Explore context of: " << I << "\n"; - for (const Instruction *CI : Explorer.range(&I)) - OS << " [F: " << CI->getFunction()->getName() << "] " << *CI << "\n"; - } - } - return PreservedAnalyses::all(); -} + +PreservedAnalyses MustExecutePrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + + MustExecuteAnnotatedWriter Writer(F, DT, LI); + F.print(OS, &Writer); + return PreservedAnalyses::all(); +} + +PreservedAnalyses +MustBeExecutedContextPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + GetterTy<const LoopInfo> LIGetter = [&](const Function &F) { + return &FAM.getResult<LoopAnalysis>(const_cast<Function &>(F)); + }; + GetterTy<const DominatorTree> DTGetter = [&](const Function &F) { + return &FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F)); + }; + GetterTy<const PostDominatorTree> PDTGetter = [&](const Function &F) { + return &FAM.getResult<PostDominatorTreeAnalysis>(const_cast<Function &>(F)); + }; + + MustBeExecutedContextExplorer Explorer( + /* ExploreInterBlock */ true, + /* ExploreCFGForward */ true, + /* ExploreCFGBackward */ true, LIGetter, DTGetter, PDTGetter); + + for (Function &F : M) { + for (Instruction &I : instructions(F)) { + OS << "-- Explore context of: " << I << "\n"; + for (const Instruction *CI : Explorer.range(&I)) + OS << " [F: " 
<< CI->getFunction()->getName() << "] " << *CI << "\n"; + } + } + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/ObjCARCAliasAnalysis.cpp index 786d03f694..2f59520cbe 100644 --- a/contrib/libs/llvm12/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -54,11 +54,11 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); + const Value *UA = GetUnderlyingObjCPtr(SA); + const Value *UB = GetUnderlyingObjCPtr(SB); if (UA != SA || UB != SB) { - Result = AAResultBase::alias(MemoryLocation::getBeforeOrAfter(UA), - MemoryLocation::getBeforeOrAfter(UB), AAQI); + Result = AAResultBase::alias(MemoryLocation::getBeforeOrAfter(UA), + MemoryLocation::getBeforeOrAfter(UB), AAQI); // We can't use MustAlias or PartialAlias results here because // GetUnderlyingObjCPtr may return an offsetted pointer value. if (Result == NoAlias) @@ -84,10 +84,10 @@ bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc, // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S); + const Value *U = GetUnderlyingObjCPtr(S); if (U != S) - return AAResultBase::pointsToConstantMemory( - MemoryLocation::getBeforeOrAfter(U), AAQI, OrLocal); + return AAResultBase::pointsToConstantMemory( + MemoryLocation::getBeforeOrAfter(U), AAQI, OrLocal); // If that failed, fail. We don't need to chain here, since that's covered // by the earlier precise query. @@ -134,8 +134,8 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call, return AAResultBase::getModRefInfo(Call, Loc, AAQI); } -AnalysisKey ObjCARCAA::Key; - +AnalysisKey ObjCARCAA::Key; + ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) { return ObjCARCAAResult(F.getParent()->getDataLayout()); } diff --git a/contrib/libs/llvm12/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/libs/llvm12/lib/Analysis/ObjCARCAnalysisUtils.cpp index d34a3c6363..4af82760b5 100644 --- a/contrib/libs/llvm12/lib/Analysis/ObjCARCAnalysisUtils.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ObjCARCAnalysisUtils.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -24,22 +24,22 @@ bool llvm::objcarc::EnableARCOpts; static cl::opt<bool, true> EnableARCOptimizations( "enable-objc-arc-opts", cl::desc("enable/disable all ARC Optimizations"), cl::location(EnableARCOpts), cl::init(true), cl::Hidden); - -bool llvm::objcarc::IsPotentialRetainableObjPtr(const Value *Op, - AAResults &AA) { - // First make the rudimentary check. - if (!IsPotentialRetainableObjPtr(Op)) - return false; - - // Objects in constant memory are not reference-counted. - if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. 
- if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} + +bool llvm::objcarc::IsPotentialRetainableObjPtr(const Value *Op, + AAResults &AA) { + // First make the rudimentary check. + if (!IsPotentialRetainableObjPtr(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} diff --git a/contrib/libs/llvm12/lib/Analysis/OptimizationRemarkEmitter.cpp b/contrib/libs/llvm12/lib/Analysis/OptimizationRemarkEmitter.cpp index 6f3d4d536c..47a5a0ef1a 100644 --- a/contrib/libs/llvm12/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/contrib/libs/llvm12/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -15,7 +15,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" @@ -37,7 +37,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) LI.analyze(DT); // Then compute BranchProbabilityInfo. - BranchProbabilityInfo BPI(*F, LI, nullptr, &DT, nullptr); + BranchProbabilityInfo BPI(*F, LI, nullptr, &DT, nullptr); // Finally compute BFI. OwnedBFI = std::make_unique<BlockFrequencyInfo>(*F, BPI, LI); @@ -97,17 +97,17 @@ OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass() bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) { BlockFrequencyInfo *BFI; - auto &Context = Fn.getContext(); - if (Context.getDiagnosticsHotnessRequested()) { + auto &Context = Fn.getContext(); + if (Context.getDiagnosticsHotnessRequested()) { BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); - // Get hotness threshold from PSI. This should only happen once. - if (Context.isDiagnosticsHotnessThresholdSetFromPSI()) { - if (ProfileSummaryInfo *PSI = - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()) - Context.setDiagnosticsHotnessThreshold( - PSI->getOrCompHotCountThreshold()); - } - } else + // Get hotness threshold from PSI. This should only happen once. 
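// Illustrative sketch, not part of this diff: the branch below only runs when
// a front end has requested hotness-annotated remarks and has left the
// threshold to be derived from the profile summary. Assuming that is how the
// LLVMContext API behaves in this tree, the setup would look roughly like
// this; enableAutoHotnessThreshold is a hypothetical helper.
#include "llvm/ADT/Optional.h"
#include "llvm/IR/LLVMContext.h"

static void enableAutoHotnessThreshold(llvm::LLVMContext &Ctx) {
  Ctx.setDiagnosticsHotnessRequested(true);
  // Passing None is assumed to mark the threshold as "fill in from
  // ProfileSummaryInfo", which is the state the code below tests for.
  Ctx.setDiagnosticsHotnessThreshold(llvm::None);
}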
+ if (Context.isDiagnosticsHotnessThresholdSetFromPSI()) { + if (ProfileSummaryInfo *PSI = + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()) + Context.setDiagnosticsHotnessThreshold( + PSI->getOrCompHotCountThreshold()); + } + } else BFI = nullptr; ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn, BFI); @@ -117,7 +117,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) { void OptimizationRemarkEmitterWrapperPass::getAnalysisUsage( AnalysisUsage &AU) const { LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.setPreservesAll(); } @@ -127,19 +127,19 @@ OptimizationRemarkEmitter OptimizationRemarkEmitterAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BlockFrequencyInfo *BFI; - auto &Context = F.getContext(); + auto &Context = F.getContext(); - if (Context.getDiagnosticsHotnessRequested()) { + if (Context.getDiagnosticsHotnessRequested()) { BFI = &AM.getResult<BlockFrequencyAnalysis>(F); - // Get hotness threshold from PSI. This should only happen once. - if (Context.isDiagnosticsHotnessThresholdSetFromPSI()) { - auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); - if (ProfileSummaryInfo *PSI = - MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent())) - Context.setDiagnosticsHotnessThreshold( - PSI->getOrCompHotCountThreshold()); - } - } else + // Get hotness threshold from PSI. This should only happen once. + if (Context.isDiagnosticsHotnessThresholdSetFromPSI()) { + auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); + if (ProfileSummaryInfo *PSI = + MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent())) + Context.setDiagnosticsHotnessThreshold( + PSI->getOrCompHotCountThreshold()); + } + } else BFI = nullptr; return OptimizationRemarkEmitter(&F, BFI); @@ -152,6 +152,6 @@ static const char ore_name[] = "Optimization Remark Emitter"; INITIALIZE_PASS_BEGIN(OptimizationRemarkEmitterWrapperPass, ORE_NAME, ore_name, false, true) INITIALIZE_PASS_DEPENDENCY(LazyBFIPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(OptimizationRemarkEmitterWrapperPass, ORE_NAME, ore_name, false, true) diff --git a/contrib/libs/llvm12/lib/Analysis/RegionPass.cpp b/contrib/libs/llvm12/lib/Analysis/RegionPass.cpp index a73607dbef..82a9a1d5de 100644 --- a/contrib/libs/llvm12/lib/Analysis/RegionPass.cpp +++ b/contrib/libs/llvm12/lib/Analysis/RegionPass.cpp @@ -15,7 +15,7 @@ #include "llvm/Analysis/RegionPass.h" #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" -#include "llvm/IR/StructuralHash.h" +#include "llvm/IR/StructuralHash.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" @@ -87,54 +87,54 @@ bool RGPassManager::runOnFunction(Function &F) { initializeAnalysisImpl(P); - bool LocalChanged = false; + bool LocalChanged = false; { PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry()); TimeRegion PassTimer(getPassTimer(P)); -#ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(F); -#endif - LocalChanged = P->runOnRegion(CurrentRegion, *this); - -#ifdef EXPENSIVE_CHECKS - if (!LocalChanged && (RefHash != StructuralHash(F))) { - llvm::errs() << "Pass modifies its input and doesn't report it: " - << P->getPassName() << "\n"; - llvm_unreachable("Pass modifies its input and doesn't report it"); - } -#endif - - Changed |= LocalChanged; 
+#ifdef EXPENSIVE_CHECKS + uint64_t RefHash = StructuralHash(F); +#endif + LocalChanged = P->runOnRegion(CurrentRegion, *this); + +#ifdef EXPENSIVE_CHECKS + if (!LocalChanged && (RefHash != StructuralHash(F))) { + llvm::errs() << "Pass modifies its input and doesn't report it: " + << P->getPassName() << "\n"; + llvm_unreachable("Pass modifies its input and doesn't report it"); + } +#endif + + Changed |= LocalChanged; } if (isPassDebuggingExecutionsOrMore()) { - if (LocalChanged) + if (LocalChanged) dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG, CurrentRegion->getNameStr()); dumpPreservedSet(P); } - // Manually check that this region is still healthy. This is done - // instead of relying on RegionInfo::verifyRegion since RegionInfo - // is a function pass and it's really expensive to verify every - // Region in the function every time. That level of checking can be - // enabled with the -verify-region-info option. - { - TimeRegion PassTimer(getPassTimer(P)); - CurrentRegion->verifyRegion(); + // Manually check that this region is still healthy. This is done + // instead of relying on RegionInfo::verifyRegion since RegionInfo + // is a function pass and it's really expensive to verify every + // Region in the function every time. That level of checking can be + // enabled with the -verify-region-info option. + { + TimeRegion PassTimer(getPassTimer(P)); + CurrentRegion->verifyRegion(); } - // Then call the regular verifyAnalysis functions. - verifyPreservedAnalysis(P); - - if (LocalChanged) - removeNotPreservedAnalysis(P); + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(P); + + if (LocalChanged) + removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); removeDeadPasses(P, - (!isPassDebuggingExecutionsOrMore()) - ? "<deleted>" - : CurrentRegion->getNameStr(), + (!isPassDebuggingExecutionsOrMore()) + ? "<deleted>" + : CurrentRegion->getNameStr(), ON_REGION_MSG); } diff --git a/contrib/libs/llvm12/lib/Analysis/ReleaseModeModelRunner.cpp b/contrib/libs/llvm12/lib/Analysis/ReleaseModeModelRunner.cpp index 02a4327e54..cdf46d2a74 100644 --- a/contrib/libs/llvm12/lib/Analysis/ReleaseModeModelRunner.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ReleaseModeModelRunner.cpp @@ -1,90 +1,90 @@ -//===- ReleaseModeModelRunner.cpp - Fast, precompiled model runner -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a model runner wrapping an AOT compiled ML model. -// Only inference is supported. -// -//===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_AOT) - -#include "llvm/Analysis/InlineModelFeatureMaps.h" -#include "llvm/Analysis/MLInlineAdvisor.h" - -// codegen-ed file -#error #include "InlinerSizeModel.h" // NOLINT - -#include <memory> -#include <vector> - -using namespace llvm; -namespace { - -const char FeedPrefix[] = "feed_"; -const char FetchPrefix[] = "fetch_"; - -/// MLModelRunner - production mode implementation. It uses a AOT-compiled -/// SavedModel for efficient execution. 
-class ReleaseModeModelRunner final : public MLModelRunner { -public: - ReleaseModeModelRunner(LLVMContext &Ctx); - virtual ~ReleaseModeModelRunner() = default; - - bool run() override; - - void setFeature(FeatureIndex Index, int64_t Value) override; - int64_t getFeature(int Index) const override; - -private: - std::vector<int32_t> FeatureIndices; - int32_t ResultIndex = -1; - std::unique_ptr<llvm::InlinerSizeModel> CompiledModel; -}; -} // namespace - -ReleaseModeModelRunner::ReleaseModeModelRunner(LLVMContext &Ctx) - : MLModelRunner(Ctx), - CompiledModel(std::make_unique<llvm::InlinerSizeModel>()) { - assert(CompiledModel && "The CompiledModel should be valid"); - - FeatureIndices.reserve(NumberOfFeatures); - - for (size_t I = 0; I < NumberOfFeatures; ++I) { - const int Index = - CompiledModel->LookupArgIndex(FeedPrefix + FeatureNameMap[I]); - assert(Index >= 0 && "Cannot find Feature in inlining model"); - FeatureIndices[I] = Index; - } - - ResultIndex = - CompiledModel->LookupResultIndex(std::string(FetchPrefix) + DecisionName); - assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model"); -} - -int64_t ReleaseModeModelRunner::getFeature(int Index) const { - return *static_cast<int64_t *>( - CompiledModel->arg_data(FeatureIndices[Index])); -} - -void ReleaseModeModelRunner::setFeature(FeatureIndex Index, int64_t Value) { - *static_cast<int64_t *>(CompiledModel->arg_data( - FeatureIndices[static_cast<size_t>(Index)])) = Value; -} - -bool ReleaseModeModelRunner::run() { - CompiledModel->Run(); - return static_cast<bool>( - *static_cast<int64_t *>(CompiledModel->result_data(ResultIndex))); -} - -std::unique_ptr<InlineAdvisor> -llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { - auto AOTRunner = std::make_unique<ReleaseModeModelRunner>(M.getContext()); - return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner)); -} -#endif // defined(LLVM_HAVE_TF_AOT) +//===- ReleaseModeModelRunner.cpp - Fast, precompiled model runner -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a model runner wrapping an AOT compiled ML model. +// Only inference is supported. +// +//===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" +#if defined(LLVM_HAVE_TF_AOT) + +#include "llvm/Analysis/InlineModelFeatureMaps.h" +#include "llvm/Analysis/MLInlineAdvisor.h" + +// codegen-ed file +#error #include "InlinerSizeModel.h" // NOLINT + +#include <memory> +#include <vector> + +using namespace llvm; +namespace { + +const char FeedPrefix[] = "feed_"; +const char FetchPrefix[] = "fetch_"; + +/// MLModelRunner - production mode implementation. It uses a AOT-compiled +/// SavedModel for efficient execution. 
+class ReleaseModeModelRunner final : public MLModelRunner { +public: + ReleaseModeModelRunner(LLVMContext &Ctx); + virtual ~ReleaseModeModelRunner() = default; + + bool run() override; + + void setFeature(FeatureIndex Index, int64_t Value) override; + int64_t getFeature(int Index) const override; + +private: + std::vector<int32_t> FeatureIndices; + int32_t ResultIndex = -1; + std::unique_ptr<llvm::InlinerSizeModel> CompiledModel; +}; +} // namespace + +ReleaseModeModelRunner::ReleaseModeModelRunner(LLVMContext &Ctx) + : MLModelRunner(Ctx), + CompiledModel(std::make_unique<llvm::InlinerSizeModel>()) { + assert(CompiledModel && "The CompiledModel should be valid"); + + FeatureIndices.reserve(NumberOfFeatures); + + for (size_t I = 0; I < NumberOfFeatures; ++I) { + const int Index = + CompiledModel->LookupArgIndex(FeedPrefix + FeatureNameMap[I]); + assert(Index >= 0 && "Cannot find Feature in inlining model"); + FeatureIndices[I] = Index; + } + + ResultIndex = + CompiledModel->LookupResultIndex(std::string(FetchPrefix) + DecisionName); + assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model"); +} + +int64_t ReleaseModeModelRunner::getFeature(int Index) const { + return *static_cast<int64_t *>( + CompiledModel->arg_data(FeatureIndices[Index])); +} + +void ReleaseModeModelRunner::setFeature(FeatureIndex Index, int64_t Value) { + *static_cast<int64_t *>(CompiledModel->arg_data( + FeatureIndices[static_cast<size_t>(Index)])) = Value; +} + +bool ReleaseModeModelRunner::run() { + CompiledModel->Run(); + return static_cast<bool>( + *static_cast<int64_t *>(CompiledModel->result_data(ResultIndex))); +} + +std::unique_ptr<InlineAdvisor> +llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { + auto AOTRunner = std::make_unique<ReleaseModeModelRunner>(M.getContext()); + return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner)); +} +#endif // defined(LLVM_HAVE_TF_AOT) diff --git a/contrib/libs/llvm12/lib/Analysis/ReplayInlineAdvisor.cpp b/contrib/libs/llvm12/lib/Analysis/ReplayInlineAdvisor.cpp index b9dac2f3ff..9833a5635c 100644 --- a/contrib/libs/llvm12/lib/Analysis/ReplayInlineAdvisor.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ReplayInlineAdvisor.cpp @@ -1,82 +1,82 @@ -//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements ReplayInlineAdvisor that replays inline decisions based -// on previous inline remarks from optimization remark log. This is a best -// effort approach useful for testing compiler/source changes while holding -// inlining steady. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ReplayInlineAdvisor.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/LineIterator.h" - -using namespace llvm; - -#define DEBUG_TYPE "inline-replay" - -ReplayInlineAdvisor::ReplayInlineAdvisor( - Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, - std::unique_ptr<InlineAdvisor> OriginalAdvisor, StringRef RemarksFile, - bool EmitRemarks) - : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)), - HasReplayRemarks(false), EmitRemarks(EmitRemarks) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); - std::error_code EC = BufferOrErr.getError(); - if (EC) { - Context.emitError("Could not open remarks file: " + EC.message()); - return; - } - - // Example for inline remarks to parse: - // main:3:1.1: _Z3subii inlined into main at callsite sum:1 @ main:3:1.1 - // We use the callsite string after `at callsite` to replay inlining. - line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef Line = *LineIt; - auto Pair = Line.split(" at callsite "); - - auto Callee = Pair.first.split(" inlined into").first.rsplit(": ").second; - - auto CallSite = Pair.second.split(";").first; - - if (Callee.empty() || CallSite.empty()) - continue; - - std::string Combined = (Callee + CallSite).str(); - InlineSitesFromRemarks.insert(Combined); - } - - HasReplayRemarks = true; -} - -std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdviceImpl(CallBase &CB) { - assert(HasReplayRemarks); - - Function &Caller = *CB.getCaller(); - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); - - if (InlineSitesFromRemarks.empty()) - return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE, - EmitRemarks); - - std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); - StringRef Callee = CB.getCalledFunction()->getName(); - std::string Combined = (Callee + CallSiteLoc).str(); - auto Iter = InlineSitesFromRemarks.find(Combined); - - Optional<InlineCost> InlineRecommended = None; - if (Iter != InlineSitesFromRemarks.end()) { - InlineRecommended = llvm::InlineCost::getAlways("found in replay"); - } - - return std::make_unique<DefaultInlineAdvice>(this, CB, InlineRecommended, ORE, - EmitRemarks); -} +//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements ReplayInlineAdvisor that replays inline decisions based +// on previous inline remarks from optimization remark log. This is a best +// effort approach useful for testing compiler/source changes while holding +// inlining steady. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/LineIterator.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline-replay" + +ReplayInlineAdvisor::ReplayInlineAdvisor( + Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, + std::unique_ptr<InlineAdvisor> OriginalAdvisor, StringRef RemarksFile, + bool EmitRemarks) + : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)), + HasReplayRemarks(false), EmitRemarks(EmitRemarks) { + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); + std::error_code EC = BufferOrErr.getError(); + if (EC) { + Context.emitError("Could not open remarks file: " + EC.message()); + return; + } + + // Example for inline remarks to parse: + // main:3:1.1: _Z3subii inlined into main at callsite sum:1 @ main:3:1.1 + // We use the callsite string after `at callsite` to replay inlining. + line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + auto Pair = Line.split(" at callsite "); + + auto Callee = Pair.first.split(" inlined into").first.rsplit(": ").second; + + auto CallSite = Pair.second.split(";").first; + + if (Callee.empty() || CallSite.empty()) + continue; + + std::string Combined = (Callee + CallSite).str(); + InlineSitesFromRemarks.insert(Combined); + } + + HasReplayRemarks = true; +} + +std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdviceImpl(CallBase &CB) { + assert(HasReplayRemarks); + + Function &Caller = *CB.getCaller(); + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); + + if (InlineSitesFromRemarks.empty()) + return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE, + EmitRemarks); + + std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); + StringRef Callee = CB.getCalledFunction()->getName(); + std::string Combined = (Callee + CallSiteLoc).str(); + auto Iter = InlineSitesFromRemarks.find(Combined); + + Optional<InlineCost> InlineRecommended = None; + if (Iter != InlineSitesFromRemarks.end()) { + InlineRecommended = llvm::InlineCost::getAlways("found in replay"); + } + + return std::make_unique<DefaultInlineAdvice>(this, CB, InlineRecommended, ORE, + EmitRemarks); +} diff --git a/contrib/libs/llvm12/lib/Analysis/ScalarEvolution.cpp b/contrib/libs/llvm12/lib/Analysis/ScalarEvolution.cpp index 1a9ae68573..5ff63868c9 100644 --- a/contrib/libs/llvm12/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ScalarEvolution.cpp @@ -135,7 +135,7 @@ #include <vector> using namespace llvm; -using namespace PatternMatch; +using namespace PatternMatch; #define DEBUG_TYPE "scalar-evolution" @@ -227,11 +227,11 @@ ClassifyExpressions("scalar-evolution-classify-expressions", cl::Hidden, cl::init(true), cl::desc("When printing analysis, include information on every instruction")); -static cl::opt<bool> UseExpensiveRangeSharpening( - "scalar-evolution-use-expensive-range-sharpening", cl::Hidden, - cl::init(false), - cl::desc("Use more powerful methods of sharpening expression ranges. May " - "be costly in terms of compile time")); +static cl::opt<bool> UseExpensiveRangeSharpening( + "scalar-evolution-use-expensive-range-sharpening", cl::Hidden, + cl::init(false), + cl::desc("Use more powerful methods of sharpening expression ranges. 
May " + "be costly in terms of compile time")); //===----------------------------------------------------------------------===// // SCEV class definitions @@ -249,17 +249,17 @@ LLVM_DUMP_METHOD void SCEV::dump() const { #endif void SCEV::print(raw_ostream &OS) const { - switch (getSCEVType()) { + switch (getSCEVType()) { case scConstant: cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false); return; - case scPtrToInt: { - const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(this); - const SCEV *Op = PtrToInt->getOperand(); - OS << "(ptrtoint " << *Op->getType() << " " << *Op << " to " - << *PtrToInt->getType() << ")"; - return; - } + case scPtrToInt: { + const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(this); + const SCEV *Op = PtrToInt->getOperand(); + OS << "(ptrtoint " << *Op->getType() << " " << *Op << " to " + << *PtrToInt->getType() << ")"; + return; + } case scTruncate: { const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); const SCEV *Op = Trunc->getOperand(); @@ -317,8 +317,8 @@ void SCEV::print(raw_ostream &OS) const { case scSMinExpr: OpStr = " smin "; break; - default: - llvm_unreachable("There are no other nary expression types."); + default: + llvm_unreachable("There are no other nary expression types."); } OS << "("; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); @@ -335,10 +335,10 @@ void SCEV::print(raw_ostream &OS) const { OS << "<nuw>"; if (NAry->hasNoSignedWrap()) OS << "<nsw>"; - break; - default: - // Nothing to print for other nary expressions. - break; + break; + default: + // Nothing to print for other nary expressions. + break; } return; } @@ -380,10 +380,10 @@ void SCEV::print(raw_ostream &OS) const { } Type *SCEV::getType() const { - switch (getSCEVType()) { + switch (getSCEVType()) { case scConstant: return cast<SCEVConstant>(this)->getType(); - case scPtrToInt: + case scPtrToInt: case scTruncate: case scZeroExtend: case scSignExtend: @@ -465,42 +465,42 @@ ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { return getConstant(ConstantInt::get(ITy, V, isSigned)); } -SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, - const SCEV *op, Type *ty) - : SCEV(ID, SCEVTy, computeExpressionSize(op)), Ty(ty) { - Operands[0] = op; -} - -SCEVPtrToIntExpr::SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op, - Type *ITy) - : SCEVCastExpr(ID, scPtrToInt, Op, ITy) { - assert(getOperand()->getType()->isPointerTy() && Ty->isIntegerTy() && - "Must be a non-bit-width-changing pointer-to-integer cast!"); -} - -SCEVIntegralCastExpr::SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID, - SCEVTypes SCEVTy, const SCEV *op, - Type *ty) - : SCEVCastExpr(ID, SCEVTy, op, ty) {} - -SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, - Type *ty) - : SCEVIntegralCastExpr(ID, scTruncate, op, ty) { - assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, + const SCEV *op, Type *ty) + : SCEV(ID, SCEVTy, computeExpressionSize(op)), Ty(ty) { + Operands[0] = op; +} + +SCEVPtrToIntExpr::SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op, + Type *ITy) + : SCEVCastExpr(ID, scPtrToInt, Op, ITy) { + assert(getOperand()->getType()->isPointerTy() && Ty->isIntegerTy() && + "Must be a non-bit-width-changing pointer-to-integer cast!"); +} + +SCEVIntegralCastExpr::SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID, + SCEVTypes SCEVTy, const SCEV *op, + Type *ty) + : 
SCEVCastExpr(ID, SCEVTy, op, ty) {} + +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, + Type *ty) + : SCEVIntegralCastExpr(ID, scTruncate, op, ty) { + assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate non-integer value!"); } SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) - : SCEVIntegralCastExpr(ID, scZeroExtend, op, ty) { - assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && + : SCEVIntegralCastExpr(ID, scZeroExtend, op, ty) { + assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot zero extend non-integer value!"); } SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) - : SCEVIntegralCastExpr(ID, scSignExtend, op, ty) { - assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && + : SCEVIntegralCastExpr(ID, scSignExtend, op, ty) { + assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot sign extend non-integer value!"); } @@ -699,7 +699,7 @@ static int CompareSCEVComplexity( return 0; // Primarily, sort the SCEVs by their getSCEVType(). - SCEVTypes LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + SCEVTypes LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); if (LType != RType) return (int)LType - (int)RType; @@ -708,7 +708,7 @@ static int CompareSCEVComplexity( // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, // so that (a + b) and (b + a) don't end up as different expressions. - switch (LType) { + switch (LType) { case scUnknown: { const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); @@ -810,7 +810,7 @@ static int CompareSCEVComplexity( return X; } - case scPtrToInt: + case scPtrToInt: case scTruncate: case scZeroExtend: case scSignExtend: { @@ -1034,115 +1034,115 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, // SCEV Expression folder implementations //===----------------------------------------------------------------------===// -const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty, - unsigned Depth) { - assert(Ty->isIntegerTy() && "Target type must be an integer type!"); - assert(Depth <= 1 && "getPtrToIntExpr() should self-recurse at most once."); - - // We could be called with an integer-typed operands during SCEV rewrites. - // Since the operand is an integer already, just perform zext/trunc/self cast. - if (!Op->getType()->isPointerTy()) - return getTruncateOrZeroExtend(Op, Ty); - - // What would be an ID for such a SCEV cast expression? - FoldingSetNodeID ID; - ID.AddInteger(scPtrToInt); - ID.AddPointer(Op); - - void *IP = nullptr; - - // Is there already an expression for such a cast? - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) - return getTruncateOrZeroExtend(S, Ty); - - // If not, is this expression something we can't reduce any further? - if (isa<SCEVUnknown>(Op)) { - // Create an explicit cast node. - // We can reuse the existing insert position since if we get here, - // we won't have made any changes which would invalidate it. 
- Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType()); - assert(getDataLayout().getTypeSizeInBits(getEffectiveSCEVType( - Op->getType())) == getDataLayout().getTypeSizeInBits(IntPtrTy) && - "We can only model ptrtoint if SCEV's effective (integer) type is " - "sufficiently wide to represent all possible pointer values."); - SCEV *S = new (SCEVAllocator) - SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy); - UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); - return getTruncateOrZeroExtend(S, Ty); - } - - assert(Depth == 0 && - "getPtrToIntExpr() should not self-recurse for non-SCEVUnknown's."); - - // Otherwise, we've got some expression that is more complex than just a - // single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an - // arbitrary expression, we want to have SCEVPtrToIntExpr of an SCEVUnknown - // only, and the expressions must otherwise be integer-typed. - // So sink the cast down to the SCEVUnknown's. - - /// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression, - /// which computes a pointer-typed value, and rewrites the whole expression - /// tree so that *all* the computations are done on integers, and the only - /// pointer-typed operands in the expression are SCEVUnknown. - class SCEVPtrToIntSinkingRewriter - : public SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter> { - using Base = SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter>; - - public: - SCEVPtrToIntSinkingRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {} - - static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE) { - SCEVPtrToIntSinkingRewriter Rewriter(SE); - return Rewriter.visit(Scev); - } - - const SCEV *visit(const SCEV *S) { - Type *STy = S->getType(); - // If the expression is not pointer-typed, just keep it as-is. - if (!STy->isPointerTy()) - return S; - // Else, recursively sink the cast down into it. - return Base::visit(S); - } - - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - bool Changed = false; - for (auto *Op : Expr->operands()) { - Operands.push_back(visit(Op)); - Changed |= Op != Operands.back(); - } - return !Changed ? Expr : SE.getAddExpr(Operands, Expr->getNoWrapFlags()); - } - - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - bool Changed = false; - for (auto *Op : Expr->operands()) { - Operands.push_back(visit(Op)); - Changed |= Op != Operands.back(); - } - return !Changed ? Expr : SE.getMulExpr(Operands, Expr->getNoWrapFlags()); - } - - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - Type *ExprPtrTy = Expr->getType(); - assert(ExprPtrTy->isPointerTy() && - "Should only reach pointer-typed SCEVUnknown's."); - Type *ExprIntPtrTy = SE.getDataLayout().getIntPtrType(ExprPtrTy); - return SE.getPtrToIntExpr(Expr, ExprIntPtrTy, /*Depth=*/1); - } - }; - - // And actually perform the cast sinking. - const SCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(Op, *this); - assert(IntOp->getType()->isIntegerTy() && - "We must have succeeded in sinking the cast, " - "and ending up with an integer-typed expression!"); - return getTruncateOrZeroExtend(IntOp, Ty); -} - +const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty, + unsigned Depth) { + assert(Ty->isIntegerTy() && "Target type must be an integer type!"); + assert(Depth <= 1 && "getPtrToIntExpr() should self-recurse at most once."); + + // We could be called with an integer-typed operands during SCEV rewrites. 
+ // Since the operand is an integer already, just perform zext/trunc/self cast. + if (!Op->getType()->isPointerTy()) + return getTruncateOrZeroExtend(Op, Ty); + + // What would be an ID for such a SCEV cast expression? + FoldingSetNodeID ID; + ID.AddInteger(scPtrToInt); + ID.AddPointer(Op); + + void *IP = nullptr; + + // Is there already an expression for such a cast? + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) + return getTruncateOrZeroExtend(S, Ty); + + // If not, is this expression something we can't reduce any further? + if (isa<SCEVUnknown>(Op)) { + // Create an explicit cast node. + // We can reuse the existing insert position since if we get here, + // we won't have made any changes which would invalidate it. + Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType()); + assert(getDataLayout().getTypeSizeInBits(getEffectiveSCEVType( + Op->getType())) == getDataLayout().getTypeSizeInBits(IntPtrTy) && + "We can only model ptrtoint if SCEV's effective (integer) type is " + "sufficiently wide to represent all possible pointer values."); + SCEV *S = new (SCEVAllocator) + SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy); + UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); + return getTruncateOrZeroExtend(S, Ty); + } + + assert(Depth == 0 && + "getPtrToIntExpr() should not self-recurse for non-SCEVUnknown's."); + + // Otherwise, we've got some expression that is more complex than just a + // single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an + // arbitrary expression, we want to have SCEVPtrToIntExpr of an SCEVUnknown + // only, and the expressions must otherwise be integer-typed. + // So sink the cast down to the SCEVUnknown's. + + /// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression, + /// which computes a pointer-typed value, and rewrites the whole expression + /// tree so that *all* the computations are done on integers, and the only + /// pointer-typed operands in the expression are SCEVUnknown. + class SCEVPtrToIntSinkingRewriter + : public SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter> { + using Base = SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter>; + + public: + SCEVPtrToIntSinkingRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {} + + static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE) { + SCEVPtrToIntSinkingRewriter Rewriter(SE); + return Rewriter.visit(Scev); + } + + const SCEV *visit(const SCEV *S) { + Type *STy = S->getType(); + // If the expression is not pointer-typed, just keep it as-is. + if (!STy->isPointerTy()) + return S; + // Else, recursively sink the cast down into it. + return Base::visit(S); + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + SmallVector<const SCEV *, 2> Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getAddExpr(Operands, Expr->getNoWrapFlags()); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + SmallVector<const SCEV *, 2> Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? 
Expr : SE.getMulExpr(Operands, Expr->getNoWrapFlags()); + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + Type *ExprPtrTy = Expr->getType(); + assert(ExprPtrTy->isPointerTy() && + "Should only reach pointer-typed SCEVUnknown's."); + Type *ExprIntPtrTy = SE.getDataLayout().getIntPtrType(ExprPtrTy); + return SE.getPtrToIntExpr(Expr, ExprIntPtrTy, /*Depth=*/1); + } + }; + + // And actually perform the cast sinking. + const SCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(Op, *this); + assert(IntOp->getType()->isIntegerTy() && + "We must have succeeded in sinking the cast, " + "and ending up with an integer-typed expression!"); + return getTruncateOrZeroExtend(IntOp, Ty); +} + const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && @@ -1194,8 +1194,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2; ++i) { const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1); - if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) && - isa<SCEVTruncateExpr>(S)) + if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) && + isa<SCEVTruncateExpr>(S)) numTruncs++; Operands.push_back(S); } @@ -1222,11 +1222,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } - // Return zero if truncating to known zeros. - uint32_t MinTrailingZeros = GetMinTrailingZeros(Op); - if (MinTrailingZeros >= getTypeSizeInBits(Ty)) - return getZero(Ty); - + // Return zero if truncating to known zeros. + uint32_t MinTrailingZeros = GetMinTrailingZeros(Op); + if (MinTrailingZeros >= getTypeSizeInBits(Ty)) + return getZero(Ty); + // The cast wasn't folded; create an explicit cast node. We can reuse // the existing insert position since if we get here, we won't have // made any changes which would invalidate it. @@ -1387,7 +1387,7 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. - SE->setNoWrapFlags(const_cast<SCEVAddRecExpr *>(PreAR), WrapType); + SE->setNoWrapFlags(const_cast<SCEVAddRecExpr *>(PreAR), WrapType); } return PreStart; } @@ -1591,7 +1591,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { if (!AR->hasNoUnsignedWrap()) { auto NewFlags = proveNoWrapViaConstantRanges(AR); - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); } // If we have special knowledge that this addrec won't overflow, @@ -1611,7 +1611,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // that value once it has finished. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa<SCEVCouldNotCompute>(MaxBECount)) { - // Manually compute the final value for AR, checking for overflow. + // Manually compute the final value for AR, checking for overflow. // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. 
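// --- Illustrative sketch (not part of the diff above) ----------------------
// The getPtrToIntExpr hunk above creates SCEVPtrToIntExpr nodes only for
// SCEVUnknown leaves and otherwise sinks the cast through adds and multiplies
// via SCEVPtrToIntSinkingRewriter. The standalone C++ below models that
// cast-sinking pattern on a toy expression tree; it deliberately does not use
// LLVM's classes, and names such as sinkCast are made up for the example.
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Expr {
  enum Kind { Leaf, Add, Mul } K;
  std::string Name;                       // Leaf only
  std::vector<std::shared_ptr<Expr>> Ops; // Add/Mul only
};
using ExprPtr = std::shared_ptr<Expr>;

static ExprPtr leaf(std::string N) {
  return std::make_shared<Expr>(Expr{Expr::Leaf, std::move(N), {}});
}
static ExprPtr node(Expr::Kind K, std::vector<ExprPtr> Ops) {
  return std::make_shared<Expr>(Expr{K, "", std::move(Ops)});
}

// Recursively rewrite the tree so the cast is applied only at the leaves.
static ExprPtr sinkCast(const ExprPtr &E) {
  if (E->K == Expr::Leaf)
    return leaf("cast(" + E->Name + ")");
  std::vector<ExprPtr> NewOps;
  for (const auto &Op : E->Ops)
    NewOps.push_back(sinkCast(Op));
  return node(E->K, std::move(NewOps));
}

static std::string print(const ExprPtr &E) {
  if (E->K == Expr::Leaf)
    return E->Name;
  std::string S = "(";
  const char *Sep = "";
  for (const auto &Op : E->Ops) {
    S += Sep + print(Op);
    Sep = (E->K == Expr::Add) ? " + " : " * ";
  }
  return S + ")";
}

int main() {
  // cast(p + 4*q) -> (cast(p) + (cast(4) * cast(q))): the cast now sits only
  // on the leaves, and the surrounding arithmetic stays in "integer" form.
  ExprPtr E = node(Expr::Add, {leaf("p"), node(Expr::Mul, {leaf("4"), leaf("q")})});
  std::cout << print(sinkCast(E)) << "\n";
  return 0;
}
// ---------------------------------------------------------------------------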
@@ -1639,7 +1639,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, @@ -1658,7 +1658,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, @@ -1678,24 +1678,24 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // doing extra work that may not pay off. if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards || !AC.assumptions().empty()) { - - auto NewFlags = proveNoUnsignedWrapViaInduction(AR); - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); - if (AR->hasNoUnsignedWrap()) { - // Same as nuw case above - duplicated here to avoid a compile time - // issue. It's not clear that the order of checks does matter, but - // it's one of two issue possible causes for a change which was - // reverted. Be conservative for the moment. - return getAddRecExpr( + + auto NewFlags = proveNoUnsignedWrapViaInduction(AR); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); + if (AR->hasNoUnsignedWrap()) { + // Same as nuw case above - duplicated here to avoid a compile time + // issue. It's not clear that the order of checks does matter, but + // it's one of two issue possible causes for a change which was + // reverted. Be conservative for the moment. + return getAddRecExpr( getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); - } - - // For a negative step, we can extend the operands iff doing so only - // traverses values in the range zext([0,UINT_MAX]). - if (isKnownNegative(Step)) { + } + + // For a negative step, we can extend the operands iff doing so only + // traverses values in the range zext([0,UINT_MAX]). + if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRangeMin(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || @@ -1703,7 +1703,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // Cache knowledge of AR NW, which is propagated to this // AddRec. Negative step causes unsigned wrap, but it // still can't self-wrap. - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. 
return getAddRecExpr( getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, @@ -1732,7 +1732,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { } if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) { - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW); return getAddRecExpr( getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); @@ -1931,7 +1931,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { if (!AR->hasNoSignedWrap()) { auto NewFlags = proveNoWrapViaConstantRanges(AR); - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); } // If we have special knowledge that this addrec won't overflow, @@ -1980,7 +1980,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, @@ -2005,7 +2005,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> // (SAdd == OperandExtendedAdd => AR is NW) - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( @@ -2017,16 +2017,16 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { } } - auto NewFlags = proveNoSignedWrapViaInduction(AR); - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); - if (AR->hasNoSignedWrap()) { - // Same as nsw case above - duplicated here to avoid a compile time - // issue. It's not clear that the order of checks does matter, but - // it's one of two issue possible causes for a change which was - // reverted. Be conservative for the moment. - return getAddRecExpr( - getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), - getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); + auto NewFlags = proveNoSignedWrapViaInduction(AR); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags); + if (AR->hasNoSignedWrap()) { + // Same as nsw case above - duplicated here to avoid a compile time + // issue. It's not clear that the order of checks does matter, but + // it's one of two issue possible causes for a change which was + // reverted. Be conservative for the moment. 
+ return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), + getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw> @@ -2047,7 +2047,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { } if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) { - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW); return getAddRecExpr( getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); @@ -2177,7 +2177,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, } else { // A multiplication of a constant with some other value. Update // the map. - SmallVector<const SCEV *, 4> MulOps(drop_begin(Mul->operands())); + SmallVector<const SCEV *, 4> MulOps(drop_begin(Mul->operands())); const SCEV *Key = SE.getMulExpr(MulOps); auto Pair = M.insert({Key, NewScale}); if (Pair.second) { @@ -2281,9 +2281,9 @@ bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, - SCEV::NoWrapFlags OrigFlags, + SCEV::NoWrapFlags OrigFlags, unsigned Depth) { - assert(!(OrigFlags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && + assert(!(OrigFlags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; @@ -2319,20 +2319,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Ops.size() == 1) return Ops[0]; } - // Delay expensive flag strengthening until necessary. - auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) { - return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags); - }; - + // Delay expensive flag strengthening until necessary. + auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) { + return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags); + }; + // Limit recursion calls depth. if (Depth > MaxArithDepth || hasHugeExpression(Ops)) - return getOrCreateAddExpr(Ops, ComputeFlags(Ops)); + return getOrCreateAddExpr(Ops, ComputeFlags(Ops)); if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) { - // Don't strengthen flags if we have no new information. - SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S); - if (Add->getNoWrapFlags(OrigFlags) != OrigFlags) - Add->setNoWrapFlags(ComputeFlags(Ops)); + // Don't strengthen flags if we have no new information. + SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S); + if (Add->getNoWrapFlags(OrigFlags) != OrigFlags) + Add->setNoWrapFlags(ComputeFlags(Ops)); return S; } @@ -2358,7 +2358,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, FoundMatch = true; } if (FoundMatch) - return getAddExpr(Ops, OrigFlags, Depth + 1); + return getAddExpr(Ops, OrigFlags, Depth + 1); // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be @@ -2593,16 +2593,16 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { - // Compute nowrap flags for the addition of the loop-invariant ops and - // the addrec. 
Temporarily push it as an operand for that purpose. - LIOps.push_back(AddRec); - SCEV::NoWrapFlags Flags = ComputeFlags(LIOps); - LIOps.pop_back(); - + // Compute nowrap flags for the addition of the loop-invariant ops and + // the addrec. Temporarily push it as an operand for that purpose. + LIOps.push_back(AddRec); + SCEV::NoWrapFlags Flags = ComputeFlags(LIOps); + LIOps.pop_back(); + // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); - SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands()); + SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands()); // This follows from the fact that the no-wrap flags on the outer add // expression are applicable on the 0th iteration, when the add recurrence // will be equal to its start value. @@ -2640,7 +2640,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, "AddRecExprs are not sorted in reverse dominance order?"); if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> - SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands()); + SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands()); for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); ++OtherIdx) { const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); @@ -2671,7 +2671,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // Okay, it looks like we really DO need an add expr. Check to see if we // already have one, otherwise create a new one. - return getOrCreateAddExpr(Ops, ComputeFlags(Ops)); + return getOrCreateAddExpr(Ops, ComputeFlags(Ops)); } const SCEV * @@ -2715,7 +2715,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); } - setNoWrapFlags(S, Flags); + setNoWrapFlags(S, Flags); return S; } @@ -2797,9 +2797,9 @@ static bool containsConstantInAddMulChain(const SCEV *StartExpr) { /// Get a canonical multiply expression, or something simpler if possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, - SCEV::NoWrapFlags OrigFlags, + SCEV::NoWrapFlags OrigFlags, unsigned Depth) { - assert(OrigFlags == maskFlags(OrigFlags, SCEV::FlagNUW | SCEV::FlagNSW) && + assert(OrigFlags == maskFlags(OrigFlags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; @@ -2813,52 +2813,52 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); - // If there are any constants, fold them together. - unsigned Idx = 0; - if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { - ++Idx; - assert(Idx < Ops.size()); - while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { - // We found two constants, fold them together! - Ops[0] = getConstant(LHSC->getAPInt() * RHSC->getAPInt()); - if (Ops.size() == 2) return Ops[0]; - Ops.erase(Ops.begin()+1); // Erase the folded element - LHSC = cast<SCEVConstant>(Ops[0]); - } - - // If we have a multiply of zero, it will always be zero. - if (LHSC->getValue()->isZero()) - return LHSC; - - // If we are left with a constant one being multiplied, strip it off. - if (LHSC->getValue()->isOne()) { - Ops.erase(Ops.begin()); - --Idx; - } - - if (Ops.size() == 1) - return Ops[0]; - } - - // Delay expensive flag strengthening until necessary. 
- auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) { - return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags); - }; - - // Limit recursion calls depth. - if (Depth > MaxArithDepth || hasHugeExpression(Ops)) - return getOrCreateMulExpr(Ops, ComputeFlags(Ops)); - + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + Ops[0] = getConstant(LHSC->getAPInt() * RHSC->getAPInt()); + if (Ops.size() == 2) return Ops[0]; + Ops.erase(Ops.begin()+1); // Erase the folded element + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we have a multiply of zero, it will always be zero. + if (LHSC->getValue()->isZero()) + return LHSC; + + // If we are left with a constant one being multiplied, strip it off. + if (LHSC->getValue()->isOne()) { + Ops.erase(Ops.begin()); + --Idx; + } + + if (Ops.size() == 1) + return Ops[0]; + } + + // Delay expensive flag strengthening until necessary. + auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) { + return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags); + }; + + // Limit recursion calls depth. + if (Depth > MaxArithDepth || hasHugeExpression(Ops)) + return getOrCreateMulExpr(Ops, ComputeFlags(Ops)); + if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) { - // Don't strengthen flags if we have no new information. - SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S); - if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags) - Mul->setNoWrapFlags(ComputeFlags(Ops)); + // Don't strengthen flags if we have no new information. + SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S); + if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags) + Mul->setNoWrapFlags(ComputeFlags(Ops)); return S; } if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { - if (Ops.size() == 2) { + if (Ops.size() == 2) { // C1*(C2+V) -> C1*C2 + C1*V if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) // If any of Add's ops are Adds or Muls with a constant, apply this @@ -2874,9 +2874,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); - if (Ops[0]->isAllOnesValue()) { - // If we have a mul by -1 of an add, try distributing the -1 among the - // add operands. + if (Ops[0]->isAllOnesValue()) { + // If we have a mul by -1 of an add, try distributing the -1 among the + // add operands. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) { SmallVector<const SCEV *, 4> NewOps; bool AnyFolded = false; @@ -2961,9 +2961,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // // No self-wrap cannot be guaranteed after changing the step size, but // will be inferred if either NUW or NSW is true. - SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec}); - const SCEV *NewRec = getAddRecExpr( - NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags)); + SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec}); + const SCEV *NewRec = getAddRecExpr( + NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags)); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -3056,7 +3056,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Okay, it looks like we really DO need an mul expr. 
Check to see if we // already have one, otherwise create a new one. - return getOrCreateMulExpr(Ops, ComputeFlags(Ops)); + return getOrCreateMulExpr(Ops, ComputeFlags(Ops)); } /// Represents an unsigned remainder expression based on unsigned division. @@ -3180,7 +3180,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *Op = M->getOperand(i); const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - Operands = SmallVector<const SCEV *, 4>(M->operands()); + Operands = SmallVector<const SCEV *, 4>(M->operands()); Operands[i] = Div; return getMulExpr(Operands); } @@ -3274,7 +3274,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, // first element of the mulexpr. if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) { if (LHSCst == RHSCst) { - SmallVector<const SCEV *, 2> Operands(drop_begin(Mul->operands())); + SmallVector<const SCEV *, 2> Operands(drop_begin(Mul->operands())); return getMulExpr(Operands); } @@ -3364,7 +3364,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, ? (L->getLoopDepth() < NestedLoop->getLoopDepth()) : (!NestedLoop->contains(L) && DT.dominates(L->getHeader(), NestedLoop->getHeader()))) { - SmallVector<const SCEV *, 4> NestedOperands(NestedAR->operands()); + SmallVector<const SCEV *, 4> NestedOperands(NestedAR->operands()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their // loops. Don't perform this transformation if it would break this @@ -3417,12 +3417,12 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // flow and the no-overflow bits may not be valid for the expression in any // context. This can be fixed similarly to how these flags are handled for // adds. - SCEV::NoWrapFlags OffsetWrap = - GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + SCEV::NoWrapFlags OffsetWrap = + GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; Type *CurTy = GEP->getType(); bool FirstIter = true; - SmallVector<const SCEV *, 4> Offsets; + SmallVector<const SCEV *, 4> Offsets; for (const SCEV *IndexExpr : IndexExprs) { // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast<StructType>(CurTy)) { @@ -3430,7 +3430,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo); - Offsets.push_back(FieldOffset); + Offsets.push_back(FieldOffset); // Update CurTy to the type of the field at Index. CurTy = STy->getTypeAtIndex(Index); @@ -3450,27 +3450,27 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap); - Offsets.push_back(LocalOffset); + const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap); + Offsets.push_back(LocalOffset); } } - // Handle degenerate case of GEP without offsets. - if (Offsets.empty()) - return BaseExpr; - - // Add the offsets together, assuming nsw if inbounds. - const SCEV *Offset = getAddExpr(Offsets, OffsetWrap); - // Add the base address and the offset. We cannot use the nsw flag, as the - // base address is unsigned. However, if we know that the offset is - // non-negative, we can use nuw. 
- SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset) - ? SCEV::FlagNUW : SCEV::FlagAnyWrap; - return getAddExpr(BaseExpr, Offset, BaseWrap); + // Handle degenerate case of GEP without offsets. + if (Offsets.empty()) + return BaseExpr; + + // Add the offsets together, assuming nsw if inbounds. + const SCEV *Offset = getAddExpr(Offsets, OffsetWrap); + // Add the base address and the offset. We cannot use the nsw flag, as the + // base address is unsigned. However, if we know that the offset is + // non-negative, we can use nuw. + SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset) + ? SCEV::FlagNUW : SCEV::FlagAnyWrap; + return getAddExpr(BaseExpr, Offset, BaseWrap); } std::tuple<SCEV *, FoldingSetNodeID, void *> -ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType, +ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops) { FoldingSetNodeID ID; void *IP = nullptr; @@ -3481,17 +3481,17 @@ ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType, UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP); } -const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) { - SCEV::NoWrapFlags Flags = IsNSW ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - return getSMaxExpr(Op, getNegativeSCEV(Op, Flags)); -} - -const SCEV *ScalarEvolution::getSignumExpr(const SCEV *Op) { - Type *Ty = Op->getType(); - return getSMinExpr(getSMaxExpr(Op, getMinusOne(Ty)), getOne(Ty)); -} - -const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, +const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) { + SCEV::NoWrapFlags Flags = IsNSW ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + return getSMaxExpr(Op, getNegativeSCEV(Op, Flags)); +} + +const SCEV *ScalarEvolution::getSignumExpr(const SCEV *Op) { + Type *Ty = Op->getType(); + return getSMinExpr(getSMaxExpr(Op, getMinusOne(Ty)), getOne(Ty)); +} + +const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); if (Ops.size() == 1) return Ops[0]; @@ -3615,8 +3615,8 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, return ExistingSCEV; const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); - SCEV *S = new (SCEVAllocator) - SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size()); + SCEV *S = new (SCEVAllocator) + SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); @@ -3661,42 +3661,42 @@ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) { return getMinMaxExpr(scUMinExpr, Ops); } -const SCEV * -ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy, - ScalableVectorType *ScalableTy) { - Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo()); - Constant *One = ConstantInt::get(IntTy, 1); - Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One); - // Note that the expression we created is the final expression, we don't - // want to simplify it any further Also, if we call a normal getSCEV(), - // we'll end up in an endless recursion. So just create an SCEVUnknown. 
- return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy)); -} - +const SCEV * +ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy, + ScalableVectorType *ScalableTy) { + Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo()); + Constant *One = ConstantInt::get(IntTy, 1); + Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One); + // Note that the expression we created is the final expression, we don't + // want to simplify it any further Also, if we call a normal getSCEV(), + // we'll end up in an endless recursion. So just create an SCEVUnknown. + return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy)); +} + const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { - if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy)) - return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy); - // We can bypass creating a target-independent constant expression and then - // folding it back into a ConstantInt. This is just a compile-time - // optimization. + if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy)) + return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy); + // We can bypass creating a target-independent constant expression and then + // folding it back into a ConstantInt. This is just a compile-time + // optimization. return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } -const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) { - if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy)) - return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy); - // We can bypass creating a target-independent constant expression and then - // folding it back into a ConstantInt. This is just a compile-time - // optimization. - return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy)); -} - +const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) { + if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy)) + return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy); + // We can bypass creating a target-independent constant expression and then + // folding it back into a ConstantInt. This is just a compile-time + // optimization. + return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy)); +} + const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo) { - // We can bypass creating a target-independent constant expression and then - // folding it back into a ConstantInt. This is just a compile-time - // optimization. + // We can bypass creating a target-independent constant expression and then + // folding it back into a ConstantInt. This is just a compile-time + // optimization. 
return getConstant( IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); } @@ -3920,7 +3920,7 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - return getMulExpr(V, getMinusOne(Ty), Flags); + return getMulExpr(V, getMinusOne(Ty), Flags); } /// If Expr computes ~A, return A else return nullptr @@ -3954,8 +3954,8 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { return (const SCEV *)nullptr; MatchedOperands.push_back(Matched); } - return getMinMaxExpr(SCEVMinMaxExpr::negate(MME->getSCEVType()), - MatchedOperands); + return getMinMaxExpr(SCEVMinMaxExpr::negate(MME->getSCEVType()), + MatchedOperands); }; if (const SCEV *Replaced = MatchMinMaxNegation(MME)) return Replaced; @@ -3963,7 +3963,7 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - return getMinusSCEV(getMinusOne(Ty), V); + return getMinusSCEV(getMinusOne(Ty), V); } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, @@ -4110,7 +4110,7 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes( MaxType = getWiderType(MaxType, S->getType()); else MaxType = S->getType(); - assert(MaxType && "Failed to find maximum type!"); + assert(MaxType && "Failed to find maximum type!"); // Extend all ops to max type. SmallVector<const SCEV *, 2> PromotedOps; @@ -4127,7 +4127,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { return V; while (true) { - if (const SCEVIntegralCastExpr *Cast = dyn_cast<SCEVIntegralCastExpr>(V)) { + if (const SCEVIntegralCastExpr *Cast = dyn_cast<SCEVIntegralCastExpr>(V)) { V = Cast->getOperand(); } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { const SCEV *PtrOp = nullptr; @@ -4430,107 +4430,107 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { return Result; } -SCEV::NoWrapFlags -ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) { - SCEV::NoWrapFlags Result = AR->getNoWrapFlags(); - - if (AR->hasNoSignedWrap()) - return Result; - - if (!AR->isAffine()) - return Result; - - const SCEV *Step = AR->getStepRecurrence(*this); - const Loop *L = AR->getLoop(); - - // Check whether the backedge-taken count is SCEVCouldNotCompute. - // Note that this serves two purposes: It filters out loops that are - // simply not analyzable, and it covers the case where this code is - // being called from within backedge-taken count analysis, such that - // attempting to ask for the backedge-taken count would likely result - // in infinite recursion. In the later case, the analysis code will - // cope with a conservative value, and it will take care to purge - // that value once it has finished. - const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); - - // Normally, in the cases we can prove no-overflow via a - // backedge guarding condition, we can also compute a backedge - // taken count for the loop. The exceptions are assumptions and - // guards present in the loop -- SCEV is not great at exploiting - // these to compute max backedge taken counts, but can still use - // these to prove lack of overflow. Use this fact to avoid - // doing extra work that may not pay off. - - if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards && - AC.assumptions().empty()) - return Result; - - // If the backedge is guarded by a comparison with the pre-inc value the - // addrec is safe. 
Also, if the entry is guarded by a comparison with the - // start value and the backedge is guarded by a comparison with the post-inc - // value, the addrec is safe. - ICmpInst::Predicate Pred; - const SCEV *OverflowLimit = - getSignedOverflowLimitForStep(Step, &Pred, this); - if (OverflowLimit && - (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || - isKnownOnEveryIteration(Pred, AR, OverflowLimit))) { - Result = setFlags(Result, SCEV::FlagNSW); - } - return Result; -} -SCEV::NoWrapFlags -ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) { - SCEV::NoWrapFlags Result = AR->getNoWrapFlags(); - - if (AR->hasNoUnsignedWrap()) - return Result; - - if (!AR->isAffine()) - return Result; - - const SCEV *Step = AR->getStepRecurrence(*this); - unsigned BitWidth = getTypeSizeInBits(AR->getType()); - const Loop *L = AR->getLoop(); - - // Check whether the backedge-taken count is SCEVCouldNotCompute. - // Note that this serves two purposes: It filters out loops that are - // simply not analyzable, and it covers the case where this code is - // being called from within backedge-taken count analysis, such that - // attempting to ask for the backedge-taken count would likely result - // in infinite recursion. In the later case, the analysis code will - // cope with a conservative value, and it will take care to purge - // that value once it has finished. - const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); - - // Normally, in the cases we can prove no-overflow via a - // backedge guarding condition, we can also compute a backedge - // taken count for the loop. The exceptions are assumptions and - // guards present in the loop -- SCEV is not great at exploiting - // these to compute max backedge taken counts, but can still use - // these to prove lack of overflow. Use this fact to avoid - // doing extra work that may not pay off. - - if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards && - AC.assumptions().empty()) - return Result; - - // If the backedge is guarded by a comparison with the pre-inc value the - // addrec is safe. Also, if the entry is guarded by a comparison with the - // start value and the backedge is guarded by a comparison with the post-inc - // value, the addrec is safe. - if (isKnownPositive(Step)) { - const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - - getUnsignedRangeMax(Step)); - if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || - isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) { - Result = setFlags(Result, SCEV::FlagNUW); - } - } - - return Result; -} - +SCEV::NoWrapFlags +ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) { + SCEV::NoWrapFlags Result = AR->getNoWrapFlags(); + + if (AR->hasNoSignedWrap()) + return Result; + + if (!AR->isAffine()) + return Result; + + const SCEV *Step = AR->getStepRecurrence(*this); + const Loop *L = AR->getLoop(); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. 
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); + + // Normally, in the cases we can prove no-overflow via a + // backedge guarding condition, we can also compute a backedge + // taken count for the loop. The exceptions are assumptions and + // guards present in the loop -- SCEV is not great at exploiting + // these to compute max backedge taken counts, but can still use + // these to prove lack of overflow. Use this fact to avoid + // doing extra work that may not pay off. + + if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards && + AC.assumptions().empty()) + return Result; + + // If the backedge is guarded by a comparison with the pre-inc value the + // addrec is safe. Also, if the entry is guarded by a comparison with the + // start value and the backedge is guarded by a comparison with the post-inc + // value, the addrec is safe. + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = + getSignedOverflowLimitForStep(Step, &Pred, this); + if (OverflowLimit && + (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || + isKnownOnEveryIteration(Pred, AR, OverflowLimit))) { + Result = setFlags(Result, SCEV::FlagNSW); + } + return Result; +} +SCEV::NoWrapFlags +ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) { + SCEV::NoWrapFlags Result = AR->getNoWrapFlags(); + + if (AR->hasNoUnsignedWrap()) + return Result; + + if (!AR->isAffine()) + return Result; + + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); + + // Normally, in the cases we can prove no-overflow via a + // backedge guarding condition, we can also compute a backedge + // taken count for the loop. The exceptions are assumptions and + // guards present in the loop -- SCEV is not great at exploiting + // these to compute max backedge taken counts, but can still use + // these to prove lack of overflow. Use this fact to avoid + // doing extra work that may not pay off. + + if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards && + AC.assumptions().empty()) + return Result; + + // If the backedge is guarded by a comparison with the pre-inc value the + // addrec is safe. Also, if the entry is guarded by a comparison with the + // start value and the backedge is guarded by a comparison with the post-inc + // value, the addrec is safe. + if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - + getUnsignedRangeMax(Step)); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || + isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) { + Result = setFlags(Result, SCEV::FlagNUW); + } + } + + return Result; +} + namespace { /// Represents an abstract binary operation. 
This may exist as a @@ -4542,7 +4542,7 @@ struct BinaryOp { Value *RHS; bool IsNSW = false; bool IsNUW = false; - bool IsExact = false; + bool IsExact = false; /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or /// constant expression. @@ -4555,14 +4555,14 @@ struct BinaryOp { IsNSW = OBO->hasNoSignedWrap(); IsNUW = OBO->hasNoUnsignedWrap(); } - if (auto *PEO = dyn_cast<PossiblyExactOperator>(Op)) - IsExact = PEO->isExact(); + if (auto *PEO = dyn_cast<PossiblyExactOperator>(Op)) + IsExact = PEO->isExact(); } explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false, - bool IsNUW = false, bool IsExact = false) - : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW), - IsExact(IsExact) {} + bool IsNUW = false, bool IsExact = false) + : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW), + IsExact(IsExact) {} }; } // end anonymous namespace @@ -5259,15 +5259,15 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, bool follow(const SCEV *S) { switch (S->getSCEVType()) { - case scConstant: - case scPtrToInt: - case scTruncate: - case scZeroExtend: - case scSignExtend: - case scAddExpr: - case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: + case scConstant: + case scPtrToInt: + case scTruncate: + case scZeroExtend: + case scSignExtend: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: case scUMinExpr: case scSMinExpr: // These expressions are available if their operand(s) is/are. @@ -5305,7 +5305,7 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, // We do not try to smart about these at all. return setUnavailable(); } - llvm_unreachable("Unknown SCEV kind!"); + llvm_unreachable("Unknown SCEV kind!"); } bool isDone() { return TraversalDone; } @@ -5525,9 +5525,9 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) return C->getAPInt().countTrailingZeros(); - if (const SCEVPtrToIntExpr *I = dyn_cast<SCEVPtrToIntExpr>(S)) - return GetMinTrailingZeros(I->getOperand()); - + if (const SCEVPtrToIntExpr *I = dyn_cast<SCEVPtrToIntExpr>(S)) + return GetMinTrailingZeros(I->getOperand()); + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) return std::min(GetMinTrailingZeros(T->getOperand()), (uint32_t)getTypeSizeInBits(T->getType())); @@ -5619,15 +5619,15 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { return None; } -void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec, - SCEV::NoWrapFlags Flags) { - if (AddRec->getNoWrapFlags(Flags) != Flags) { - AddRec->setNoWrapFlags(Flags); - UnsignedRanges.erase(AddRec); - SignedRanges.erase(AddRec); - } -} - +void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec, + SCEV::NoWrapFlags Flags) { + if (AddRec->getNoWrapFlags(Flags) != Flags) { + AddRec->setNoWrapFlags(Flags); + UnsignedRanges.erase(AddRec); + SignedRanges.erase(AddRec); + } +} + /// Determine the range for a particular SCEV. If SignHint is /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges /// with a "cleaner" unsigned (resp. signed) representation. 
@@ -5742,11 +5742,11 @@ ScalarEvolution::getRangeRef(const SCEV *S, RangeType)); } - if (const SCEVPtrToIntExpr *PtrToInt = dyn_cast<SCEVPtrToIntExpr>(S)) { - ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint); - return setRange(PtrToInt, SignHint, X); - } - + if (const SCEVPtrToIntExpr *PtrToInt = dyn_cast<SCEVPtrToIntExpr>(S)) { + ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint); + return setRange(PtrToInt, SignHint, X); + } + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint); return setRange(Trunc, SignHint, @@ -5799,28 +5799,28 @@ ScalarEvolution::getRangeRef(const SCEV *S, auto RangeFromAffine = getRangeForAffineAR( AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount, BitWidth); - ConservativeResult = - ConservativeResult.intersectWith(RangeFromAffine, RangeType); + ConservativeResult = + ConservativeResult.intersectWith(RangeFromAffine, RangeType); auto RangeFromFactoring = getRangeViaFactoring( AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount, BitWidth); - ConservativeResult = - ConservativeResult.intersectWith(RangeFromFactoring, RangeType); - } - - // Now try symbolic BE count and more powerful methods. - if (UseExpensiveRangeSharpening) { - const SCEV *SymbolicMaxBECount = - getSymbolicMaxBackedgeTakenCount(AddRec->getLoop()); - if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) && - getTypeSizeInBits(MaxBECount->getType()) <= BitWidth && - AddRec->hasNoSelfWrap()) { - auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR( - AddRec, SymbolicMaxBECount, BitWidth, SignHint); + ConservativeResult = + ConservativeResult.intersectWith(RangeFromFactoring, RangeType); + } + + // Now try symbolic BE count and more powerful methods. + if (UseExpensiveRangeSharpening) { + const SCEV *SymbolicMaxBECount = + getSymbolicMaxBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth && + AddRec->hasNoSelfWrap()) { + auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR( + AddRec, SymbolicMaxBECount, BitWidth, SignHint); ConservativeResult = - ConservativeResult.intersectWith(RangeFromAffineNew, RangeType); - } + ConservativeResult.intersectWith(RangeFromAffineNew, RangeType); + } } } @@ -5991,74 +5991,74 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, return SR.intersectWith(UR, ConstantRange::Smallest); } -ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR( - const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth, - ScalarEvolution::RangeSignHint SignHint) { - assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n"); - assert(AddRec->hasNoSelfWrap() && - "This only works for non-self-wrapping AddRecs!"); - const bool IsSigned = SignHint == HINT_RANGE_SIGNED; - const SCEV *Step = AddRec->getStepRecurrence(*this); - // Only deal with constant step to save compile time. - if (!isa<SCEVConstant>(Step)) - return ConstantRange::getFull(BitWidth); - // Let's make sure that we can prove that we do not self-wrap during - // MaxBECount iterations. We need this because MaxBECount is a maximum - // iteration count estimate, and we might infer nw from some exit for which we - // do not know max exit count (or any other side reasoning). - // TODO: Turn into assert at some point. 
- if (getTypeSizeInBits(MaxBECount->getType()) > - getTypeSizeInBits(AddRec->getType())) - return ConstantRange::getFull(BitWidth); - MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType()); - const SCEV *RangeWidth = getMinusOne(AddRec->getType()); - const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step)); - const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs); - if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount, - MaxItersWithoutWrap)) - return ConstantRange::getFull(BitWidth); - - ICmpInst::Predicate LEPred = - IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; - ICmpInst::Predicate GEPred = - IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; - const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); - - // We know that there is no self-wrap. Let's take Start and End values and - // look at all intermediate values V1, V2, ..., Vn that IndVar takes during - // the iteration. They either lie inside the range [Min(Start, End), - // Max(Start, End)] or outside it: - // - // Case 1: RangeMin ... Start V1 ... VN End ... RangeMax; - // Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax; - // - // No self wrap flag guarantees that the intermediate values cannot be BOTH - // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that - // knowledge, let's try to prove that we are dealing with Case 1. It is so if - // Start <= End and step is positive, or Start >= End and step is negative. - const SCEV *Start = AddRec->getStart(); - ConstantRange StartRange = getRangeRef(Start, SignHint); - ConstantRange EndRange = getRangeRef(End, SignHint); - ConstantRange RangeBetween = StartRange.unionWith(EndRange); - // If they already cover full iteration space, we will know nothing useful - // even if we prove what we want to prove. - if (RangeBetween.isFullSet()) - return RangeBetween; - // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax). - bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet() - : RangeBetween.isWrappedSet(); - if (IsWrappedSet) - return ConstantRange::getFull(BitWidth); - - if (isKnownPositive(Step) && - isKnownPredicateViaConstantRanges(LEPred, Start, End)) - return RangeBetween; - else if (isKnownNegative(Step) && - isKnownPredicateViaConstantRanges(GEPred, Start, End)) - return RangeBetween; - return ConstantRange::getFull(BitWidth); -} - +ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR( + const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth, + ScalarEvolution::RangeSignHint SignHint) { + assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n"); + assert(AddRec->hasNoSelfWrap() && + "This only works for non-self-wrapping AddRecs!"); + const bool IsSigned = SignHint == HINT_RANGE_SIGNED; + const SCEV *Step = AddRec->getStepRecurrence(*this); + // Only deal with constant step to save compile time. + if (!isa<SCEVConstant>(Step)) + return ConstantRange::getFull(BitWidth); + // Let's make sure that we can prove that we do not self-wrap during + // MaxBECount iterations. We need this because MaxBECount is a maximum + // iteration count estimate, and we might infer nw from some exit for which we + // do not know max exit count (or any other side reasoning). + // TODO: Turn into assert at some point. 
+ if (getTypeSizeInBits(MaxBECount->getType()) > + getTypeSizeInBits(AddRec->getType())) + return ConstantRange::getFull(BitWidth); + MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType()); + const SCEV *RangeWidth = getMinusOne(AddRec->getType()); + const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step)); + const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs); + if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount, + MaxItersWithoutWrap)) + return ConstantRange::getFull(BitWidth); + + ICmpInst::Predicate LEPred = + IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + ICmpInst::Predicate GEPred = + IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + + // We know that there is no self-wrap. Let's take Start and End values and + // look at all intermediate values V1, V2, ..., Vn that IndVar takes during + // the iteration. They either lie inside the range [Min(Start, End), + // Max(Start, End)] or outside it: + // + // Case 1: RangeMin ... Start V1 ... VN End ... RangeMax; + // Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax; + // + // No self wrap flag guarantees that the intermediate values cannot be BOTH + // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that + // knowledge, let's try to prove that we are dealing with Case 1. It is so if + // Start <= End and step is positive, or Start >= End and step is negative. + const SCEV *Start = AddRec->getStart(); + ConstantRange StartRange = getRangeRef(Start, SignHint); + ConstantRange EndRange = getRangeRef(End, SignHint); + ConstantRange RangeBetween = StartRange.unionWith(EndRange); + // If they already cover full iteration space, we will know nothing useful + // even if we prove what we want to prove. + if (RangeBetween.isFullSet()) + return RangeBetween; + // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax). + bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet() + : RangeBetween.isWrappedSet(); + if (IsWrappedSet) + return ConstantRange::getFull(BitWidth); + + if (isKnownPositive(Step) && + isKnownPredicateViaConstantRanges(LEPred, Start, End)) + return RangeBetween; + else if (isKnownNegative(Step) && + isKnownPredicateViaConstantRanges(GEPred, Start, End)) + return RangeBetween; + return ConstantRange::getFull(BitWidth); +} + ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, const SCEV *Step, const SCEV *MaxBECount, @@ -6091,7 +6091,7 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, } // Peel off a cast operation - if (auto *SCast = dyn_cast<SCEVIntegralCastExpr>(S)) { + if (auto *SCast = dyn_cast<SCEVIntegralCastExpr>(S)) { CastOp = SCast->getSCEVType(); S = SCast->getOperand(); } @@ -6292,7 +6292,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { const Instruction *Poison = PoisonStack.pop_back_val(); for (auto *PoisonUser : Poison->users()) { - if (propagatesPoison(cast<Operator>(PoisonUser))) { + if (propagatesPoison(cast<Operator>(PoisonUser))) { if (Pushed.insert(cast<Instruction>(PoisonUser)).second) PoisonStack.push_back(cast<Instruction>(PoisonUser)); } else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) { @@ -6356,7 +6356,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); else if (isa<ConstantPointerNull>(V)) - // FIXME: we shouldn't special-case null pointer constant. 
+ // FIXME: we shouldn't special-case null pointer constant. return getZero(V->getType()); else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee()); @@ -6647,15 +6647,15 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { } } } - if (BO->IsExact) { - // Given exact arithmetic in-bounds right-shift by a constant, - // we can lower it into: (abs(x) EXACT/u (1<<C)) * signum(x) - const SCEV *X = getSCEV(BO->LHS); - const SCEV *AbsX = getAbsExpr(X, /*IsNSW=*/false); - APInt Mult = APInt::getOneBitSet(BitWidth, AShrAmt); - const SCEV *Div = getUDivExactExpr(AbsX, getConstant(Mult)); - return getMulExpr(Div, getSignumExpr(X), SCEV::FlagNSW); - } + if (BO->IsExact) { + // Given exact arithmetic in-bounds right-shift by a constant, + // we can lower it into: (abs(x) EXACT/u (1<<C)) * signum(x) + const SCEV *X = getSCEV(BO->LHS); + const SCEV *AbsX = getAbsExpr(X, /*IsNSW=*/false); + APInt Mult = APInt::getOneBitSet(BitWidth, AShrAmt); + const SCEV *Div = getUDivExactExpr(AbsX, getConstant(Mult)); + return getMulExpr(Div, getSignumExpr(X), SCEV::FlagNSW); + } break; } } @@ -6692,29 +6692,29 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; - case Instruction::PtrToInt: { - // Pointer to integer cast is straight-forward, so do model it. - Value *Ptr = U->getOperand(0); - const SCEV *Op = getSCEV(Ptr); - Type *DstIntTy = U->getType(); - // SCEV doesn't have constant pointer expression type, but it supports - // nullptr constant (and only that one), which is modelled in SCEV as a - // zero integer constant. So just skip the ptrtoint cast for constants. - if (isa<SCEVConstant>(Op)) - return getTruncateOrZeroExtend(Op, DstIntTy); - Type *PtrTy = Ptr->getType(); - Type *IntPtrTy = getDataLayout().getIntPtrType(PtrTy); - // But only if effective SCEV (integer) type is wide enough to represent - // all possible pointer values. - if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(PtrTy)) != - getDataLayout().getTypeSizeInBits(IntPtrTy)) - return getUnknown(V); - return getPtrToIntExpr(Op, DstIntTy); - } - case Instruction::IntToPtr: - // Just don't deal with inttoptr casts. - return getUnknown(V); - + case Instruction::PtrToInt: { + // Pointer to integer cast is straight-forward, so do model it. + Value *Ptr = U->getOperand(0); + const SCEV *Op = getSCEV(Ptr); + Type *DstIntTy = U->getType(); + // SCEV doesn't have constant pointer expression type, but it supports + // nullptr constant (and only that one), which is modelled in SCEV as a + // zero integer constant. So just skip the ptrtoint cast for constants. + if (isa<SCEVConstant>(Op)) + return getTruncateOrZeroExtend(Op, DstIntTy); + Type *PtrTy = Ptr->getType(); + Type *IntPtrTy = getDataLayout().getIntPtrType(PtrTy); + // But only if effective SCEV (integer) type is wide enough to represent + // all possible pointer values. + if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(PtrTy)) != + getDataLayout().getTypeSizeInBits(IntPtrTy)) + return getUnknown(V); + return getPtrToIntExpr(Op, DstIntTy); + } + case Instruction::IntToPtr: + // Just don't deal with inttoptr casts. + return getUnknown(V); + case Instruction::SDiv: // If both operands are non-negative, this is just an udiv. 
if (isKnownNonNegative(getSCEV(U->getOperand(0))) && @@ -6749,45 +6749,45 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Invoke: if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) return getSCEV(RV); - - if (auto *II = dyn_cast<IntrinsicInst>(U)) { - switch (II->getIntrinsicID()) { - case Intrinsic::abs: - return getAbsExpr( - getSCEV(II->getArgOperand(0)), - /*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne()); - case Intrinsic::umax: - return getUMaxExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); - case Intrinsic::umin: - return getUMinExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); - case Intrinsic::smax: - return getSMaxExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); - case Intrinsic::smin: - return getSMinExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); - case Intrinsic::usub_sat: { - const SCEV *X = getSCEV(II->getArgOperand(0)); - const SCEV *Y = getSCEV(II->getArgOperand(1)); - const SCEV *ClampedY = getUMinExpr(X, Y); - return getMinusSCEV(X, ClampedY, SCEV::FlagNUW); - } - case Intrinsic::uadd_sat: { - const SCEV *X = getSCEV(II->getArgOperand(0)); - const SCEV *Y = getSCEV(II->getArgOperand(1)); - const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y)); - return getAddExpr(ClampedX, Y, SCEV::FlagNUW); - } - case Intrinsic::start_loop_iterations: - // A start_loop_iterations is just equivalent to the first operand for - // SCEV purposes. - return getSCEV(II->getArgOperand(0)); - default: - break; - } - } + + if (auto *II = dyn_cast<IntrinsicInst>(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::abs: + return getAbsExpr( + getSCEV(II->getArgOperand(0)), + /*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne()); + case Intrinsic::umax: + return getUMaxExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::umin: + return getUMinExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::smax: + return getSMaxExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::smin: + return getSMinExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::usub_sat: { + const SCEV *X = getSCEV(II->getArgOperand(0)); + const SCEV *Y = getSCEV(II->getArgOperand(1)); + const SCEV *ClampedY = getUMinExpr(X, Y); + return getMinusSCEV(X, ClampedY, SCEV::FlagNUW); + } + case Intrinsic::uadd_sat: { + const SCEV *X = getSCEV(II->getArgOperand(0)); + const SCEV *Y = getSCEV(II->getArgOperand(1)); + const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y)); + return getAddExpr(ClampedX, Y, SCEV::FlagNUW); + } + case Intrinsic::start_loop_iterations: + // A start_loop_iterations is just equivalent to the first operand for + // SCEV purposes. + return getSCEV(II->getArgOperand(0)); + default: + break; + } + } break; } @@ -6820,9 +6820,9 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) { return 0; } -unsigned -ScalarEvolution::getSmallConstantTripCount(const Loop *L, - const BasicBlock *ExitingBlock) { +unsigned +ScalarEvolution::getSmallConstantTripCount(const Loop *L, + const BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!"); @@ -6859,7 +6859,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { /// that control exits the loop via ExitingBlock. 
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, - const BasicBlock *ExitingBlock) { + const BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!"); @@ -6890,14 +6890,14 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, } const SCEV *ScalarEvolution::getExitCount(const Loop *L, - const BasicBlock *ExitingBlock, + const BasicBlock *ExitingBlock, ExitCountKind Kind) { switch (Kind) { case Exact: - case SymbolicMaximum: + case SymbolicMaximum: return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); case ConstantMaximum: - return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock, this); + return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock, this); }; llvm_unreachable("Invalid ExitCountKind!"); } @@ -6914,15 +6914,15 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, case Exact: return getBackedgeTakenInfo(L).getExact(L, this); case ConstantMaximum: - return getBackedgeTakenInfo(L).getConstantMax(this); - case SymbolicMaximum: - return getBackedgeTakenInfo(L).getSymbolicMax(L, this); + return getBackedgeTakenInfo(L).getConstantMax(this); + case SymbolicMaximum: + return getBackedgeTakenInfo(L).getSymbolicMax(L, this); }; llvm_unreachable("Invalid ExitCountKind!"); } bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { - return getBackedgeTakenInfo(L).isConstantMaxOrZero(this); + return getBackedgeTakenInfo(L).isConstantMaxOrZero(this); } /// Push PHI nodes in the header of the given loop onto the given Worklist. @@ -6952,7 +6952,7 @@ ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result); } -ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and @@ -6976,11 +6976,11 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { const SCEV *BEExact = Result.getExact(L, this); if (BEExact != getCouldNotCompute()) { assert(isLoopInvariant(BEExact, L) && - isLoopInvariant(Result.getConstantMax(this), L) && + isLoopInvariant(Result.getConstantMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; - } else if (Result.getConstantMax(this) == getCouldNotCompute() && - isa<PHINode>(L->getHeader()->begin())) { + } else if (Result.getConstantMax(this) == getCouldNotCompute() && + isa<PHINode>(L->getHeader()->begin())) { // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; } @@ -7221,7 +7221,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, /// Get the exact not taken count for this loop exit. 
const SCEV * -ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, +ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) @@ -7230,8 +7230,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, return SE->getCouldNotCompute(); } -const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax( - const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { +const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax( + const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.MaxNotTaken; @@ -7239,32 +7239,32 @@ const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax( return SE->getCouldNotCompute(); } -/// getConstantMax - Get the constant max backedge taken count for the loop. +/// getConstantMax - Get the constant max backedge taken count for the loop. const SCEV * -ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { +ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; - if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax()) + if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax()) return SE->getCouldNotCompute(); - assert((isa<SCEVCouldNotCompute>(getConstantMax()) || - isa<SCEVConstant>(getConstantMax())) && + assert((isa<SCEVCouldNotCompute>(getConstantMax()) || + isa<SCEVConstant>(getConstantMax())) && "No point in having a non-constant max backedge taken count!"); - return getConstantMax(); -} - -const SCEV * -ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L, - ScalarEvolution *SE) { - if (!SymbolicMax) - SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L); - return SymbolicMax; -} - -bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero( - ScalarEvolution *SE) const { + return getConstantMax(); +} + +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L, + ScalarEvolution *SE) { + if (!SymbolicMax) + SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L); + return SymbolicMax; +} + +bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero( + ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; @@ -7273,8 +7273,8 @@ bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero( bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, ScalarEvolution *SE) const { - if (getConstantMax() && getConstantMax() != SE->getCouldNotCompute() && - SE->hasOperand(getConstantMax(), S)) + if (getConstantMax() && getConstantMax() != SE->getCouldNotCompute() && + SE->hasOperand(getConstantMax(), S)) return true; for (auto &ENT : ExitNotTaken) @@ -7327,9 +7327,9 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. 
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( - ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> ExitCounts, - bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero) - : ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) { + ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> ExitCounts, + bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero) + : ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) { using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; ExitNotTaken.reserve(ExitCounts.size()); @@ -7349,8 +7349,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken, std::move(Predicate)); }); - assert((isa<SCEVCouldNotCompute>(ConstantMax) || - isa<SCEVConstant>(ConstantMax)) && + assert((isa<SCEVCouldNotCompute>(ConstantMax) || + isa<SCEVConstant>(ConstantMax)) && "No point in having a non-constant max backedge taken count!"); } @@ -7539,10 +7539,10 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsExit, bool AllowPredicates) { - // Handle BinOp conditions (And, Or). - if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp( - Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates)) - return *LimitFromBinOp; + // Handle BinOp conditions (And, Or). + if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp( + Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates)) + return *LimitFromBinOp; // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. @@ -7574,95 +7574,95 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( return computeExitCountExhaustively(L, ExitCond, ExitIfTrue); } -Optional<ScalarEvolution::ExitLimit> -ScalarEvolution::computeExitLimitFromCondFromBinOp( - ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, - bool ControlsExit, bool AllowPredicates) { - // Check if the controlling expression for this loop is an And or Or. - Value *Op0, *Op1; - bool IsAnd = false; - if (match(ExitCond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) - IsAnd = true; - else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) - IsAnd = false; - else - return None; - - // EitherMayExit is true in these two cases: - // br (and Op0 Op1), loop, exit - // br (or Op0 Op1), exit, loop - bool EitherMayExit = IsAnd ^ ExitIfTrue; - ExitLimit EL0 = computeExitLimitFromCondCached(Cache, L, Op0, ExitIfTrue, - ControlsExit && !EitherMayExit, - AllowPredicates); - ExitLimit EL1 = computeExitLimitFromCondCached(Cache, L, Op1, ExitIfTrue, - ControlsExit && !EitherMayExit, - AllowPredicates); - - // Be robust against unsimplified IR for the form "op i1 X, NeutralElement" - const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd); - if (isa<ConstantInt>(Op1)) - return Op1 == NeutralElement ? EL0 : EL1; - if (isa<ConstantInt>(Op0)) - return Op0 == NeutralElement ? EL1 : EL0; - - const SCEV *BECount = getCouldNotCompute(); - const SCEV *MaxBECount = getCouldNotCompute(); - if (EitherMayExit) { - // Both conditions must be same for the loop to continue executing. - // Choose the less conservative count. 
- // If ExitCond is a short-circuit form (select), using - // umin(EL0.ExactNotTaken, EL1.ExactNotTaken) is unsafe in general. - // To see the detailed examples, please see - // test/Analysis/ScalarEvolution/exit-count-select.ll - bool PoisonSafe = isa<BinaryOperator>(ExitCond); - if (!PoisonSafe) - // Even if ExitCond is select, we can safely derive BECount using both - // EL0 and EL1 in these cases: - // (1) EL0.ExactNotTaken is non-zero - // (2) EL1.ExactNotTaken is non-poison - // (3) EL0.ExactNotTaken is zero (BECount should be simply zero and - // it cannot be umin(0, ..)) - // The PoisonSafe assignment below is simplified and the assertion after - // BECount calculation fully guarantees the condition (3). - PoisonSafe = isa<SCEVConstant>(EL0.ExactNotTaken) || - isa<SCEVConstant>(EL1.ExactNotTaken); - if (EL0.ExactNotTaken != getCouldNotCompute() && - EL1.ExactNotTaken != getCouldNotCompute() && PoisonSafe) { - BECount = - getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); - - // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form, - // it should have been simplified to zero (see the condition (3) above) - assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() || - BECount->isZero()); - } - if (EL0.MaxNotTaken == getCouldNotCompute()) - MaxBECount = EL1.MaxNotTaken; - else if (EL1.MaxNotTaken == getCouldNotCompute()) - MaxBECount = EL0.MaxNotTaken; - else - MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); - } else { - // Both conditions must be same at the same time for the loop to exit. - // For now, be conservative. - if (EL0.ExactNotTaken == EL1.ExactNotTaken) - BECount = EL0.ExactNotTaken; - } - - // There are cases (e.g. PR26207) where computeExitLimitFromCond is able - // to be more aggressive when computing BECount than when computing - // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and - // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken - // to not. - if (isa<SCEVCouldNotCompute>(MaxBECount) && - !isa<SCEVCouldNotCompute>(BECount)) - MaxBECount = getConstant(getUnsignedRangeMax(BECount)); - - return ExitLimit(BECount, MaxBECount, false, - { &EL0.Predicates, &EL1.Predicates }); -} - +Optional<ScalarEvolution::ExitLimit> +ScalarEvolution::computeExitLimitFromCondFromBinOp( + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, + bool ControlsExit, bool AllowPredicates) { + // Check if the controlling expression for this loop is an And or Or. + Value *Op0, *Op1; + bool IsAnd = false; + if (match(ExitCond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) + IsAnd = true; + else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) + IsAnd = false; + else + return None; + + // EitherMayExit is true in these two cases: + // br (and Op0 Op1), loop, exit + // br (or Op0 Op1), exit, loop + bool EitherMayExit = IsAnd ^ ExitIfTrue; + ExitLimit EL0 = computeExitLimitFromCondCached(Cache, L, Op0, ExitIfTrue, + ControlsExit && !EitherMayExit, + AllowPredicates); + ExitLimit EL1 = computeExitLimitFromCondCached(Cache, L, Op1, ExitIfTrue, + ControlsExit && !EitherMayExit, + AllowPredicates); + + // Be robust against unsimplified IR for the form "op i1 X, NeutralElement" + const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd); + if (isa<ConstantInt>(Op1)) + return Op1 == NeutralElement ? EL0 : EL1; + if (isa<ConstantInt>(Op0)) + return Op0 == NeutralElement ? 
EL1 : EL0; + + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + if (EitherMayExit) { + // Both conditions must be same for the loop to continue executing. + // Choose the less conservative count. + // If ExitCond is a short-circuit form (select), using + // umin(EL0.ExactNotTaken, EL1.ExactNotTaken) is unsafe in general. + // To see the detailed examples, please see + // test/Analysis/ScalarEvolution/exit-count-select.ll + bool PoisonSafe = isa<BinaryOperator>(ExitCond); + if (!PoisonSafe) + // Even if ExitCond is select, we can safely derive BECount using both + // EL0 and EL1 in these cases: + // (1) EL0.ExactNotTaken is non-zero + // (2) EL1.ExactNotTaken is non-poison + // (3) EL0.ExactNotTaken is zero (BECount should be simply zero and + // it cannot be umin(0, ..)) + // The PoisonSafe assignment below is simplified and the assertion after + // BECount calculation fully guarantees the condition (3). + PoisonSafe = isa<SCEVConstant>(EL0.ExactNotTaken) || + isa<SCEVConstant>(EL1.ExactNotTaken); + if (EL0.ExactNotTaken != getCouldNotCompute() && + EL1.ExactNotTaken != getCouldNotCompute() && PoisonSafe) { + BECount = + getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); + + // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form, + // it should have been simplified to zero (see the condition (3) above) + assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() || + BECount->isZero()); + } + if (EL0.MaxNotTaken == getCouldNotCompute()) + MaxBECount = EL1.MaxNotTaken; + else if (EL1.MaxNotTaken == getCouldNotCompute()) + MaxBECount = EL0.MaxNotTaken; + else + MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); + } else { + // Both conditions must be same at the same time for the loop to exit. + // For now, be conservative. + if (EL0.ExactNotTaken == EL1.ExactNotTaken) + BECount = EL0.ExactNotTaken; + } + + // There are cases (e.g. PR26207) where computeExitLimitFromCond is able + // to be more aggressive when computing BECount than when computing + // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and + // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken + // to not. + if (isa<SCEVCouldNotCompute>(MaxBECount) && + !isa<SCEVCouldNotCompute>(BECount)) + MaxBECount = getConstant(getUnsignedRangeMax(BECount)); + + return ExitLimit(BECount, MaxBECount, false, + { &EL0.Predicates, &EL1.Predicates }); +} + ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, @@ -8357,110 +8357,110 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { /// SCEVConstant, because SCEVConstant is restricted to ConstantInt. /// Returns NULL if the SCEV isn't representable as a Constant. 
static Constant *BuildConstantFromSCEV(const SCEV *V) { - switch (V->getSCEVType()) { - case scCouldNotCompute: - case scAddRecExpr: - return nullptr; - case scConstant: - return cast<SCEVConstant>(V)->getValue(); - case scUnknown: - return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue()); - case scSignExtend: { - const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V); - if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand())) - return ConstantExpr::getSExt(CastOp, SS->getType()); - return nullptr; - } - case scZeroExtend: { - const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V); - if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand())) - return ConstantExpr::getZExt(CastOp, SZ->getType()); - return nullptr; - } - case scPtrToInt: { - const SCEVPtrToIntExpr *P2I = cast<SCEVPtrToIntExpr>(V); - if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand())) - return ConstantExpr::getPtrToInt(CastOp, P2I->getType()); - - return nullptr; - } - case scTruncate: { - const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V); - if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) - return ConstantExpr::getTrunc(CastOp, ST->getType()); - return nullptr; - } - case scAddExpr: { - const SCEVAddExpr *SA = cast<SCEVAddExpr>(V); - if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { - if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { - unsigned AS = PTy->getAddressSpace(); - Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); - C = ConstantExpr::getBitCast(C, DestPtrTy); - } - for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { - Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); - if (!C2) - return nullptr; - - // First pointer! - if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { - unsigned AS = C2->getType()->getPointerAddressSpace(); - std::swap(C, C2); + switch (V->getSCEVType()) { + case scCouldNotCompute: + case scAddRecExpr: + return nullptr; + case scConstant: + return cast<SCEVConstant>(V)->getValue(); + case scUnknown: + return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue()); + case scSignExtend: { + const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand())) + return ConstantExpr::getSExt(CastOp, SS->getType()); + return nullptr; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand())) + return ConstantExpr::getZExt(CastOp, SZ->getType()); + return nullptr; + } + case scPtrToInt: { + const SCEVPtrToIntExpr *P2I = cast<SCEVPtrToIntExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand())) + return ConstantExpr::getPtrToInt(CastOp, P2I->getType()); + + return nullptr; + } + case scTruncate: { + const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) + return ConstantExpr::getTrunc(CastOp, ST->getType()); + return nullptr; + } + case scAddExpr: { + const SCEVAddExpr *SA = cast<SCEVAddExpr>(V); + if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { + if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { + unsigned AS = PTy->getAddressSpace(); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); + C = ConstantExpr::getBitCast(C, DestPtrTy); + } + for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); + if (!C2) + return nullptr; + + // First pointer! 
+ if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + unsigned AS = C2->getType()->getPointerAddressSpace(); + std::swap(C, C2); Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); - // The offsets have been converted to bytes. We can add bytes to an - // i8* by GEP with the byte count in the first index. + // The offsets have been converted to bytes. We can add bytes to an + // i8* by GEP with the byte count in the first index. C = ConstantExpr::getBitCast(C, DestPtrTy); } - // Don't bother trying to sum two pointers. We probably can't - // statically compute a load that results from it anyway. - if (C2->getType()->isPointerTy()) - return nullptr; - - if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { - if (PTy->getElementType()->isStructTy()) - C2 = ConstantExpr::getIntegerCast( - C2, Type::getInt32Ty(C->getContext()), true); - C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2); - } else - C = ConstantExpr::getAdd(C, C2); + // Don't bother trying to sum two pointers. We probably can't + // statically compute a load that results from it anyway. + if (C2->getType()->isPointerTy()) + return nullptr; + + if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { + if (PTy->getElementType()->isStructTy()) + C2 = ConstantExpr::getIntegerCast( + C2, Type::getInt32Ty(C->getContext()), true); + C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2); + } else + C = ConstantExpr::getAdd(C, C2); } - return C; - } - return nullptr; - } - case scMulExpr: { - const SCEVMulExpr *SM = cast<SCEVMulExpr>(V); - if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { - // Don't bother with pointers at all. - if (C->getType()->isPointerTy()) - return nullptr; - for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { - Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); - if (!C2 || C2->getType()->isPointerTy()) - return nullptr; - C = ConstantExpr::getMul(C, C2); + return C; + } + return nullptr; + } + case scMulExpr: { + const SCEVMulExpr *SM = cast<SCEVMulExpr>(V); + if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { + // Don't bother with pointers at all. + if (C->getType()->isPointerTy()) + return nullptr; + for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); + if (!C2 || C2->getType()->isPointerTy()) + return nullptr; + C = ConstantExpr::getMul(C, C2); } - return C; - } - return nullptr; - } - case scUDivExpr: { - const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V); - if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) - if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) - if (LHS->getType() == RHS->getType()) - return ConstantExpr::getUDiv(LHS, RHS); - return nullptr; - } - case scSMaxExpr: - case scUMaxExpr: - case scSMinExpr: - case scUMinExpr: - return nullptr; // TODO: smax, umax, smin, umax. - } - llvm_unreachable("Unknown SCEV kind!"); + return C; + } + return nullptr; + } + case scUDivExpr: { + const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V); + if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) + if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) + if (LHS->getType() == RHS->getType()) + return ConstantExpr::getUDiv(LHS, RHS); + return nullptr; + } + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: + return nullptr; // TODO: smax, umax, smin, umax. 
+ } + llvm_unreachable("Unknown SCEV kind!"); } const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { @@ -8471,22 +8471,22 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) { if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) { if (PHINode *PN = dyn_cast<PHINode>(I)) { - const Loop *CurrLoop = this->LI[I->getParent()]; + const Loop *CurrLoop = this->LI[I->getParent()]; // Looking for loop exit value. - if (CurrLoop && CurrLoop->getParentLoop() == L && - PN->getParent() == CurrLoop->getHeader()) { + if (CurrLoop && CurrLoop->getParentLoop() == L && + PN->getParent() == CurrLoop->getHeader()) { // Okay, there is no closed form solution for the PHI node. Check // to see if the loop that contains it has a known backedge-taken // count. If so, we may be able to force computation of the exit // value. - const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop); + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop); // This trivial case can show up in some degenerate cases where // the incoming IR has not yet been fully simplified. if (BackedgeTakenCount->isZero()) { Value *InitValue = nullptr; bool MultipleInitValues = false; for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { - if (!CurrLoop->contains(PN->getIncomingBlock(i))) { + if (!CurrLoop->contains(PN->getIncomingBlock(i))) { if (!InitValue) InitValue = PN->getIncomingValue(i); else if (InitValue != PN->getIncomingValue(i)) { @@ -8504,18 +8504,18 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { isKnownPositive(BackedgeTakenCount) && PN->getNumIncomingValues() == 2) { - unsigned InLoopPred = - CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1; + unsigned InLoopPred = + CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1; Value *BackedgeVal = PN->getIncomingValue(InLoopPred); - if (CurrLoop->isLoopInvariant(BackedgeVal)) + if (CurrLoop->isLoopInvariant(BackedgeVal)) return getSCEV(BackedgeVal); } if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. 
- Constant *RV = getConstantEvolutionLoopExitValue( - PN, BTCC->getAPInt(), CurrLoop); + Constant *RV = getConstantEvolutionLoopExitValue( + PN, BTCC->getAPInt(), CurrLoop); if (RV) return getSCEV(RV); } } @@ -8571,10 +8571,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], DL, &TLI); - else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) { - if (!Load->isVolatile()) - C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(), - DL); + else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) { + if (!Load->isVolatile()) + C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(), + DL); } else C = ConstantFoldInstOperands(I, Operands, DL, &TLI); if (!C) return V; @@ -8691,13 +8691,13 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { return getTruncateExpr(Op, Cast->getType()); } - if (const SCEVPtrToIntExpr *Cast = dyn_cast<SCEVPtrToIntExpr>(V)) { - const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); - if (Op == Cast->getOperand()) - return Cast; // must be loop invariant - return getPtrToIntExpr(Op, Cast->getType()); - } - + if (const SCEVPtrToIntExpr *Cast = dyn_cast<SCEVPtrToIntExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getPtrToIntExpr(Op, Cast->getType()); + } + llvm_unreachable("Unknown SCEV type!"); } @@ -9112,10 +9112,10 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { - APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L)); - APInt MaxBECountBase = getUnsignedRangeMax(Distance); - if (MaxBECountBase.ult(MaxBECount)) - MaxBECount = MaxBECountBase; + APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L)); + APInt MaxBECountBase = getUnsignedRangeMax(Distance); + if (MaxBECountBase.ult(MaxBECount)) + MaxBECount = MaxBECountBase; // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, // we end up with a loop whose backedge-taken count is n - 1. Detect this @@ -9180,19 +9180,19 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { return getCouldNotCompute(); } -std::pair<const BasicBlock *, const BasicBlock *> -ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) - const { +std::pair<const BasicBlock *, const BasicBlock *> +ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) + const { // If the block has a unique predecessor, then there is no path from the // predecessor to the block that does not go through the direct edge // from the predecessor to the block. - if (const BasicBlock *Pred = BB->getSinglePredecessor()) + if (const BasicBlock *Pred = BB->getSinglePredecessor()) return {Pred, BB}; // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. 
- if (const Loop *L = LI.getLoopFor(BB)) + if (const Loop *L = LI.getLoopFor(BB)) return {L->getLoopPredecessor(), L->getHeader()}; return {nullptr, nullptr}; @@ -9521,14 +9521,14 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS); } -bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS, - const Instruction *Context) { - // TODO: Analyze guards and assumes from Context's block. - return isKnownPredicate(Pred, LHS, RHS) || - isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS); -} - +bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const Instruction *Context) { + // TODO: Analyze guards and assumes from Context's block. + return isKnownPredicate(Pred, LHS, RHS) || + isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS); +} + bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS) { @@ -9537,30 +9537,30 @@ bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS); } -Optional<ScalarEvolution::MonotonicPredicateType> -ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS, - ICmpInst::Predicate Pred) { - auto Result = getMonotonicPredicateTypeImpl(LHS, Pred); +Optional<ScalarEvolution::MonotonicPredicateType> +ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred) { + auto Result = getMonotonicPredicateTypeImpl(LHS, Pred); #ifndef NDEBUG // Verify an invariant: inverting the predicate should turn a monotonically // increasing change to a monotonically decreasing one, and vice versa. - if (Result) { - auto ResultSwapped = - getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred)); + if (Result) { + auto ResultSwapped = + getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred)); - assert(ResultSwapped.hasValue() && "should be able to analyze both!"); - assert(ResultSwapped.getValue() != Result.getValue() && + assert(ResultSwapped.hasValue() && "should be able to analyze both!"); + assert(ResultSwapped.getValue() != Result.getValue() && "monotonicity should flip as we flip the predicate"); - } + } #endif return Result; } -Optional<ScalarEvolution::MonotonicPredicateType> -ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, - ICmpInst::Predicate Pred) { +Optional<ScalarEvolution::MonotonicPredicateType> +ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred) { // A zero step value for LHS means the induction variable is essentially a // loop invariant value. We don't really depend on the predicate actually // flipping from false to true (for increasing predicates, and the other way @@ -9571,46 +9571,46 @@ ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be // as general as possible. - // Only handle LE/LT/GE/GT predicates. - if (!ICmpInst::isRelational(Pred)) - return None; - - bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred); - assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) && - "Should be greater or less!"); + // Only handle LE/LT/GE/GT predicates. + if (!ICmpInst::isRelational(Pred)) + return None; - // Check that AR does not wrap. 
- if (ICmpInst::isUnsigned(Pred)) { + bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred); + assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) && + "Should be greater or less!"); + + // Check that AR does not wrap. + if (ICmpInst::isUnsigned(Pred)) { if (!LHS->hasNoUnsignedWrap()) - return None; - return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; - } else { - assert(ICmpInst::isSigned(Pred) && - "Relational predicate is either signed or unsigned!"); + return None; + return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; + } else { + assert(ICmpInst::isSigned(Pred) && + "Relational predicate is either signed or unsigned!"); if (!LHS->hasNoSignedWrap()) - return None; + return None; const SCEV *Step = LHS->getStepRecurrence(*this); - if (isKnownNonNegative(Step)) - return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; + if (isKnownNonNegative(Step)) + return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; - if (isKnownNonPositive(Step)) - return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; + if (isKnownNonPositive(Step)) + return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; - return None; + return None; } } -Optional<ScalarEvolution::LoopInvariantPredicate> -ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS, - const Loop *L) { +Optional<ScalarEvolution::LoopInvariantPredicate> +ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const Loop *L) { // If there is a loop-invariant, force it into the RHS, otherwise bail out. if (!isLoopInvariant(RHS, L)) { if (!isLoopInvariant(LHS, L)) - return None; + return None; std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9618,11 +9618,11 @@ ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS); if (!ArLHS || ArLHS->getLoop() != L) - return None; + return None; - auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred); - if (!MonotonicType) - return None; + auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred); + if (!MonotonicType) + return None; // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to // true as the loop iterates, and the backedge is control dependent on // "ArLHS `Pred` RHS" == true then we can reason as follows: @@ -9640,79 +9640,79 @@ ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred, // // A similar reasoning applies for a monotonically decreasing predicate, by // replacing true with false and false with true in the above two bullets. - bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing; + bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing; auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) - return None; - - return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS); -} - -Optional<ScalarEvolution::LoopInvariantPredicate> -ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations( - ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, - const Instruction *Context, const SCEV *MaxIter) { - // Try to prove the following set of facts: - // - The predicate is monotonic in the iteration space. 
- // - If the check does not fail on the 1st iteration: - // - No overflow will happen during first MaxIter iterations; - // - It will not fail on the MaxIter'th iteration. - // If the check does fail on the 1st iteration, we leave the loop and no - // other checks matter. - - // If there is a loop-invariant, force it into the RHS, otherwise bail out. - if (!isLoopInvariant(RHS, L)) { - if (!isLoopInvariant(LHS, L)) - return None; - - std::swap(LHS, RHS); - Pred = ICmpInst::getSwappedPredicate(Pred); - } - - auto *AR = dyn_cast<SCEVAddRecExpr>(LHS); - if (!AR || AR->getLoop() != L) - return None; - - // The predicate must be relational (i.e. <, <=, >=, >). - if (!ICmpInst::isRelational(Pred)) - return None; - - // TODO: Support steps other than +/- 1. - const SCEV *Step = AR->getStepRecurrence(*this); - auto *One = getOne(Step->getType()); - auto *MinusOne = getNegativeSCEV(One); - if (Step != One && Step != MinusOne) - return None; - - // Type mismatch here means that MaxIter is potentially larger than max - // unsigned value in start type, which mean we cannot prove no wrap for the - // indvar. - if (AR->getType() != MaxIter->getType()) - return None; - - // Value of IV on suggested last iteration. - const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this); - // Does it still meet the requirement? - if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS)) - return None; - // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does - // not exceed max unsigned value of this type), this effectively proves - // that there is no wrap during the iteration. To prove that there is no - // signed/unsigned wrap, we need to check that - // Start <= Last for step = 1 or Start >= Last for step = -1. - ICmpInst::Predicate NoOverflowPred = - CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; - if (Step == MinusOne) - NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); - const SCEV *Start = AR->getStart(); - if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context)) - return None; - - // Everything is fine. - return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS); -} - + return None; + + return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS); +} + +Optional<ScalarEvolution::LoopInvariantPredicate> +ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, + const Instruction *Context, const SCEV *MaxIter) { + // Try to prove the following set of facts: + // - The predicate is monotonic in the iteration space. + // - If the check does not fail on the 1st iteration: + // - No overflow will happen during first MaxIter iterations; + // - It will not fail on the MaxIter'th iteration. + // If the check does fail on the 1st iteration, we leave the loop and no + // other checks matter. + + // If there is a loop-invariant, force it into the RHS, otherwise bail out. + if (!isLoopInvariant(RHS, L)) { + if (!isLoopInvariant(LHS, L)) + return None; + + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + auto *AR = dyn_cast<SCEVAddRecExpr>(LHS); + if (!AR || AR->getLoop() != L) + return None; + + // The predicate must be relational (i.e. <, <=, >=, >). + if (!ICmpInst::isRelational(Pred)) + return None; + + // TODO: Support steps other than +/- 1. 
+ const SCEV *Step = AR->getStepRecurrence(*this); + auto *One = getOne(Step->getType()); + auto *MinusOne = getNegativeSCEV(One); + if (Step != One && Step != MinusOne) + return None; + + // Type mismatch here means that MaxIter is potentially larger than max + // unsigned value in start type, which mean we cannot prove no wrap for the + // indvar. + if (AR->getType() != MaxIter->getType()) + return None; + + // Value of IV on suggested last iteration. + const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this); + // Does it still meet the requirement? + if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS)) + return None; + // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does + // not exceed max unsigned value of this type), this effectively proves + // that there is no wrap during the iteration. To prove that there is no + // signed/unsigned wrap, we need to check that + // Start <= Last for step = 1 or Start >= Last for step = -1. + ICmpInst::Predicate NoOverflowPred = + CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + if (Step == MinusOne) + NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); + const SCEV *Start = AR->getStart(); + if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context)) + return None; + + // Everything is fine. + return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS); +} + bool ScalarEvolution::isKnownPredicateViaConstantRanges( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { if (HasSameValue(LHS, RHS)) @@ -9795,24 +9795,24 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative()) return true; break; - - case ICmpInst::ICMP_UGE: - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_ULE: - // X u<= (X + C)<nuw> for any C - if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW)) - return true; - break; - - case ICmpInst::ICMP_UGT: - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_ULT: - // X u< (X + C)<nuw> if C != 0 - if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW) && !C.isNullValue()) - return true; - break; + + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: + // X u<= (X + C)<nuw> for any C + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW)) + return true; + break; + + case ICmpInst::ICMP_UGT: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULT: + // X u< (X + C)<nuw> if C != 0 + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW) && !C.isNullValue()) + return true; + break; } return false; @@ -9840,14 +9840,14 @@ bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); } -bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB, +bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // No need to even try if we know the module has no guards. 
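The unsigned cases added to isKnownPredicateViaNoOverflow above rest on two simple facts about non-wrapping additions; a hedged plain-C++ check follows (the values 41 and 7 are arbitrary and chosen so that the addition cannot wrap):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t X = 41, C = 7;   // assumed values; X + C does not wrap
  assert(X <= X + C);             // X u<= (X + C)<nuw> holds for any C
  assert(C == 0 || X < X + C);    // X u< (X + C)<nuw> holds whenever C != 0
  return 0;
}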
if (!HasGuards) return false; - return any_of(*BB, [&](const Instruction &I) { + return any_of(*BB, [&](const Instruction &I) { using namespace llvm::PatternMatch; Value *Condition; @@ -9970,12 +9970,12 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return false; } -bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, - ICmpInst::Predicate Pred, - const SCEV *LHS, - const SCEV *RHS) { +bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, + ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { if (VerifyIR) - assert(!verifyFunction(*BB->getParent(), &dbgs()) && + assert(!verifyFunction(*BB->getParent(), &dbgs()) && "This cannot be done on broken IR!"); if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) @@ -10001,7 +10001,7 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, } // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard. - auto ProveViaGuard = [&](const BasicBlock *Block) { + auto ProveViaGuard = [&](const BasicBlock *Block) { if (isImpliedViaGuard(Block, Pred, LHS, RHS)) return true; if (ProvingStrictComparison) { @@ -10018,39 +10018,39 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, }; // Try to prove (Pred, LHS, RHS) using isImpliedCond. - auto ProveViaCond = [&](const Value *Condition, bool Inverse) { - const Instruction *Context = &BB->front(); - if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context)) + auto ProveViaCond = [&](const Value *Condition, bool Inverse) { + const Instruction *Context = &BB->front(); + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context)) return true; if (ProvingStrictComparison) { if (!ProvedNonStrictComparison) - ProvedNonStrictComparison = isImpliedCond(NonStrictPredicate, LHS, RHS, - Condition, Inverse, Context); + ProvedNonStrictComparison = isImpliedCond(NonStrictPredicate, LHS, RHS, + Condition, Inverse, Context); if (!ProvedNonEquality) - ProvedNonEquality = isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, - Condition, Inverse, Context); + ProvedNonEquality = isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, + Condition, Inverse, Context); if (ProvedNonStrictComparison && ProvedNonEquality) return true; } return false; }; - // Starting at the block's predecessor, climb up the predecessor chain, as long + // Starting at the block's predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors - // leading to the original block. - const Loop *ContainingLoop = LI.getLoopFor(BB); - const BasicBlock *PredBB; - if (ContainingLoop && ContainingLoop->getHeader() == BB) - PredBB = ContainingLoop->getLoopPredecessor(); - else - PredBB = BB->getSinglePredecessor(); - for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB); - Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + // leading to the original block. 
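The ProvedNonStrictComparison / ProvedNonEquality pair used in the lambdas above relies on a strict predicate decomposing into its non-strict form plus a disequality. A small standalone check (plain C++, exhaustive over a tiny range, not part of the diff):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A = -3; A <= 3; ++A)
    for (int32_t B = -3; B <= 3; ++B)
      assert((A > B) == ((A >= B) && (A != B)));  // strict == non-strict && NE
  return 0;
}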
+ const Loop *ContainingLoop = LI.getLoopFor(BB); + const BasicBlock *PredBB; + if (ContainingLoop && ContainingLoop->getHeader() == BB) + PredBB = ContainingLoop->getLoopPredecessor(); + else + PredBB = BB->getSinglePredecessor(); + for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB); + Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { if (ProveViaGuard(Pair.first)) return true; - const BranchInst *LoopEntryPredicate = - dyn_cast<BranchInst>(Pair.first->getTerminator()); + const BranchInst *LoopEntryPredicate = + dyn_cast<BranchInst>(Pair.first->getTerminator()); if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) continue; @@ -10065,7 +10065,7 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, if (!AssumeVH) continue; auto *CI = cast<CallInst>(AssumeVH); - if (!DT.dominates(CI, BB)) + if (!DT.dominates(CI, BB)) continue; if (ProveViaCond(CI->getArgOperand(0), false)) @@ -10075,27 +10075,27 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, return false; } -bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, - ICmpInst::Predicate Pred, - const SCEV *LHS, - const SCEV *RHS) { - // Interpret a null as meaning no loop, where there is obviously no guard - // (interprocedural conditions notwithstanding). - if (!L) - return false; - - // Both LHS and RHS must be available at loop entry. - assert(isAvailableAtLoopEntry(LHS, L) && - "LHS is not available at Loop Entry"); - assert(isAvailableAtLoopEntry(RHS, L) && - "RHS is not available at Loop Entry"); - return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS); -} - -bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, - const Value *FoundCondValue, bool Inverse, - const Instruction *Context) { +bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) + return false; + + // Both LHS and RHS must be available at loop entry. + assert(isAvailableAtLoopEntry(LHS, L) && + "LHS is not available at Loop Entry"); + assert(isAvailableAtLoopEntry(RHS, L) && + "RHS is not available at Loop Entry"); + return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS); +} + +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, + const Value *FoundCondValue, bool Inverse, + const Instruction *Context) { if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; @@ -10103,23 +10103,23 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); }); // Recursively handle And and Or conditions. 
- if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { + if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, - Context) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, - Context); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, - Context) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, - Context); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } } - const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); + const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); if (!ICI) return false; // Now that we found a conditional branch that dominates the loop or controls @@ -10133,36 +10133,36 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); - return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context); + return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, - const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context) { + const SCEV *FoundLHS, const SCEV *FoundRHS, + const Instruction *Context) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { - // For unsigned and equality predicates, try to prove that both found - // operands fit into narrow unsigned range. If so, try to prove facts in - // narrow types. - if (!CmpInst::isSigned(FoundPred)) { - auto *NarrowType = LHS->getType(); - auto *WideType = FoundLHS->getType(); - auto BitWidth = getTypeSizeInBits(NarrowType); - const SCEV *MaxValue = getZeroExtendExpr( - getConstant(APInt::getMaxValue(BitWidth)), WideType); - if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) && - isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) { - const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType); - const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType); - if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS, - TruncFoundRHS, Context)) - return true; - } - } - + // For unsigned and equality predicates, try to prove that both found + // operands fit into narrow unsigned range. If so, try to prove facts in + // narrow types. 
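The narrowing step described above is sound because, once both found operands are proved to fit in the narrow type's unsigned range, truncation does not change how they compare. A standalone illustration (the sample values are assumptions, not taken from the code):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t FoundLHS = 100, FoundRHS = 200;            // both fit in 32 bits
  const uint32_t TruncLHS = static_cast<uint32_t>(FoundLHS);
  const uint32_t TruncRHS = static_cast<uint32_t>(FoundRHS);
  assert((FoundLHS < FoundRHS) == (TruncLHS < TruncRHS));   // u< is preserved
  assert((FoundLHS == FoundRHS) == (TruncLHS == TruncRHS)); // so is equality
  return 0;
}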
+ if (!CmpInst::isSigned(FoundPred)) { + auto *NarrowType = LHS->getType(); + auto *WideType = FoundLHS->getType(); + auto BitWidth = getTypeSizeInBits(NarrowType); + const SCEV *MaxValue = getZeroExtendExpr( + getConstant(APInt::getMaxValue(BitWidth)), WideType); + if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) && + isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) { + const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType); + const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType); + if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS, + TruncFoundRHS, Context)) + return true; + } + } + if (CmpInst::isSigned(Pred)) { LHS = getSignExtendExpr(LHS, FoundLHS->getType()); RHS = getSignExtendExpr(RHS, FoundLHS->getType()); @@ -10180,17 +10180,17 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); } } - return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS, - FoundRHS, Context); -} + return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS, + FoundRHS, Context); +} -bool ScalarEvolution::isImpliedCondBalancedTypes( - ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context) { - assert(getTypeSizeInBits(LHS->getType()) == - getTypeSizeInBits(FoundLHS->getType()) && - "Types should be balanced!"); +bool ScalarEvolution::isImpliedCondBalancedTypes( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, + const Instruction *Context) { + assert(getTypeSizeInBits(LHS->getType()) == + getTypeSizeInBits(FoundLHS->getType()) && + "Types should be balanced!"); // Canonicalize the query to match the way instcombine will have // canonicalized the comparison. if (SimplifyICmpOperands(Pred, LHS, RHS)) @@ -10213,16 +10213,16 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check whether swapping the found predicate makes it the same as the // desired predicate. if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { if (isa<SCEVConstant>(RHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context); else - return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), RHS, - LHS, FoundLHS, FoundRHS, Context); + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), RHS, + LHS, FoundLHS, FoundRHS, Context); } // Unsigned comparison is the same as signed comparison when both the operands @@ -10230,7 +10230,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( if (CmpInst::isUnsigned(FoundPred) && CmpInst::getSignedPredicate(FoundPred) == Pred && isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check if we can make progress by sharpening ranges. if (FoundPred == ICmpInst::ICMP_NE && @@ -10267,8 +10267,8 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( case ICmpInst::ICMP_UGE: // We know V `Pred` SharperMin. 
If this implies LHS `Pred` // RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), - Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), + Context)) return true; LLVM_FALLTHROUGH; @@ -10283,26 +10283,26 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // // If V `Pred` Min implies LHS `Pred` RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), - Context)) - return true; - break; - - // `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS` and `RHS >= LHS` respectively. - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_ULE: - if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(SharperMin), Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), + Context)) return true; + break; + + // `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS` and `RHS >= LHS` respectively. + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_ULE: + if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, + LHS, V, getConstant(SharperMin), Context)) + return true; LLVM_FALLTHROUGH; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_ULT: - if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(Min), Context)) - return true; - break; - + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: + if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, + LHS, V, getConstant(Min), Context)) + return true; + break; + default: // No change break; @@ -10313,12 +10313,12 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) - if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context)) return true; if (Pred == ICmpInst::ICMP_NE) if (!ICmpInst::isTrueWhenEqual(FoundPred)) - if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, - Context)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, + Context)) return true; // Otherwise assume the worst. @@ -10397,51 +10397,51 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More, return None; } -bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( - ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) { - // Try to recognize the following pattern: - // - // FoundRHS = ... - // ... - // loop: - // FoundLHS = {Start,+,W} - // context_bb: // Basic block from the same loop - // known(Pred, FoundLHS, FoundRHS) - // - // If some predicate is known in the context of a loop, it is also known on - // each iteration of this loop, including the first iteration. Therefore, in - // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to - // prove the original pred using this fact. - if (!Context) - return false; - const BasicBlock *ContextBB = Context->getParent(); - // Make sure AR varies in the context block. - if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) { - const Loop *L = AR->getLoop(); - // Make sure that context belongs to the loop and executes on 1st iteration - // (if it ever executes at all). 
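The SharperMin cases above encode the fact that a known lower bound can be tightened by one when a disequality with that bound is also known; a minimal numeric sketch (Min and V are arbitrary demo values):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Min = 10;       // assumed known lower bound of V's range
  const int32_t V = 17;         // any value with V >= Min and V != Min
  assert(V >= Min && V != Min);
  assert(V >= Min + 1);         // the sharpened ("SharperMin") bound
  return 0;
}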
- if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) - return false; - if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop())) - return false; - return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS); - } - - if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundRHS)) { - const Loop *L = AR->getLoop(); - // Make sure that context belongs to the loop and executes on 1st iteration - // (if it ever executes at all). - if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) - return false; - if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop())) - return false; - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart()); - } - - return false; -} - +bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) { + // Try to recognize the following pattern: + // + // FoundRHS = ... + // ... + // loop: + // FoundLHS = {Start,+,W} + // context_bb: // Basic block from the same loop + // known(Pred, FoundLHS, FoundRHS) + // + // If some predicate is known in the context of a loop, it is also known on + // each iteration of this loop, including the first iteration. Therefore, in + // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to + // prove the original pred using this fact. + if (!Context) + return false; + const BasicBlock *ContextBB = Context->getParent(); + // Make sure AR varies in the context block. + if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) { + const Loop *L = AR->getLoop(); + // Make sure that context belongs to the loop and executes on 1st iteration + // (if it ever executes at all). + if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) + return false; + if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS); + } + + if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundRHS)) { + const Loop *L = AR->getLoop(); + // Make sure that context belongs to the loop and executes on 1st iteration + // (if it ever executes at all). + if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) + return false; + if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart()); + } + + return false; +} + bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { @@ -10622,10 +10622,10 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, if (!dominates(RHS, IncBB)) return false; const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); - // Make sure L does not refer to a value from a potentially previous - // iteration of a loop. - if (!properlyDominates(L, IncBB)) - return false; + // Make sure L does not refer to a value from a potentially previous + // iteration of a loop. 
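isImpliedCondOperandsViaAddRecStart, shown above, uses the observation that a fact known on every iteration of a loop is in particular known on the first one, where the AddRec equals its start value. A hedged standalone sketch with made-up bounds:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Start = 0, Bound = 16;          // assumed demo values
  for (uint32_t IV = Start; IV < Bound; ++IV) {  // "IV < Bound" known in the body
    if (IV == Start)
      assert(Start < Bound);    // the same fact specialized to the start value
  }
  return 0;
}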
+ if (!properlyDominates(L, IncBB)) + return false; if (!ProvedEasily(L, RHS)) return false; } @@ -10636,18 +10636,18 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS, - const Instruction *Context) { + const SCEV *FoundRHS, + const Instruction *Context) { if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; - if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, - Context)) - return true; - + if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, + Context)) + return true; + return isImpliedCondOperandsHelper(Pred, LHS, RHS, FoundLHS, FoundRHS) || // ~x < ~y --> x > y @@ -10664,7 +10664,7 @@ static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, if (!MinMaxExpr) return false; - return is_contained(MinMaxExpr->operands(), Candidate); + return is_contained(MinMaxExpr->operands(), Candidate); } static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, @@ -10746,31 +10746,31 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, // We want to avoid hurting the compile time with analysis of too big trees. if (Depth > MaxSCEVOperationsImplicationDepth) return false; - - // We only want to work with GT comparison so far. - if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) { - Pred = CmpInst::getSwappedPredicate(Pred); + + // We only want to work with GT comparison so far. + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) { + Pred = CmpInst::getSwappedPredicate(Pred); std::swap(LHS, RHS); std::swap(FoundLHS, FoundRHS); } - - // For unsigned, try to reduce it to corresponding signed comparison. - if (Pred == ICmpInst::ICMP_UGT) - // We can replace unsigned predicate with its signed counterpart if all - // involved values are non-negative. - // TODO: We could have better support for unsigned. - if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) { - // Knowing that both FoundLHS and FoundRHS are non-negative, and knowing - // FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us - // use this fact to prove that LHS and RHS are non-negative. - const SCEV *MinusOne = getMinusOne(LHS->getType()); - if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS, - FoundRHS) && - isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS, - FoundRHS)) - Pred = ICmpInst::ICMP_SGT; - } - + + // For unsigned, try to reduce it to corresponding signed comparison. + if (Pred == ICmpInst::ICMP_UGT) + // We can replace unsigned predicate with its signed counterpart if all + // involved values are non-negative. + // TODO: We could have better support for unsigned. + if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) { + // Knowing that both FoundLHS and FoundRHS are non-negative, and knowing + // FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us + // use this fact to prove that LHS and RHS are non-negative. 
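The unsigned-to-signed reduction described in the comment above works because the signed and unsigned orderings coincide on non-negative values; a quick exhaustive check over a small range (plain C++, independent of the diff):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t L = 0; L <= 64; ++L)    // non-negative operands only
    for (int32_t R = 0; R <= 64; ++R)
      assert((static_cast<uint32_t>(L) > static_cast<uint32_t>(R)) == (L > R));
  return 0;
}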
+ const SCEV *MinusOne = getMinusOne(LHS->getType()); + if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS, + FoundRHS) && + isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS, + FoundRHS)) + Pred = ICmpInst::ICMP_SGT; + } + if (Pred != ICmpInst::ICMP_SGT) return false; @@ -10810,7 +10810,7 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, auto *LL = LHSAddExpr->getOperand(0); auto *LR = LHSAddExpr->getOperand(1); - auto *MinusOne = getMinusOne(RHS->getType()); + auto *MinusOne = getMinusOne(RHS->getType()); // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context. auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) { @@ -10883,7 +10883,7 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, // 1. If FoundLHS is negative, then the result is 0. // 2. If FoundLHS is non-negative, then the result is non-negative. // Anyways, the result is non-negative. - auto *MinusOne = getMinusOne(WTy); + auto *MinusOne = getMinusOne(WTy); auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt); if (isKnownNegative(RHS) && IsSGTViaContext(FoundRHSExt, NegDenomMinusOne)) @@ -11238,13 +11238,13 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) BECount = BECountIfBackedgeTaken; else { - // If we know that RHS >= Start in the context of loop, then we know that - // max(RHS, Start) = RHS at this point. - if (isLoopEntryGuardedByCond( - L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, RHS, Start)) - End = RHS; - else - End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); + // If we know that RHS >= Start in the context of loop, then we know that + // max(RHS, Start) = RHS at this point. + if (isLoopEntryGuardedByCond( + L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, RHS, Start)) + End = RHS; + else + End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); } @@ -11311,15 +11311,15 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = IV->getStart(); const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) { - // If we know that Start >= RHS in the context of loop, then we know that - // min(RHS, Start) = RHS at this point. - if (isLoopEntryGuardedByCond( - L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS)) - End = RHS; - else - End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); - } + if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) { + // If we know that Start >= RHS in the context of loop, then we know that + // min(RHS, Start) = RHS at this point. + if (isLoopEntryGuardedByCond( + L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS)) + End = RHS; + else + End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); + } const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); @@ -11359,7 +11359,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, // If the start is a non-zero constant, shift the range to simplify things. 
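The End = max(RHS, Start) clamp in howManyLessThans above reflects that a "less than"-controlled loop whose bound is already at or below the start executes zero iterations; a small sketch with assumed values:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t bodyExecutions(uint32_t Start, uint32_t RHS) {
  uint32_t N = 0;
  for (uint32_t I = Start; I < RHS; ++I)  // unit-stride "less than" loop
    ++N;
  return N;
}

int main() {
  assert(bodyExecutions(5, 12) == std::max(12u, 5u) - 5u);   // 7 executions
  assert(bodyExecutions(12, 5) == std::max(5u, 12u) - 12u);  // 0 executions
  return 0;
}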
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) if (!SC->getValue()->isZero()) { - SmallVector<const SCEV *, 4> Operands(operands()); + SmallVector<const SCEV *, 4> Operands(operands()); Operands[0] = SE.getZero(SC->getType()); const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), getNoWrapFlags(FlagNW)); @@ -11642,7 +11642,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, } // Remove all SCEVConstants. - erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }); + erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }); if (Terms.size() > 0) if (!findArrayDimensionsRec(SE, Terms, Sizes)) @@ -11970,7 +11970,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { // so that future queries will recompute the expressions using the new // value. Value *Old = getValPtr(); - SmallVector<User *, 16> Worklist(Old->users()); + SmallVector<User *, 16> Worklist(Old->users()); SmallPtrSet<User *, 8> Visited; while (!Worklist.empty()) { User *U = Worklist.pop_back_val(); @@ -11983,7 +11983,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { if (PHINode *PN = dyn_cast<PHINode>(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(U); - llvm::append_range(Worklist, U->users()); + llvm::append_range(Worklist, U->users()); } // Delete the Old value. if (PHINode *PN = dyn_cast<PHINode>(Old)) @@ -12265,10 +12265,10 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { ScalarEvolution::LoopDisposition ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { - switch (S->getSCEVType()) { + switch (S->getSCEVType()) { case scConstant: return LoopInvariant; - case scPtrToInt: + case scPtrToInt: case scTruncate: case scZeroExtend: case scSignExtend: @@ -12373,10 +12373,10 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { ScalarEvolution::BlockDisposition ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { - switch (S->getSCEVType()) { + switch (S->getSCEVType()) { case scConstant: return ProperlyDominatesBlock; - case scPtrToInt: + case scPtrToInt: case scTruncate: case scZeroExtend: case scSignExtend: @@ -12548,7 +12548,7 @@ void ScalarEvolution::verify() const { while (!LoopStack.empty()) { auto *L = LoopStack.pop_back_val(); - llvm::append_range(LoopStack, *L); + llvm::append_range(LoopStack, *L); auto *CurBECount = SCM.visit( const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L)); @@ -12592,25 +12592,25 @@ void ScalarEvolution::verify() const { std::abort(); } } - - // Collect all valid loops currently in LoopInfo. - SmallPtrSet<Loop *, 32> ValidLoops; - SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end()); - while (!Worklist.empty()) { - Loop *L = Worklist.pop_back_val(); - if (ValidLoops.contains(L)) - continue; - ValidLoops.insert(L); - Worklist.append(L->begin(), L->end()); - } - // Check for SCEV expressions referencing invalid/deleted loops. - for (auto &KV : ValueExprMap) { - auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second); - if (!AR) - continue; - assert(ValidLoops.contains(AR->getLoop()) && - "AddRec references invalid loop"); - } + + // Collect all valid loops currently in LoopInfo. 
+ SmallPtrSet<Loop *, 32> ValidLoops; + SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end()); + while (!Worklist.empty()) { + Loop *L = Worklist.pop_back_val(); + if (ValidLoops.contains(L)) + continue; + ValidLoops.insert(L); + Worklist.append(L->begin(), L->end()); + } + // Check for SCEV expressions referencing invalid/deleted loops. + for (auto &KV : ValueExprMap) { + auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second); + if (!AR) + continue; + assert(ValidLoops.contains(AR->getLoop()) && + "AddRec references invalid loop"); + } } bool ScalarEvolution::invalidate( @@ -12643,11 +12643,11 @@ ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - // For compatibility with opt's -analyze feature under legacy pass manager - // which was not ported to NPM. This keeps tests using - // update_analyze_test_checks.py working. - OS << "Printing analysis 'Scalar Evolution Analysis' for function '" - << F.getName() << "':\n"; + // For compatibility with opt's -analyze feature under legacy pass manager + // which was not ported to NPM. This keeps tests using + // update_analyze_test_checks.py working. + OS << "Printing analysis 'Scalar Evolution Analysis' for function '" + << F.getName() << "':\n"; AM.getResult<ScalarEvolutionAnalysis>(F).print(OS); return PreservedAnalyses::all(); } @@ -13143,24 +13143,24 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { } // Match the mathematical pattern A - (A / B) * B, where A and B can be -// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used -// for URem with constant power-of-2 second operands. +// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used +// for URem with constant power-of-2 second operands. // It's not always easy, as A and B can be folded (imagine A is X / 2, and B is // 4, A / B becomes X / 8). bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS) { - // Try to match 'zext (trunc A to iB) to iY', which is used - // for URem with constant power-of-2 second operands. Make sure the size of - // the operand A matches the size of the whole expressions. - if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr)) - if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) { - LHS = Trunc->getOperand(); - if (LHS->getType() != Expr->getType()) - LHS = getZeroExtendExpr(LHS, Expr->getType()); - RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) - << getTypeSizeInBits(Trunc->getType())); - return true; - } + // Try to match 'zext (trunc A to iB) to iY', which is used + // for URem with constant power-of-2 second operands. Make sure the size of + // the operand A matches the size of the whole expressions. 
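Both shapes matched by matchURem above can be checked with ordinary unsigned arithmetic; a standalone sketch (the concrete operands are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 0xDEADBEEF, B = 7;                       // assumed values
  assert(A - (A / B) * B == A % B);                           // the A - (A/B)*B form
  const uint32_t ZextTrunc = static_cast<uint32_t>(static_cast<uint8_t>(A));
  assert(ZextTrunc == A % 256);   // zext(trunc A to i8) to i32 == A urem 2^8
  return 0;
}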
+ if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr)) + if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) { + LHS = Trunc->getOperand(); + if (LHS->getType() != Expr->getType()) + LHS = getZeroExtendExpr(LHS, Expr->getType()); + RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) + << getTypeSizeInBits(Trunc->getType())); + return true; + } const auto *Add = dyn_cast<SCEVAddExpr>(Expr); if (Add == nullptr || Add->getNumOperands() != 2) return false; @@ -13194,146 +13194,146 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); return false; } - -const SCEV * -ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) { - SmallVector<BasicBlock*, 16> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // Form an expression for the maximum exit count possible for this loop. We - // merge the max and exact information to approximate a version of - // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. - SmallVector<const SCEV*, 4> ExitCounts; - for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = getExitCount(L, ExitingBB); - if (isa<SCEVCouldNotCompute>(ExitCount)) - ExitCount = getExitCount(L, ExitingBB, - ScalarEvolution::ConstantMaximum); - if (!isa<SCEVCouldNotCompute>(ExitCount)) { - assert(DT.dominates(ExitingBB, L->getLoopLatch()) && - "We should only have known counts for exiting blocks that " - "dominate latch!"); - ExitCounts.push_back(ExitCount); - } - } - if (ExitCounts.empty()) - return getCouldNotCompute(); - return getUMinFromMismatchedTypes(ExitCounts); -} - -/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown -/// components following the Map (Value -> SCEV)), but skips AddRecExpr because -/// we cannot guarantee that the replacement is loop invariant in the loop of -/// the AddRec. -class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> { - ValueToSCEVMapTy ⤅ - -public: - SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M) - : SCEVRewriteVisitor(SE), Map(M) {} - - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; } - - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - auto I = Map.find(Expr->getValue()); - if (I == Map.end()) - return Expr; - return I->second; - } -}; - -const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { - auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, - const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) { - if (!isa<SCEVUnknown>(LHS)) { - std::swap(LHS, RHS); - Predicate = CmpInst::getSwappedPredicate(Predicate); - } - - // For now, limit to conditions that provide information about unknown - // expressions. - auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS); - if (!LHSUnknown) - return; - - // TODO: use information from more predicates. 
- switch (Predicate) { - case CmpInst::ICMP_ULT: { - if (!containsAddRecurrence(RHS)) { - const SCEV *Base = LHS; - auto I = RewriteMap.find(LHSUnknown->getValue()); - if (I != RewriteMap.end()) - Base = I->second; - - RewriteMap[LHSUnknown->getValue()] = - getUMinExpr(Base, getMinusSCEV(RHS, getOne(RHS->getType()))); - } - break; - } - case CmpInst::ICMP_ULE: { - if (!containsAddRecurrence(RHS)) { - const SCEV *Base = LHS; - auto I = RewriteMap.find(LHSUnknown->getValue()); - if (I != RewriteMap.end()) - Base = I->second; - RewriteMap[LHSUnknown->getValue()] = getUMinExpr(Base, RHS); - } - break; - } - case CmpInst::ICMP_EQ: - if (isa<SCEVConstant>(RHS)) - RewriteMap[LHSUnknown->getValue()] = RHS; - break; - case CmpInst::ICMP_NE: - if (isa<SCEVConstant>(RHS) && - cast<SCEVConstant>(RHS)->getValue()->isNullValue()) - RewriteMap[LHSUnknown->getValue()] = - getUMaxExpr(LHS, getOne(RHS->getType())); - break; - default: - break; - } - }; - // Starting at the loop predecessor, climb up the predecessor chain, as long - // as there are predecessors that can be found that have unique successors - // leading to the original header. - // TODO: share this logic with isLoopEntryGuardedByCond. - ValueToSCEVMapTy RewriteMap; - for (std::pair<const BasicBlock *, const BasicBlock *> Pair( - L->getLoopPredecessor(), L->getHeader()); - Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { - - const BranchInst *LoopEntryPredicate = - dyn_cast<BranchInst>(Pair.first->getTerminator()); - if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) - continue; - - // TODO: use information from more complex conditions, e.g. AND expressions. - auto *Cmp = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition()); - if (!Cmp) - continue; - - auto Predicate = Cmp->getPredicate(); - if (LoopEntryPredicate->getSuccessor(1) == Pair.second) - Predicate = CmpInst::getInversePredicate(Predicate); - CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)), - getSCEV(Cmp->getOperand(1)), RewriteMap); - } - - // Also collect information from assumptions dominating the loop. - for (auto &AssumeVH : AC.assumptions()) { - if (!AssumeVH) - continue; - auto *AssumeI = cast<CallInst>(AssumeVH); - auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0)); - if (!Cmp || !DT.dominates(AssumeI, L->getHeader())) - continue; - CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)), - getSCEV(Cmp->getOperand(1)), RewriteMap); - } - - if (RewriteMap.empty()) - return Expr; - SCEVLoopGuardRewriter Rewriter(*this, RewriteMap); - return Rewriter.visit(Expr); -} + +const SCEV * +ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) { + SmallVector<BasicBlock*, 16> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Form an expression for the maximum exit count possible for this loop. We + // merge the max and exact information to approximate a version of + // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 
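computeSymbolicMaxBackedgeTakenCount, being restored above, takes the umin of the per-exit counts; the intuition is that a multi-exit loop cannot take its backedge more often than its earliest exit allows. A standalone illustration with two made-up exit limits:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t ExitA = 100, ExitB = 37;  // assumed per-exit iteration limits
  uint32_t Taken = 0;
  for (uint32_t I = 0;; ++I, ++Taken) {
    if (I == ExitA) break;                 // first exiting condition
    if (I == ExitB) break;                 // second exiting condition
  }
  assert(Taken <= std::min(ExitA, ExitB)); // bounded by the smaller exit count
  return 0;
}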
+ SmallVector<const SCEV*, 4> ExitCounts; + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = getExitCount(L, ExitingBB); + if (isa<SCEVCouldNotCompute>(ExitCount)) + ExitCount = getExitCount(L, ExitingBB, + ScalarEvolution::ConstantMaximum); + if (!isa<SCEVCouldNotCompute>(ExitCount)) { + assert(DT.dominates(ExitingBB, L->getLoopLatch()) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + } + } + if (ExitCounts.empty()) + return getCouldNotCompute(); + return getUMinFromMismatchedTypes(ExitCounts); +} + +/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown +/// components following the Map (Value -> SCEV)), but skips AddRecExpr because +/// we cannot guarantee that the replacement is loop invariant in the loop of +/// the AddRec. +class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> { + ValueToSCEVMapTy ⤅ + +public: + SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M) + : SCEVRewriteVisitor(SE), Map(M) {} + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + auto I = Map.find(Expr->getValue()); + if (I == Map.end()) + return Expr; + return I->second; + } +}; + +const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { + auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, + const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) { + if (!isa<SCEVUnknown>(LHS)) { + std::swap(LHS, RHS); + Predicate = CmpInst::getSwappedPredicate(Predicate); + } + + // For now, limit to conditions that provide information about unknown + // expressions. + auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS); + if (!LHSUnknown) + return; + + // TODO: use information from more predicates. + switch (Predicate) { + case CmpInst::ICMP_ULT: { + if (!containsAddRecurrence(RHS)) { + const SCEV *Base = LHS; + auto I = RewriteMap.find(LHSUnknown->getValue()); + if (I != RewriteMap.end()) + Base = I->second; + + RewriteMap[LHSUnknown->getValue()] = + getUMinExpr(Base, getMinusSCEV(RHS, getOne(RHS->getType()))); + } + break; + } + case CmpInst::ICMP_ULE: { + if (!containsAddRecurrence(RHS)) { + const SCEV *Base = LHS; + auto I = RewriteMap.find(LHSUnknown->getValue()); + if (I != RewriteMap.end()) + Base = I->second; + RewriteMap[LHSUnknown->getValue()] = getUMinExpr(Base, RHS); + } + break; + } + case CmpInst::ICMP_EQ: + if (isa<SCEVConstant>(RHS)) + RewriteMap[LHSUnknown->getValue()] = RHS; + break; + case CmpInst::ICMP_NE: + if (isa<SCEVConstant>(RHS) && + cast<SCEVConstant>(RHS)->getValue()->isNullValue()) + RewriteMap[LHSUnknown->getValue()] = + getUMaxExpr(LHS, getOne(RHS->getType())); + break; + default: + break; + } + }; + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + // TODO: share this logic with isLoopEntryGuardedByCond. + ValueToSCEVMapTy RewriteMap; + for (std::pair<const BasicBlock *, const BasicBlock *> Pair( + L->getLoopPredecessor(), L->getHeader()); + Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + + const BranchInst *LoopEntryPredicate = + dyn_cast<BranchInst>(Pair.first->getTerminator()); + if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) + continue; + + // TODO: use information from more complex conditions, e.g. AND expressions. 
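The ULT case of CollectCondition above records that, under a dominating guard "x u< N", the value x can be replaced by umin(x, N - 1) without changing it; a plain-C++ check of that equivalence (the guard bound 16 is a demo value):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Guard = 16;                // assumed guard bound N
  for (uint32_t X = 0; X < 64; ++X)
    if (X < Guard)                          // inside the guarded region
      assert(std::min(X, Guard - 1) == X);  // umin(x, N - 1) == x
  return 0;
}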
+ auto *Cmp = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition()); + if (!Cmp) + continue; + + auto Predicate = Cmp->getPredicate(); + if (LoopEntryPredicate->getSuccessor(1) == Pair.second) + Predicate = CmpInst::getInversePredicate(Predicate); + CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)), + getSCEV(Cmp->getOperand(1)), RewriteMap); + } + + // Also collect information from assumptions dominating the loop. + for (auto &AssumeVH : AC.assumptions()) { + if (!AssumeVH) + continue; + auto *AssumeI = cast<CallInst>(AssumeVH); + auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0)); + if (!Cmp || !DT.dominates(AssumeI, L->getHeader())) + continue; + CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)), + getSCEV(Cmp->getOperand(1)), RewriteMap); + } + + if (RewriteMap.empty()) + return Expr; + SCEVLoopGuardRewriter Rewriter(*this, RewriteMap); + return Rewriter.visit(Expr); +} diff --git a/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 8f289feb3d..6299247bdf 100644 --- a/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -82,12 +82,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA, Value *BO = GetBaseValue(BS); if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr)) if (alias(MemoryLocation(AO ? AO : LocA.Ptr, - AO ? LocationSize::beforeOrAfterPointer() - : LocA.Size, + AO ? LocationSize::beforeOrAfterPointer() + : LocA.Size, AO ? AAMDNodes() : LocA.AATags), MemoryLocation(BO ? BO : LocB.Ptr, - BO ? LocationSize::beforeOrAfterPointer() - : LocB.Size, + BO ? LocationSize::beforeOrAfterPointer() + : LocB.Size, BO ? AAMDNodes() : LocB.AATags), AAQI) == NoAlias) return NoAlias; diff --git a/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionDivision.cpp b/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionDivision.cpp index 64e908bdf3..5ec18d8e9d 100644 --- a/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionDivision.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ScalarEvolutionDivision.cpp @@ -215,14 +215,14 @@ void SCEVDivision::visitMulExpr(const SCEVMulExpr *Numerator) { return cannotDivide(Numerator); // The Remainder is obtained by replacing Denominator by 0 in Numerator. - ValueToSCEVMapTy RewriteMap; - RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = Zero; - Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap); + ValueToSCEVMapTy RewriteMap; + RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = Zero; + Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap); if (Remainder->isZero()) { // The Quotient is obtained by replacing Denominator by 1 in Numerator. 
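The substitution trick used in visitMulExpr above (Denominator -> 0 gives the remainder; Denominator -> 1 gives the quotient once the remainder is zero) can be sanity-checked on a toy polynomial; the expression 6*D below is an assumption for illustration only:

#include <cassert>

static long numerator(long D) { return 6 * D; }  // assumed sample numerator

int main() {
  const long D = 7;
  assert(numerator(0) == numerator(D) % D);      // remainder via D -> 0
  assert(numerator(1) == numerator(D) / D);      // quotient via D -> 1
  return 0;
}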
- RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = One; - Quotient = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap); + RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = One; + Quotient = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap); return; } diff --git a/contrib/libs/llvm12/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/libs/llvm12/lib/Analysis/ScopedNoAliasAA.cpp index 6b38d6716b..c6c3278e94 100644 --- a/contrib/libs/llvm12/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ScopedNoAliasAA.cpp @@ -34,7 +34,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -161,7 +161,7 @@ ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, char ScopedNoAliasAAWrapperPass::ID = 0; -INITIALIZE_PASS(ScopedNoAliasAAWrapperPass, "scoped-noalias-aa", +INITIALIZE_PASS(ScopedNoAliasAAWrapperPass, "scoped-noalias-aa", "Scoped NoAlias Alias Analysis", false, true) ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() { diff --git a/contrib/libs/llvm12/lib/Analysis/StackLifetime.cpp b/contrib/libs/llvm12/lib/Analysis/StackLifetime.cpp index ab5f2db7d1..c4c07bfaed 100644 --- a/contrib/libs/llvm12/lib/Analysis/StackLifetime.cpp +++ b/contrib/libs/llvm12/lib/Analysis/StackLifetime.cpp @@ -11,7 +11,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" @@ -64,28 +64,28 @@ bool StackLifetime::isAliveAfter(const AllocaInst *AI, return getLiveRange(AI).test(InstNum); } -// Returns unique alloca annotated by lifetime marker only if -// markers has the same size and points to the alloca start. -static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II, - const DataLayout &DL) { - const AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true); - if (!AI) - return nullptr; - - auto AllocaSizeInBits = AI->getAllocationSizeInBits(DL); - if (!AllocaSizeInBits) - return nullptr; - int64_t AllocaSize = AllocaSizeInBits.getValue() / 8; - - auto *Size = dyn_cast<ConstantInt>(II.getArgOperand(0)); - if (!Size) - return nullptr; - int64_t LifetimeSize = Size->getSExtValue(); - - if (LifetimeSize != -1 && LifetimeSize != AllocaSize) - return nullptr; - - return AI; +// Returns unique alloca annotated by lifetime marker only if +// markers has the same size and points to the alloca start. 
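findMatchingAlloca, introduced above, only pairs a lifetime intrinsic with an alloca when the marker covers the whole object; a hedged sketch of just that size test (a standalone helper, not the LLVM function):

#include <cstdint>

// -1 in a lifetime marker's size operand means "the entire object".
static bool coversWholeAlloca(int64_t LifetimeSize, int64_t AllocaSize) {
  return LifetimeSize == -1 || LifetimeSize == AllocaSize;
}

int main() {
  return (coversWholeAlloca(-1, 64) && coversWholeAlloca(64, 64) &&
          !coversWholeAlloca(32, 64))
             ? 0
             : 1;
}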
+static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II, + const DataLayout &DL) { + const AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true); + if (!AI) + return nullptr; + + auto AllocaSizeInBits = AI->getAllocationSizeInBits(DL); + if (!AllocaSizeInBits) + return nullptr; + int64_t AllocaSize = AllocaSizeInBits.getValue() / 8; + + auto *Size = dyn_cast<ConstantInt>(II.getArgOperand(0)); + if (!Size) + return nullptr; + int64_t LifetimeSize = Size->getSExtValue(); + + if (LifetimeSize != -1 && LifetimeSize != AllocaSize) + return nullptr; + + return AI; } void StackLifetime::collectMarkers() { @@ -93,27 +93,27 @@ void StackLifetime::collectMarkers() { DenseMap<const BasicBlock *, SmallDenseMap<const IntrinsicInst *, Marker>> BBMarkerSet; - const DataLayout &DL = F.getParent()->getDataLayout(); - + const DataLayout &DL = F.getParent()->getDataLayout(); + // Compute the set of start/end markers per basic block. - for (const BasicBlock *BB : depth_first(&F)) { - for (const Instruction &I : *BB) { - const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); - if (!II || !II->isLifetimeStartOrEnd()) - continue; - const AllocaInst *AI = findMatchingAlloca(*II, DL); - if (!AI) { - HasUnknownLifetimeStartOrEnd = true; - continue; + for (const BasicBlock *BB : depth_first(&F)) { + for (const Instruction &I : *BB) { + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); + if (!II || !II->isLifetimeStartOrEnd()) + continue; + const AllocaInst *AI = findMatchingAlloca(*II, DL); + if (!AI) { + HasUnknownLifetimeStartOrEnd = true; + continue; } - auto It = AllocaNumbering.find(AI); - if (It == AllocaNumbering.end()) - continue; - auto AllocaNo = It->second; - bool IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start; - if (IsStart) - InterestingAllocas.set(AllocaNo); - BBMarkerSet[BB][II] = {AllocaNo, IsStart}; + auto It = AllocaNumbering.find(AI); + if (It == AllocaNumbering.end()) + continue; + auto AllocaNo = It->second; + bool IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start; + if (IsStart) + InterestingAllocas.set(AllocaNo); + BBMarkerSet[BB][II] = {AllocaNo, IsStart}; } } @@ -292,7 +292,7 @@ LLVM_DUMP_METHOD void StackLifetime::dumpBlockLiveness() const { const BasicBlock *BB = IT.getFirst(); const BlockLifetimeInfo &BlockInfo = BlockLiveness.find(BB)->getSecond(); auto BlockRange = BlockInstRange.find(BB)->getSecond(); - dbgs() << " BB (" << BB->getName() << ") [" << BlockRange.first << ", " << BlockRange.second + dbgs() << " BB (" << BB->getName() << ") [" << BlockRange.first << ", " << BlockRange.second << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End << ", livein " << BlockInfo.LiveIn << ", liveout " << BlockInfo.LiveOut << "\n"; @@ -319,20 +319,20 @@ StackLifetime::StackLifetime(const Function &F, } void StackLifetime::run() { - if (HasUnknownLifetimeStartOrEnd) { - // There is marker which we can't assign to a specific alloca, so we - // fallback to the most conservative results for the type. - switch (Type) { - case LivenessType::May: - LiveRanges.resize(NumAllocas, getFullLiveRange()); - break; - case LivenessType::Must: - LiveRanges.resize(NumAllocas, LiveRange(Instructions.size())); - break; - } - return; - } - + if (HasUnknownLifetimeStartOrEnd) { + // There is marker which we can't assign to a specific alloca, so we + // fallback to the most conservative results for the type. 
+ switch (Type) { + case LivenessType::May: + LiveRanges.resize(NumAllocas, getFullLiveRange()); + break; + case LivenessType::Must: + LiveRanges.resize(NumAllocas, LiveRange(Instructions.size())); + break; + } + return; + } + LiveRanges.resize(NumAllocas, LiveRange(Instructions.size())); for (unsigned I = 0; I < NumAllocas; ++I) if (!InterestingAllocas.test(I)) diff --git a/contrib/libs/llvm12/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/StackSafetyAnalysis.cpp index 73096eb4ba..aa7d3fc4bb 100644 --- a/contrib/libs/llvm12/lib/Analysis/StackSafetyAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/StackSafetyAnalysis.cpp @@ -22,7 +22,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -38,25 +38,25 @@ using namespace llvm; STATISTIC(NumAllocaStackSafe, "Number of safe allocas"); STATISTIC(NumAllocaTotal, "Number of total allocas"); -STATISTIC(NumCombinedCalleeLookupTotal, - "Number of total callee lookups on combined index."); -STATISTIC(NumCombinedCalleeLookupFailed, - "Number of failed callee lookups on combined index."); -STATISTIC(NumModuleCalleeLookupTotal, - "Number of total callee lookups on module index."); -STATISTIC(NumModuleCalleeLookupFailed, - "Number of failed callee lookups on module index."); -STATISTIC(NumCombinedParamAccessesBefore, - "Number of total param accesses before generateParamAccessSummary."); -STATISTIC(NumCombinedParamAccessesAfter, - "Number of total param accesses after generateParamAccessSummary."); -STATISTIC(NumCombinedDataFlowNodes, - "Number of total nodes in combined index for dataflow processing."); -STATISTIC(NumIndexCalleeUnhandled, "Number of index callee which are unhandled."); -STATISTIC(NumIndexCalleeMultipleWeak, "Number of index callee non-unique weak."); -STATISTIC(NumIndexCalleeMultipleExternal, "Number of index callee non-unique external."); - - +STATISTIC(NumCombinedCalleeLookupTotal, + "Number of total callee lookups on combined index."); +STATISTIC(NumCombinedCalleeLookupFailed, + "Number of failed callee lookups on combined index."); +STATISTIC(NumModuleCalleeLookupTotal, + "Number of total callee lookups on module index."); +STATISTIC(NumModuleCalleeLookupFailed, + "Number of failed callee lookups on module index."); +STATISTIC(NumCombinedParamAccessesBefore, + "Number of total param accesses before generateParamAccessSummary."); +STATISTIC(NumCombinedParamAccessesAfter, + "Number of total param accesses after generateParamAccessSummary."); +STATISTIC(NumCombinedDataFlowNodes, + "Number of total nodes in combined index for dataflow processing."); +STATISTIC(NumIndexCalleeUnhandled, "Number of index callee which are unhandled."); +STATISTIC(NumIndexCalleeMultipleWeak, "Number of index callee non-unique weak."); +STATISTIC(NumIndexCalleeMultipleExternal, "Number of index callee non-unique external."); + + static cl::opt<int> StackSafetyMaxIterations("stack-safety-max-iterations", cl::init(20), cl::Hidden); @@ -68,47 +68,47 @@ static cl::opt<bool> StackSafetyRun("stack-safety-run", cl::init(false), namespace { -// Check if we should bailout for such ranges. 
-bool isUnsafe(const ConstantRange &R) { - return R.isEmptySet() || R.isFullSet() || R.isUpperSignWrapped(); -} - -ConstantRange addOverflowNever(const ConstantRange &L, const ConstantRange &R) { - assert(!L.isSignWrappedSet()); - assert(!R.isSignWrappedSet()); - if (L.signedAddMayOverflow(R) != - ConstantRange::OverflowResult::NeverOverflows) - return ConstantRange::getFull(L.getBitWidth()); - ConstantRange Result = L.add(R); - assert(!Result.isSignWrappedSet()); - return Result; -} - -ConstantRange unionNoWrap(const ConstantRange &L, const ConstantRange &R) { - assert(!L.isSignWrappedSet()); - assert(!R.isSignWrappedSet()); - auto Result = L.unionWith(R); - // Two non-wrapped sets can produce wrapped. - if (Result.isSignWrappedSet()) - Result = ConstantRange::getFull(Result.getBitWidth()); - return Result; -} - +// Check if we should bailout for such ranges. +bool isUnsafe(const ConstantRange &R) { + return R.isEmptySet() || R.isFullSet() || R.isUpperSignWrapped(); +} + +ConstantRange addOverflowNever(const ConstantRange &L, const ConstantRange &R) { + assert(!L.isSignWrappedSet()); + assert(!R.isSignWrappedSet()); + if (L.signedAddMayOverflow(R) != + ConstantRange::OverflowResult::NeverOverflows) + return ConstantRange::getFull(L.getBitWidth()); + ConstantRange Result = L.add(R); + assert(!Result.isSignWrappedSet()); + return Result; +} + +ConstantRange unionNoWrap(const ConstantRange &L, const ConstantRange &R) { + assert(!L.isSignWrappedSet()); + assert(!R.isSignWrappedSet()); + auto Result = L.unionWith(R); + // Two non-wrapped sets can produce wrapped. + if (Result.isSignWrappedSet()) + Result = ConstantRange::getFull(Result.getBitWidth()); + return Result; +} + /// Describes use of address in as a function call argument. template <typename CalleeTy> struct CallInfo { /// Function being called. const CalleeTy *Callee = nullptr; /// Index of argument which pass address. size_t ParamNo = 0; - - CallInfo(const CalleeTy *Callee, size_t ParamNo) - : Callee(Callee), ParamNo(ParamNo) {} - - struct Less { - bool operator()(const CallInfo &L, const CallInfo &R) const { - return std::tie(L.ParamNo, L.Callee) < std::tie(R.ParamNo, R.Callee); - } - }; + + CallInfo(const CalleeTy *Callee, size_t ParamNo) + : Callee(Callee), ParamNo(ParamNo) {} + + struct Less { + bool operator()(const CallInfo &L, const CallInfo &R) const { + return std::tie(L.ParamNo, L.Callee) < std::tie(R.ParamNo, R.Callee); + } + }; }; /// Describe uses of address (alloca or parameter) inside of the function. @@ -118,26 +118,26 @@ template <typename CalleeTy> struct UseInfo { ConstantRange Range; // List of calls which pass address as an argument. - // Value is offset range of address from base address (alloca or calling - // function argument). Range should never set to empty-set, that is an invalid - // access range that can cause empty-set to be propagated with - // ConstantRange::add - using CallsTy = std::map<CallInfo<CalleeTy>, ConstantRange, - typename CallInfo<CalleeTy>::Less>; - CallsTy Calls; + // Value is offset range of address from base address (alloca or calling + // function argument). 
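addOverflowNever above widens to the full range as soon as the signed addition might overflow; the same idea on a toy interval type is sketched below (a sketch under assumptions, not LLVM's ConstantRange API):

#include <cassert>
#include <cstdint>
#include <limits>

struct Interval { int64_t Lo, Hi; bool Full; };  // toy stand-in for a range

static bool addOverflows(int64_t A, int64_t B) {
  return (B > 0 && A > std::numeric_limits<int64_t>::max() - B) ||
         (B < 0 && A < std::numeric_limits<int64_t>::min() - B);
}

static Interval addNoWrap(Interval L, Interval R) {
  if (L.Full || R.Full || addOverflows(L.Lo, R.Lo) || addOverflows(L.Hi, R.Hi))
    return {0, 0, true};                         // conservative: unknown offsets
  return {L.Lo + R.Lo, L.Hi + R.Hi, false};
}

int main() {
  Interval Sum = addNoWrap({0, 16, false}, {8, 24, false});
  assert(!Sum.Full && Sum.Lo == 8 && Sum.Hi == 40);
  return 0;
}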
Range should never set to empty-set, that is an invalid + // access range that can cause empty-set to be propagated with + // ConstantRange::add + using CallsTy = std::map<CallInfo<CalleeTy>, ConstantRange, + typename CallInfo<CalleeTy>::Less>; + CallsTy Calls; UseInfo(unsigned PointerSize) : Range{PointerSize, false} {} - void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); } + void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); } }; template <typename CalleeTy> raw_ostream &operator<<(raw_ostream &OS, const UseInfo<CalleeTy> &U) { OS << U.Range; for (auto &Call : U.Calls) - OS << ", " - << "@" << Call.first.Callee->getName() << "(arg" << Call.first.ParamNo - << ", " << Call.second << ")"; + OS << ", " + << "@" << Call.first.Callee->getName() << "(arg" << Call.first.ParamNo + << ", " << Call.second << ")"; return OS; } @@ -208,7 +208,7 @@ template <typename CalleeTy> struct FunctionInfo { } else { assert(Allocas.empty()); } - O << "\n"; + O << "\n"; } }; @@ -419,11 +419,11 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, } assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee)); - ConstantRange Offsets = offsetFrom(UI, Ptr); - auto Insert = - US.Calls.emplace(CallInfo<GlobalValue>(Callee, ArgNo), Offsets); - if (!Insert.second) - Insert.first->second = Insert.first->second.unionWith(Offsets); + ConstantRange Offsets = offsetFrom(UI, Ptr); + auto Insert = + US.Calls.emplace(CallInfo<GlobalValue>(Callee, ArgNo), Offsets); + if (!Insert.second) + Insert.first->second = Insert.first->second.unionWith(Offsets); break; } @@ -456,7 +456,7 @@ FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() { analyzeAllUses(AI, UI, SL); } - for (Argument &A : F.args()) { + for (Argument &A : F.args()) { // Non pointers and bypass arguments are not going to be used in any global // processing. 
if (A.getType()->isPointerTy() && !A.hasByValAttr()) { @@ -529,18 +529,18 @@ template <typename CalleeTy> bool StackSafetyDataFlowAnalysis<CalleeTy>::updateOneUse(UseInfo<CalleeTy> &US, bool UpdateToFullSet) { bool Changed = false; - for (auto &KV : US.Calls) { - assert(!KV.second.isEmptySet() && + for (auto &KV : US.Calls) { + assert(!KV.second.isEmptySet() && "Param range can't be empty-set, invalid offset range"); ConstantRange CalleeRange = - getArgumentAccessRange(KV.first.Callee, KV.first.ParamNo, KV.second); + getArgumentAccessRange(KV.first.Callee, KV.first.ParamNo, KV.second); if (!US.Range.contains(CalleeRange)) { Changed = true; if (UpdateToFullSet) US.Range = UnknownRange; else - US.updateRange(CalleeRange); + US.updateRange(CalleeRange); } } return Changed; @@ -574,7 +574,7 @@ void StackSafetyDataFlowAnalysis<CalleeTy>::runDataFlow() { auto &FS = F.second; for (auto &KV : FS.Params) for (auto &CS : KV.second.Calls) - Callees.push_back(CS.first.Callee); + Callees.push_back(CS.first.Callee); llvm::sort(Callees); Callees.erase(std::unique(Callees.begin(), Callees.end()), Callees.end()); @@ -609,52 +609,52 @@ StackSafetyDataFlowAnalysis<CalleeTy>::run() { return Functions; } -FunctionSummary *findCalleeFunctionSummary(ValueInfo VI, StringRef ModuleId) { - if (!VI) - return nullptr; - auto SummaryList = VI.getSummaryList(); - GlobalValueSummary* S = nullptr; - for (const auto& GVS : SummaryList) { - if (!GVS->isLive()) - continue; - if (const AliasSummary *AS = dyn_cast<AliasSummary>(GVS.get())) - if (!AS->hasAliasee()) - continue; - if (!isa<FunctionSummary>(GVS->getBaseObject())) - continue; - if (GlobalValue::isLocalLinkage(GVS->linkage())) { - if (GVS->modulePath() == ModuleId) { - S = GVS.get(); - break; - } - } else if (GlobalValue::isExternalLinkage(GVS->linkage())) { - if (S) { - ++NumIndexCalleeMultipleExternal; - return nullptr; - } - S = GVS.get(); - } else if (GlobalValue::isWeakLinkage(GVS->linkage())) { - if (S) { - ++NumIndexCalleeMultipleWeak; - return nullptr; - } - S = GVS.get(); - } else if (GlobalValue::isAvailableExternallyLinkage(GVS->linkage()) || - GlobalValue::isLinkOnceLinkage(GVS->linkage())) { - if (SummaryList.size() == 1) - S = GVS.get(); - // According thinLTOResolvePrevailingGUID these are unlikely prevailing. - } else { - ++NumIndexCalleeUnhandled; - } - }; +FunctionSummary *findCalleeFunctionSummary(ValueInfo VI, StringRef ModuleId) { + if (!VI) + return nullptr; + auto SummaryList = VI.getSummaryList(); + GlobalValueSummary* S = nullptr; + for (const auto& GVS : SummaryList) { + if (!GVS->isLive()) + continue; + if (const AliasSummary *AS = dyn_cast<AliasSummary>(GVS.get())) + if (!AS->hasAliasee()) + continue; + if (!isa<FunctionSummary>(GVS->getBaseObject())) + continue; + if (GlobalValue::isLocalLinkage(GVS->linkage())) { + if (GVS->modulePath() == ModuleId) { + S = GVS.get(); + break; + } + } else if (GlobalValue::isExternalLinkage(GVS->linkage())) { + if (S) { + ++NumIndexCalleeMultipleExternal; + return nullptr; + } + S = GVS.get(); + } else if (GlobalValue::isWeakLinkage(GVS->linkage())) { + if (S) { + ++NumIndexCalleeMultipleWeak; + return nullptr; + } + S = GVS.get(); + } else if (GlobalValue::isAvailableExternallyLinkage(GVS->linkage()) || + GlobalValue::isLinkOnceLinkage(GVS->linkage())) { + if (SummaryList.size() == 1) + S = GVS.get(); + // According thinLTOResolvePrevailingGUID these are unlikely prevailing. 
+ } else { + ++NumIndexCalleeUnhandled; + } + }; while (S) { if (!S->isLive() || !S->isDSOLocal()) return nullptr; if (FunctionSummary *FS = dyn_cast<FunctionSummary>(S)) return FS; AliasSummary *AS = dyn_cast<AliasSummary>(S); - if (!AS || !AS->hasAliasee()) + if (!AS || !AS->hasAliasee()) return nullptr; S = AS->getBaseObject(); if (S == AS) @@ -692,33 +692,33 @@ const ConstantRange *findParamAccess(const FunctionSummary &FS, void resolveAllCalls(UseInfo<GlobalValue> &Use, const ModuleSummaryIndex *Index) { ConstantRange FullSet(Use.Range.getBitWidth(), true); - // Move Use.Calls to a temp storage and repopulate - don't use std::move as it - // leaves Use.Calls in an undefined state. - UseInfo<GlobalValue>::CallsTy TmpCalls; - std::swap(TmpCalls, Use.Calls); - for (const auto &C : TmpCalls) { - const Function *F = findCalleeInModule(C.first.Callee); + // Move Use.Calls to a temp storage and repopulate - don't use std::move as it + // leaves Use.Calls in an undefined state. + UseInfo<GlobalValue>::CallsTy TmpCalls; + std::swap(TmpCalls, Use.Calls); + for (const auto &C : TmpCalls) { + const Function *F = findCalleeInModule(C.first.Callee); if (F) { - Use.Calls.emplace(CallInfo<GlobalValue>(F, C.first.ParamNo), C.second); + Use.Calls.emplace(CallInfo<GlobalValue>(F, C.first.ParamNo), C.second); continue; } if (!Index) return Use.updateRange(FullSet); - FunctionSummary *FS = - findCalleeFunctionSummary(Index->getValueInfo(C.first.Callee->getGUID()), - C.first.Callee->getParent()->getModuleIdentifier()); - ++NumModuleCalleeLookupTotal; - if (!FS) { - ++NumModuleCalleeLookupFailed; + FunctionSummary *FS = + findCalleeFunctionSummary(Index->getValueInfo(C.first.Callee->getGUID()), + C.first.Callee->getParent()->getModuleIdentifier()); + ++NumModuleCalleeLookupTotal; + if (!FS) { + ++NumModuleCalleeLookupFailed; return Use.updateRange(FullSet); - } - const ConstantRange *Found = findParamAccess(*FS, C.first.ParamNo); - if (!Found || Found->isFullSet()) + } + const ConstantRange *Found = findParamAccess(*FS, C.first.ParamNo); + if (!Found || Found->isFullSet()) return Use.updateRange(FullSet); ConstantRange Access = Found->sextOrTrunc(Use.Range.getBitWidth()); - if (!Access.isEmptySet()) - Use.updateRange(addOverflowNever(Access, C.second)); + if (!Access.isEmptySet()) + Use.updateRange(addOverflowNever(Access, C.second)); } } @@ -733,11 +733,11 @@ GVToSSI createGlobalStackSafetyInfo( auto Copy = Functions; for (auto &FnKV : Copy) - for (auto &KV : FnKV.second.Params) { + for (auto &KV : FnKV.second.Params) { resolveAllCalls(KV.second, Index); - if (KV.second.Range.isFullSet()) - KV.second.Calls.clear(); - } + if (KV.second.Range.isFullSet()) + KV.second.Calls.clear(); + } uint32_t PointerSize = Copy.begin() ->first->getParent() @@ -752,8 +752,8 @@ GVToSSI createGlobalStackSafetyInfo( auto &A = KV.second; resolveAllCalls(A, Index); for (auto &C : A.Calls) { - A.updateRange(SSDFA.getArgumentAccessRange(C.first.Callee, - C.first.ParamNo, C.second)); + A.updateRange(SSDFA.getArgumentAccessRange(C.first.Callee, + C.first.ParamNo, C.second)); } // FIXME: This is needed only to preserve calls in print() results. 
A.Calls = SrcF.Allocas.find(KV.first)->second.Calls; @@ -822,7 +822,7 @@ const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const { } std::vector<FunctionSummary::ParamAccess> -StackSafetyInfo::getParamAccesses(ModuleSummaryIndex &Index) const { +StackSafetyInfo::getParamAccesses(ModuleSummaryIndex &Index) const { // Implementation transforms internal representation of parameter information // into FunctionSummary format. std::vector<FunctionSummary::ParamAccess> ParamAccesses; @@ -843,21 +843,21 @@ StackSafetyInfo::getParamAccesses(ModuleSummaryIndex &Index) const { // will make ParamAccess::Range as FullSet anyway. So we can drop the // entire parameter like we did above. // TODO(vitalybuka): Return already filtered parameters from getInfo(). - if (C.second.isFullSet()) { + if (C.second.isFullSet()) { ParamAccesses.pop_back(); break; } - Param.Calls.emplace_back(C.first.ParamNo, - Index.getOrInsertValueInfo(C.first.Callee), - C.second); + Param.Calls.emplace_back(C.first.ParamNo, + Index.getOrInsertValueInfo(C.first.Callee), + C.second); } } - for (FunctionSummary::ParamAccess &Param : ParamAccesses) { - sort(Param.Calls, [](const FunctionSummary::ParamAccess::Call &L, - const FunctionSummary::ParamAccess::Call &R) { - return std::tie(L.ParamNo, L.Callee) < std::tie(R.ParamNo, R.Callee); - }); - } + for (FunctionSummary::ParamAccess &Param : ParamAccesses) { + sort(Param.Calls, [](const FunctionSummary::ParamAccess::Call &L, + const FunctionSummary::ParamAccess::Call &R) { + return std::tie(L.ParamNo, L.Callee) < std::tie(R.ParamNo, R.Callee); + }); + } return ParamAccesses; } @@ -1002,28 +1002,28 @@ bool llvm::needsParamAccessSummary(const Module &M) { } void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { - if (!Index.hasParamAccess()) - return; + if (!Index.hasParamAccess()) + return; const ConstantRange FullSet(FunctionSummary::ParamAccess::RangeWidth, true); - - auto CountParamAccesses = [&](auto &Stat) { - if (!AreStatisticsEnabled()) - return; - for (auto &GVS : Index) - for (auto &GV : GVS.second.SummaryList) - if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get())) - Stat += FS->paramAccesses().size(); - }; - - CountParamAccesses(NumCombinedParamAccessesBefore); - + + auto CountParamAccesses = [&](auto &Stat) { + if (!AreStatisticsEnabled()) + return; + for (auto &GVS : Index) + for (auto &GV : GVS.second.SummaryList) + if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get())) + Stat += FS->paramAccesses().size(); + }; + + CountParamAccesses(NumCombinedParamAccessesBefore); + std::map<const FunctionSummary *, FunctionInfo<FunctionSummary>> Functions; // Convert the ModuleSummaryIndex to a FunctionMap for (auto &GVS : Index) { for (auto &GV : GVS.second.SummaryList) { FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get()); - if (!FS || FS->paramAccesses().empty()) + if (!FS || FS->paramAccesses().empty()) continue; if (FS->isLive() && FS->isDSOLocal()) { FunctionInfo<FunctionSummary> FI; @@ -1035,17 +1035,17 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { US.Range = PS.Use; for (auto &Call : PS.Calls) { assert(!Call.Offsets.isFullSet()); - FunctionSummary *S = - findCalleeFunctionSummary(Call.Callee, FS->modulePath()); - ++NumCombinedCalleeLookupTotal; + FunctionSummary *S = + findCalleeFunctionSummary(Call.Callee, FS->modulePath()); + ++NumCombinedCalleeLookupTotal; if (!S) { - ++NumCombinedCalleeLookupFailed; + ++NumCombinedCalleeLookupFailed; US.Range = FullSet; US.Calls.clear(); break; } - 
US.Calls.emplace(CallInfo<FunctionSummary>(S, Call.ParamNo), - Call.Offsets); + US.Calls.emplace(CallInfo<FunctionSummary>(S, Call.ParamNo), + Call.Offsets); } } Functions.emplace(FS, std::move(FI)); @@ -1056,16 +1056,16 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { FS->setParamAccesses({}); } } - NumCombinedDataFlowNodes += Functions.size(); + NumCombinedDataFlowNodes += Functions.size(); StackSafetyDataFlowAnalysis<FunctionSummary> SSDFA( FunctionSummary::ParamAccess::RangeWidth, std::move(Functions)); for (auto &KV : SSDFA.run()) { std::vector<FunctionSummary::ParamAccess> NewParams; NewParams.reserve(KV.second.Params.size()); for (auto &Param : KV.second.Params) { - // It's not needed as FullSet is processed the same as a missing value. - if (Param.second.Range.isFullSet()) - continue; + // It's not needed as FullSet is processed the same as a missing value. + if (Param.second.Range.isFullSet()) + continue; NewParams.emplace_back(); FunctionSummary::ParamAccess &New = NewParams.back(); New.ParamNo = Param.first; @@ -1074,8 +1074,8 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { const_cast<FunctionSummary *>(KV.first)->setParamAccesses( std::move(NewParams)); } - - CountParamAccesses(NumCombinedParamAccessesAfter); + + CountParamAccesses(NumCombinedParamAccessesAfter); } static const char LocalPassArg[] = "stack-safety-local"; diff --git a/contrib/libs/llvm12/lib/Analysis/SyncDependenceAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/SyncDependenceAnalysis.cpp index 67a1365b69..f26176a2c5 100644 --- a/contrib/libs/llvm12/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/SyncDependenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===// +//===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -98,7 +98,7 @@ // loop exit and the loop header (_after_ SSA construction). // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/SyncDependenceAnalysis.h" +#include "llvm/Analysis/SyncDependenceAnalysis.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/PostDominators.h" @@ -107,355 +107,355 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include <functional> +#include <functional> #include <stack> #include <unordered_set> #define DEBUG_TYPE "sync-dependence" -// The SDA algorithm operates on a modified CFG - we modify the edges leaving -// loop headers as follows: -// -// * We remove all edges leaving all loop headers. -// * We add additional edges from the loop headers to their exit blocks. -// -// The modification is virtual, that is whenever we visit a loop header we -// pretend it had different successors. -namespace { -using namespace llvm; - -// Custom Post-Order Traveral -// -// We cannot use the vanilla (R)PO computation of LLVM because: -// * We (virtually) modify the CFG. -// * We want a loop-compact block enumeration, that is the numbers assigned by -// the traveral to the blocks of a loop are an interval. 
-using POCB = std::function<void(const BasicBlock &)>; -using VisitedSet = std::set<const BasicBlock *>; -using BlockStack = std::vector<const BasicBlock *>; - -// forward -static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, - VisitedSet &Finalized); - -// for a nested region (top-level loop or nested loop) -static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop, - POCB CallBack, VisitedSet &Finalized) { - const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr; - while (!Stack.empty()) { - const auto *NextBB = Stack.back(); - - auto *NestedLoop = LI.getLoopFor(NextBB); - bool IsNestedLoop = NestedLoop != Loop; - - // Treat the loop as a node - if (IsNestedLoop) { - SmallVector<BasicBlock *, 3> NestedExits; - NestedLoop->getUniqueExitBlocks(NestedExits); - bool PushedNodes = false; - for (const auto *NestedExitBB : NestedExits) { - if (NestedExitBB == LoopHeader) - continue; - if (Loop && !Loop->contains(NestedExitBB)) - continue; - if (Finalized.count(NestedExitBB)) - continue; - PushedNodes = true; - Stack.push_back(NestedExitBB); - } - if (!PushedNodes) { - // All loop exits finalized -> finish this node - Stack.pop_back(); - computeLoopPO(LI, *NestedLoop, CallBack, Finalized); - } - continue; - } - - // DAG-style - bool PushedNodes = false; - for (const auto *SuccBB : successors(NextBB)) { - if (SuccBB == LoopHeader) - continue; - if (Loop && !Loop->contains(SuccBB)) - continue; - if (Finalized.count(SuccBB)) - continue; - PushedNodes = true; - Stack.push_back(SuccBB); - } - if (!PushedNodes) { - // Never push nodes twice - Stack.pop_back(); - if (!Finalized.insert(NextBB).second) - continue; - CallBack(*NextBB); - } - } -} - -static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) { - VisitedSet Finalized; - BlockStack Stack; - Stack.reserve(24); // FIXME made-up number - Stack.push_back(&F.getEntryBlock()); - computeStackPO(Stack, LI, nullptr, CallBack, Finalized); -} - -static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, - VisitedSet &Finalized) { - /// Call CallBack on all loop blocks. - std::vector<const BasicBlock *> Stack; - const auto *LoopHeader = Loop.getHeader(); - - // Visit the header last - Finalized.insert(LoopHeader); - CallBack(*LoopHeader); - - // Initialize with immediate successors - for (const auto *BB : successors(LoopHeader)) { - if (!Loop.contains(BB)) - continue; - if (BB == LoopHeader) - continue; - Stack.push_back(BB); - } - - // Compute PO inside region - computeStackPO(Stack, LI, &Loop, CallBack, Finalized); -} - -} // namespace - +// The SDA algorithm operates on a modified CFG - we modify the edges leaving +// loop headers as follows: +// +// * We remove all edges leaving all loop headers. +// * We add additional edges from the loop headers to their exit blocks. +// +// The modification is virtual, that is whenever we visit a loop header we +// pretend it had different successors. +namespace { +using namespace llvm; + +// Custom Post-Order Traveral +// +// We cannot use the vanilla (R)PO computation of LLVM because: +// * We (virtually) modify the CFG. +// * We want a loop-compact block enumeration, that is the numbers assigned by +// the traveral to the blocks of a loop are an interval. 
+using POCB = std::function<void(const BasicBlock &)>; +using VisitedSet = std::set<const BasicBlock *>; +using BlockStack = std::vector<const BasicBlock *>; + +// forward +static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, + VisitedSet &Finalized); + +// for a nested region (top-level loop or nested loop) +static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop, + POCB CallBack, VisitedSet &Finalized) { + const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr; + while (!Stack.empty()) { + const auto *NextBB = Stack.back(); + + auto *NestedLoop = LI.getLoopFor(NextBB); + bool IsNestedLoop = NestedLoop != Loop; + + // Treat the loop as a node + if (IsNestedLoop) { + SmallVector<BasicBlock *, 3> NestedExits; + NestedLoop->getUniqueExitBlocks(NestedExits); + bool PushedNodes = false; + for (const auto *NestedExitBB : NestedExits) { + if (NestedExitBB == LoopHeader) + continue; + if (Loop && !Loop->contains(NestedExitBB)) + continue; + if (Finalized.count(NestedExitBB)) + continue; + PushedNodes = true; + Stack.push_back(NestedExitBB); + } + if (!PushedNodes) { + // All loop exits finalized -> finish this node + Stack.pop_back(); + computeLoopPO(LI, *NestedLoop, CallBack, Finalized); + } + continue; + } + + // DAG-style + bool PushedNodes = false; + for (const auto *SuccBB : successors(NextBB)) { + if (SuccBB == LoopHeader) + continue; + if (Loop && !Loop->contains(SuccBB)) + continue; + if (Finalized.count(SuccBB)) + continue; + PushedNodes = true; + Stack.push_back(SuccBB); + } + if (!PushedNodes) { + // Never push nodes twice + Stack.pop_back(); + if (!Finalized.insert(NextBB).second) + continue; + CallBack(*NextBB); + } + } +} + +static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) { + VisitedSet Finalized; + BlockStack Stack; + Stack.reserve(24); // FIXME made-up number + Stack.push_back(&F.getEntryBlock()); + computeStackPO(Stack, LI, nullptr, CallBack, Finalized); +} + +static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, + VisitedSet &Finalized) { + /// Call CallBack on all loop blocks. 
+ std::vector<const BasicBlock *> Stack; + const auto *LoopHeader = Loop.getHeader(); + + // Visit the header last + Finalized.insert(LoopHeader); + CallBack(*LoopHeader); + + // Initialize with immediate successors + for (const auto *BB : successors(LoopHeader)) { + if (!Loop.contains(BB)) + continue; + if (BB == LoopHeader) + continue; + Stack.push_back(BB); + } + + // Compute PO inside region + computeStackPO(Stack, LI, &Loop, CallBack, Finalized); +} + +} // namespace + namespace llvm { -ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc; +ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc; SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI) - : DT(DT), PDT(PDT), LI(LI) { - computeTopLevelPO(*DT.getRoot()->getParent(), LI, - [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); }); -} + : DT(DT), PDT(PDT), LI(LI) { + computeTopLevelPO(*DT.getRoot()->getParent(), LI, + [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); }); +} SyncDependenceAnalysis::~SyncDependenceAnalysis() {} // divergence propagator for reducible CFGs struct DivergencePropagator { - const ModifiedPO &LoopPOT; + const ModifiedPO &LoopPOT; const DominatorTree &DT; const PostDominatorTree &PDT; const LoopInfo &LI; - const BasicBlock &DivTermBlock; - - // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at - // block B - // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet - // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths - // from X or B is an immediate successor of X (initial value). - using BlockLabelVec = std::vector<const BasicBlock *>; - BlockLabelVec BlockLabels; - // divergent join and loop exit descriptor. - std::unique_ptr<ControlDivergenceDesc> DivDesc; - - DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const BasicBlock &DivTermBlock) - : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock), - BlockLabels(LoopPOT.size(), nullptr), - DivDesc(new ControlDivergenceDesc) {} + const BasicBlock &DivTermBlock; + + // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at + // block B + // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet + // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths + // from X or B is an immediate successor of X (initial value). + using BlockLabelVec = std::vector<const BasicBlock *>; + BlockLabelVec BlockLabels; + // divergent join and loop exit descriptor. 
+ std::unique_ptr<ControlDivergenceDesc> DivDesc; + + DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const BasicBlock &DivTermBlock) + : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock), + BlockLabels(LoopPOT.size(), nullptr), + DivDesc(new ControlDivergenceDesc) {} void printDefs(raw_ostream &Out) { - Out << "Propagator::BlockLabels {\n"; - for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) { - const auto *Label = BlockLabels[BlockIdx]; - Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx - << ") : "; - if (!Label) { - Out << "<null>\n"; + Out << "Propagator::BlockLabels {\n"; + for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) { + const auto *Label = BlockLabels[BlockIdx]; + Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx + << ") : "; + if (!Label) { + Out << "<null>\n"; } else { - Out << Label->getName() << "\n"; + Out << Label->getName() << "\n"; } } Out << "}\n"; } - // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this - // causes a divergent join. - bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) { - auto SuccIdx = LoopPOT.getIndexOf(SuccBlock); + // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this + // causes a divergent join. + bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) { + auto SuccIdx = LoopPOT.getIndexOf(SuccBlock); - // unset or same reaching label - const auto *OldLabel = BlockLabels[SuccIdx]; - if (!OldLabel || (OldLabel == &PushedLabel)) { - BlockLabels[SuccIdx] = &PushedLabel; - return false; + // unset or same reaching label + const auto *OldLabel = BlockLabels[SuccIdx]; + if (!OldLabel || (OldLabel == &PushedLabel)) { + BlockLabels[SuccIdx] = &PushedLabel; + return false; } - // Update the definition - BlockLabels[SuccIdx] = &SuccBlock; - return true; - } - - // visiting a virtual loop exit edge from the loop header --> temporal - // divergence on join - bool visitLoopExitEdge(const BasicBlock &ExitBlock, - const BasicBlock &DefBlock, bool FromParentLoop) { - // Pushing from a non-parent loop cannot cause temporal divergence. - if (!FromParentLoop) - return visitEdge(ExitBlock, DefBlock); - - if (!computeJoin(ExitBlock, DefBlock)) - return false; - - // Identified a divergent loop exit - DivDesc->LoopDivBlocks.insert(&ExitBlock); - LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName() - << "\n"); - return true; + // Update the definition + BlockLabels[SuccIdx] = &SuccBlock; + return true; + } + + // visiting a virtual loop exit edge from the loop header --> temporal + // divergence on join + bool visitLoopExitEdge(const BasicBlock &ExitBlock, + const BasicBlock &DefBlock, bool FromParentLoop) { + // Pushing from a non-parent loop cannot cause temporal divergence. + if (!FromParentLoop) + return visitEdge(ExitBlock, DefBlock); + + if (!computeJoin(ExitBlock, DefBlock)) + return false; + + // Identified a divergent loop exit + DivDesc->LoopDivBlocks.insert(&ExitBlock); + LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName() + << "\n"); + return true; } - // process \p SuccBlock with reaching definition \p DefBlock - bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) { - if (!computeJoin(SuccBlock, DefBlock)) - return false; - - // Divergent, disjoint paths join. 
- DivDesc->JoinDivBlocks.insert(&SuccBlock); - LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName()); - return true; - } - - std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() { - assert(DivDesc); - - LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName() - << "\n"); - - const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock); - - // Early stopping criterion - int FloorIdx = LoopPOT.size() - 1; - const BasicBlock *FloorLabel = nullptr; - + // process \p SuccBlock with reaching definition \p DefBlock + bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) { + if (!computeJoin(SuccBlock, DefBlock)) + return false; + + // Divergent, disjoint paths join. + DivDesc->JoinDivBlocks.insert(&SuccBlock); + LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName()); + return true; + } + + std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() { + assert(DivDesc); + + LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName() + << "\n"); + + const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock); + + // Early stopping criterion + int FloorIdx = LoopPOT.size() - 1; + const BasicBlock *FloorLabel = nullptr; + // bootstrap with branch targets - int BlockIdx = 0; - - for (const auto *SuccBlock : successors(&DivTermBlock)) { - auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock); - BlockLabels[SuccIdx] = SuccBlock; - - // Find the successor with the highest index to start with - BlockIdx = std::max<int>(BlockIdx, SuccIdx); - FloorIdx = std::min<int>(FloorIdx, SuccIdx); - - // Identify immediate divergent loop exits - if (!DivBlockLoop) - continue; - - const auto *BlockLoop = LI.getLoopFor(SuccBlock); - if (BlockLoop && DivBlockLoop->contains(BlockLoop)) - continue; - DivDesc->LoopDivBlocks.insert(SuccBlock); - LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: " - << SuccBlock->getName() << "\n"); + int BlockIdx = 0; + + for (const auto *SuccBlock : successors(&DivTermBlock)) { + auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock); + BlockLabels[SuccIdx] = SuccBlock; + + // Find the successor with the highest index to start with + BlockIdx = std::max<int>(BlockIdx, SuccIdx); + FloorIdx = std::min<int>(FloorIdx, SuccIdx); + + // Identify immediate divergent loop exits + if (!DivBlockLoop) + continue; + + const auto *BlockLoop = LI.getLoopFor(SuccBlock); + if (BlockLoop && DivBlockLoop->contains(BlockLoop)) + continue; + DivDesc->LoopDivBlocks.insert(SuccBlock); + LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: " + << SuccBlock->getName() << "\n"); } // propagate definitions at the immediate successors of the node in RPO - for (; BlockIdx >= FloorIdx; --BlockIdx) { - LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs())); + for (; BlockIdx >= FloorIdx; --BlockIdx) { + LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs())); - // Any label available here - const auto *Label = BlockLabels[BlockIdx]; - if (!Label) + // Any label available here + const auto *Label = BlockLabels[BlockIdx]; + if (!Label) continue; - // Ok. Get the block - const auto *Block = LoopPOT.getBlockAt(BlockIdx); - LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n"); + // Ok. Get the block + const auto *Block = LoopPOT.getBlockAt(BlockIdx); + LLVM_DEBUG(dbgs() << "SDA::joins. 
visiting " << Block->getName() << "\n"); auto *BlockLoop = LI.getLoopFor(Block); - bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block; - bool CausedJoin = false; - int LoweredFloorIdx = FloorIdx; - if (IsLoopHeader) { - // Disconnect from immediate successors and propagate directly to loop - // exits. + bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block; + bool CausedJoin = false; + int LoweredFloorIdx = FloorIdx; + if (IsLoopHeader) { + // Disconnect from immediate successors and propagate directly to loop + // exits. SmallVector<BasicBlock *, 4> BlockLoopExits; BlockLoop->getExitBlocks(BlockLoopExits); - - bool IsParentLoop = BlockLoop->contains(&DivTermBlock); + + bool IsParentLoop = BlockLoop->contains(&DivTermBlock); for (const auto *BlockLoopExit : BlockLoopExits) { - CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop); - LoweredFloorIdx = std::min<int>(LoweredFloorIdx, - LoopPOT.getIndexOf(*BlockLoopExit)); + CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop); + LoweredFloorIdx = std::min<int>(LoweredFloorIdx, + LoopPOT.getIndexOf(*BlockLoopExit)); } } else { - // Acyclic successor case + // Acyclic successor case for (const auto *SuccBlock : successors(Block)) { - CausedJoin |= visitEdge(*SuccBlock, *Label); - LoweredFloorIdx = - std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock)); + CausedJoin |= visitEdge(*SuccBlock, *Label); + LoweredFloorIdx = + std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock)); } } - - // Floor update - if (CausedJoin) { - // 1. Different labels pushed to successors - FloorIdx = LoweredFloorIdx; - } else if (FloorLabel != Label) { - // 2. No join caused BUT we pushed a label that is different than the - // last pushed label - FloorIdx = LoweredFloorIdx; - FloorLabel = Label; - } + + // Floor update + if (CausedJoin) { + // 1. Different labels pushed to successors + FloorIdx = LoweredFloorIdx; + } else if (FloorLabel != Label) { + // 2. No join caused BUT we pushed a label that is different than the + // last pushed label + FloorIdx = LoweredFloorIdx; + FloorLabel = Label; + } } LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); - return std::move(DivDesc); + return std::move(DivDesc); } }; -#ifndef NDEBUG -static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) { - Out << "["; - bool First = true; - for (const auto *BB : Blocks) { - if (!First) - Out << ", "; - First = false; - Out << BB->getName(); +#ifndef NDEBUG +static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) { + Out << "["; + bool First = true; + for (const auto *BB : Blocks) { + if (!First) + Out << ", "; + First = false; + Out << BB->getName(); } - Out << "]"; + Out << "]"; } -#endif +#endif -const ControlDivergenceDesc & -SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) { +const ControlDivergenceDesc & +SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) { // trivial case - if (Term.getNumSuccessors() <= 1) { - return EmptyDivergenceDesc; + if (Term.getNumSuccessors() <= 1) { + return EmptyDivergenceDesc; } // already available in cache? 
- auto ItCached = CachedControlDivDescs.find(&Term); - if (ItCached != CachedControlDivDescs.end()) + auto ItCached = CachedControlDivDescs.find(&Term); + if (ItCached != CachedControlDivDescs.end()) return *ItCached->second; // compute all join points - // Special handling of divergent loop exits is not needed for LCSSA + // Special handling of divergent loop exits is not needed for LCSSA const auto &TermBlock = *Term.getParent(); - DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock); - auto DivDesc = Propagator.computeJoinPoints(); - - LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n"; - dbgs() << "JoinDivBlocks: "; - printBlockSet(DivDesc->JoinDivBlocks, dbgs()); - dbgs() << "\nLoopDivBlocks: "; - printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";); - - auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc)); + DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock); + auto DivDesc = Propagator.computeJoinPoints(); + + LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n"; + dbgs() << "JoinDivBlocks: "; + printBlockSet(DivDesc->JoinDivBlocks, dbgs()); + dbgs() << "\nLoopDivBlocks: "; + printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";); + + auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc)); assert(ItInserted.second); return *ItInserted.first->second; } diff --git a/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp b/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp index 3f26bdfdc0..9d4859ab85 100644 --- a/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp +++ b/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp @@ -10,23 +10,23 @@ // This file implements utilities for interfacing with tensorflow C APIs. // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_API) +#include "llvm/Config/config.h" +#if defined(LLVM_HAVE_TF_API) -#include "llvm/ADT/Twine.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/Utils/TFUtils.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/JSON.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#error #include "tensorflow/c/c_api.h" -#error #include "tensorflow/c/c_api_experimental.h" +#error #include "tensorflow/c/c_api.h" +#error #include "tensorflow/c/c_api_experimental.h" #include <cassert> -#include <numeric> +#include <numeric> using namespace llvm; @@ -64,89 +64,89 @@ TFStatusPtr createTFStatus() { TFSessionOptionsPtr createTFSessionOptions() { return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); } - -/// Write the values of one tensor as a list. -template <typename T> -void writeTensorValues(raw_ostream &OutFile, const char *TensorData, - size_t ElemCount) { - OutFile << "["; - const T *TypedData = reinterpret_cast<const T *>(TensorData); - for (size_t I = 0; I < ElemCount; ++I) { - if (I > 0) - OutFile << ", "; - OutFile << TypedData[I]; - } - OutFile << "]"; -} - -/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs. -/// The tensors are assumed to be stored contiguously, in row-major format, -/// in the TensorData buffer. Each tensor has the shape given by Spec. 
The -/// feature name in the output is either the provided LoggingName, if -/// specified, otherwise it's the name of the tensor (as given by Spec). -void writeRawTensorsAsFeatureLists(raw_ostream &OutFile, - const LoggedFeatureSpec &LoggedSpec, - const char *TensorData, size_t TensorCount, - bool FinalReward = false) { - const char *FieldName = "<invalid>"; - std::function<void(const char *)> ValueWriter; - const auto &Spec = LoggedSpec.Spec; - // The 'Feature' protobuf only has 3 possible fields: float_list, - // int64_list, or bytes_list, so we capture int32 values as int64. We don't - // support any other types. - if (Spec.isElementType<int64_t>()) { - FieldName = "int64_list"; - ValueWriter = [&](const char *Data) { - writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount()); - }; - } else if (Spec.isElementType<int32_t>()) { - FieldName = "int64_list"; - ValueWriter = [&](const char *Data) { - writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount()); - }; - - } else if (Spec.isElementType<float>()) { - FieldName = "float_list"; - ValueWriter = [&](const char *Data) { - writeTensorValues<float>(OutFile, Data, Spec.getElementCount()); - }; - - } else { - llvm_unreachable("Unsupported tensor type."); - } - - OutFile << " feature_list: {\n"; - OutFile << " key: " - << "\"" - << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name()) - << "\" "; - OutFile << "value: {\n"; - size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize(); - - auto WriteFeatureProto = [&](const char *P) { - OutFile << " feature: { " << FieldName << ": { value: "; - ValueWriter(P); - OutFile << " } }\n"; - }; - - const char *CurrentTensor = TensorData; - static int64_t Zero = 0; - // Write all but the last value. If this is the final reward, don't increment - // the CurrentTensor, and just write 0. - for (size_t I = 0; I < TensorCount - 1; ++I) { - if (FinalReward) - WriteFeatureProto(reinterpret_cast<const char *>(&Zero)); - else { - WriteFeatureProto(CurrentTensor); - CurrentTensor += TensorByteSize; - } - } - - WriteFeatureProto(CurrentTensor); - - OutFile << " }\n"; - OutFile << " }\n"; -} + +/// Write the values of one tensor as a list. +template <typename T> +void writeTensorValues(raw_ostream &OutFile, const char *TensorData, + size_t ElemCount) { + OutFile << "["; + const T *TypedData = reinterpret_cast<const T *>(TensorData); + for (size_t I = 0; I < ElemCount; ++I) { + if (I > 0) + OutFile << ", "; + OutFile << TypedData[I]; + } + OutFile << "]"; +} + +/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs. +/// The tensors are assumed to be stored contiguously, in row-major format, +/// in the TensorData buffer. Each tensor has the shape given by Spec. The +/// feature name in the output is either the provided LoggingName, if +/// specified, otherwise it's the name of the tensor (as given by Spec). +void writeRawTensorsAsFeatureLists(raw_ostream &OutFile, + const LoggedFeatureSpec &LoggedSpec, + const char *TensorData, size_t TensorCount, + bool FinalReward = false) { + const char *FieldName = "<invalid>"; + std::function<void(const char *)> ValueWriter; + const auto &Spec = LoggedSpec.Spec; + // The 'Feature' protobuf only has 3 possible fields: float_list, + // int64_list, or bytes_list, so we capture int32 values as int64. We don't + // support any other types. 
+ if (Spec.isElementType<int64_t>()) { + FieldName = "int64_list"; + ValueWriter = [&](const char *Data) { + writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount()); + }; + } else if (Spec.isElementType<int32_t>()) { + FieldName = "int64_list"; + ValueWriter = [&](const char *Data) { + writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount()); + }; + + } else if (Spec.isElementType<float>()) { + FieldName = "float_list"; + ValueWriter = [&](const char *Data) { + writeTensorValues<float>(OutFile, Data, Spec.getElementCount()); + }; + + } else { + llvm_unreachable("Unsupported tensor type."); + } + + OutFile << " feature_list: {\n"; + OutFile << " key: " + << "\"" + << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name()) + << "\" "; + OutFile << "value: {\n"; + size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize(); + + auto WriteFeatureProto = [&](const char *P) { + OutFile << " feature: { " << FieldName << ": { value: "; + ValueWriter(P); + OutFile << " } }\n"; + }; + + const char *CurrentTensor = TensorData; + static int64_t Zero = 0; + // Write all but the last value. If this is the final reward, don't increment + // the CurrentTensor, and just write 0. + for (size_t I = 0; I < TensorCount - 1; ++I) { + if (FinalReward) + WriteFeatureProto(reinterpret_cast<const char *>(&Zero)); + else { + WriteFeatureProto(CurrentTensor); + CurrentTensor += TensorByteSize; + } + } + + WriteFeatureProto(CurrentTensor); + + OutFile << " }\n"; + OutFile << " }\n"; +} } // namespace namespace llvm { @@ -170,122 +170,122 @@ private: std::vector<TF_Tensor *> Output; }; -size_t TensorSpec::getElementByteSize() const { - return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex)); -} - -TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex, - const std::vector<int64_t> &Shape) - : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape), - ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1, - std::multiplies<int64_t>())) {} - -Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx, - const json::Value &Value) { - auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> { - std::string S; - llvm::raw_string_ostream OS(S); - OS << Value; - Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S); - return None; - }; - // FIXME: accept a Path as a parameter, and use it for error reporting. 
- json::Path::Root Root("tensor_spec"); - json::ObjectMapper Mapper(Value, Root); - if (!Mapper) - return EmitError("Value is not a dict"); - - std::string TensorName; - int TensorPort = -1; - std::string TensorType; - std::vector<int64_t> TensorShape; - - if (!Mapper.map<std::string>("name", TensorName)) - return EmitError("'name' property not present or not a string"); - if (!Mapper.map<std::string>("type", TensorType)) - return EmitError("'type' property not present or not a string"); - if (!Mapper.map<int>("port", TensorPort)) - return EmitError("'port' property not present or not an int"); - if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape)) - return EmitError("'shape' property not present or not an int array"); - -#define PARSE_TYPE(T, E) \ - if (TensorType == #T) \ - return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort); - TFUTILS_SUPPORTED_TYPES(PARSE_TYPE) -#undef PARSE_TYPE - return None; -} - -Optional<std::vector<LoggedFeatureSpec>> -loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, - StringRef ModelPath, StringRef SpecFileOverride) { - SmallVector<char, 128> OutputSpecsPath; - StringRef FileName = SpecFileOverride; - if (FileName.empty()) { - llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json"); - FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()}; - } - - auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName); - if (!BufferOrError) { - Ctx.emitError("Error opening output specs file: " + FileName + " : " + - BufferOrError.getError().message()); - return None; - } - auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer()); - if (!ParsedJSONValues) { - Ctx.emitError("Could not parse specs file: " + FileName); - return None; - } - auto ValuesArray = ParsedJSONValues->getAsArray(); - if (!ValuesArray) { - Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, " - "logging_name:<name>} dictionaries"); - return None; - } - std::vector<LoggedFeatureSpec> Ret; - for (const auto &Value : *ValuesArray) - if (const auto *Obj = Value.getAsObject()) - if (const auto *SpecPart = Obj->get("tensor_spec")) - if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart)) - if (auto LoggingName = Obj->getString("logging_name")) { - if (!TensorSpec->isElementType<int64_t>() && - !TensorSpec->isElementType<int32_t>() && - !TensorSpec->isElementType<float>()) { - Ctx.emitError( - "Only int64, int32, and float tensors are supported. " - "Found unsupported type for tensor named " + - TensorSpec->name()); - return None; - } - Ret.push_back({*TensorSpec, LoggingName->str()}); - } - - if (ValuesArray->size() != Ret.size()) { - Ctx.emitError( - "Unable to parse output spec. It should be a json file containing an " - "array of dictionaries. 
Each dictionary must have a 'tensor_spec' key, " - "with a json object describing a TensorSpec; and a 'logging_name' key, " - "which is a string to use as name when logging this tensor in the " - "training log."); - return None; - } - if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) { - Ctx.emitError("The first output spec must describe the decision tensor, " - "and must have the logging_name " + - StringRef(ExpectedDecisionName)); - return None; - } - return Ret; -} - +size_t TensorSpec::getElementByteSize() const { + return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex)); +} + +TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex, + const std::vector<int64_t> &Shape) + : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape), + ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1, + std::multiplies<int64_t>())) {} + +Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx, + const json::Value &Value) { + auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> { + std::string S; + llvm::raw_string_ostream OS(S); + OS << Value; + Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S); + return None; + }; + // FIXME: accept a Path as a parameter, and use it for error reporting. + json::Path::Root Root("tensor_spec"); + json::ObjectMapper Mapper(Value, Root); + if (!Mapper) + return EmitError("Value is not a dict"); + + std::string TensorName; + int TensorPort = -1; + std::string TensorType; + std::vector<int64_t> TensorShape; + + if (!Mapper.map<std::string>("name", TensorName)) + return EmitError("'name' property not present or not a string"); + if (!Mapper.map<std::string>("type", TensorType)) + return EmitError("'type' property not present or not a string"); + if (!Mapper.map<int>("port", TensorPort)) + return EmitError("'port' property not present or not an int"); + if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape)) + return EmitError("'shape' property not present or not an int array"); + +#define PARSE_TYPE(T, E) \ + if (TensorType == #T) \ + return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort); + TFUTILS_SUPPORTED_TYPES(PARSE_TYPE) +#undef PARSE_TYPE + return None; +} + +Optional<std::vector<LoggedFeatureSpec>> +loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, + StringRef ModelPath, StringRef SpecFileOverride) { + SmallVector<char, 128> OutputSpecsPath; + StringRef FileName = SpecFileOverride; + if (FileName.empty()) { + llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json"); + FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()}; + } + + auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName); + if (!BufferOrError) { + Ctx.emitError("Error opening output specs file: " + FileName + " : " + + BufferOrError.getError().message()); + return None; + } + auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer()); + if (!ParsedJSONValues) { + Ctx.emitError("Could not parse specs file: " + FileName); + return None; + } + auto ValuesArray = ParsedJSONValues->getAsArray(); + if (!ValuesArray) { + Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, " + "logging_name:<name>} dictionaries"); + return None; + } + std::vector<LoggedFeatureSpec> Ret; + for (const auto &Value : *ValuesArray) + if (const auto *Obj = Value.getAsObject()) + if (const auto *SpecPart = Obj->get("tensor_spec")) + if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart)) + if (auto LoggingName = Obj->getString("logging_name")) { + if 
(!TensorSpec->isElementType<int64_t>() && + !TensorSpec->isElementType<int32_t>() && + !TensorSpec->isElementType<float>()) { + Ctx.emitError( + "Only int64, int32, and float tensors are supported. " + "Found unsupported type for tensor named " + + TensorSpec->name()); + return None; + } + Ret.push_back({*TensorSpec, LoggingName->str()}); + } + + if (ValuesArray->size() != Ret.size()) { + Ctx.emitError( + "Unable to parse output spec. It should be a json file containing an " + "array of dictionaries. Each dictionary must have a 'tensor_spec' key, " + "with a json object describing a TensorSpec; and a 'logging_name' key, " + "which is a string to use as name when logging this tensor in the " + "training log."); + return None; + } + if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) { + Ctx.emitError("The first output spec must describe the decision tensor, " + "and must have the logging_name " + + StringRef(ExpectedDecisionName)); + return None; + } + return Ret; +} + class TFModelEvaluatorImpl { public: TFModelEvaluatorImpl(StringRef SavedModelPath, - const std::vector<TensorSpec> &InputSpecs, - function_ref<TensorSpec(size_t)> GetOutputSpecs, - size_t OutputSpecsSize, const char *Tags); + const std::vector<TensorSpec> &InputSpecs, + function_ref<TensorSpec(size_t)> GetOutputSpecs, + size_t OutputSpecsSize, const char *Tags); bool isValid() const { return IsValid; } size_t OutputSize() const { return OutputFeed.size(); } @@ -329,18 +329,18 @@ private: /// Reusable utility for ensuring we can bind the requested Name to a node in /// the SavedModel Graph. - bool checkReportAndInvalidate(const TF_Output &Output, - const TensorSpec &OutputSpec); + bool checkReportAndInvalidate(const TF_Output &Output, + const TensorSpec &OutputSpec); }; } // namespace llvm TFModelEvaluatorImpl::TFModelEvaluatorImpl( - StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, - function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, - const char *Tags = "serve") + StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, + function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, + const char *Tags = "serve") : Graph(createTFGraph()), Options(createTFSessionOptions()), - InputFeed(InputSpecs.size()), Input(InputSpecs.size()), - OutputFeed(OutputSpecsSize) { + InputFeed(InputSpecs.size()), Input(InputSpecs.size()), + OutputFeed(OutputSpecsSize) { if (!ensureInitTF()) { errs() << "Tensorflow should have been initialized"; return; @@ -354,44 +354,44 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl( errs() << TF_Message(Status.get()); invalidate(); } - for (size_t I = 0; I < InputSpecs.size(); ++I) { - auto &InputSpec = InputSpecs[I]; + for (size_t I = 0; I < InputSpecs.size(); ++I) { + auto &InputSpec = InputSpecs[I]; InputFeed[I] = { - TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()), - InputSpec.port()}; - if (!checkReportAndInvalidate(InputFeed[I], InputSpec)) + TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()), + InputSpec.port()}; + if (!checkReportAndInvalidate(InputFeed[I], InputSpec)) return; - initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()), - InputSpec.shape()); + initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()), + InputSpec.shape()); } - for (size_t I = 0; I < OutputSpecsSize; ++I) { - auto OutputSpec = GetOutputSpecs(I); + for (size_t I = 0; I < OutputSpecsSize; ++I) { + auto OutputSpec = GetOutputSpecs(I); OutputFeed[I] = { - TF_GraphOperationByName(Graph.get(), 
(OutputSpec.name()).c_str()), - OutputSpec.port()}; - if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec)) + TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()), + OutputSpec.port()}; + if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec)) return; } } -TFModelEvaluator::TFModelEvaluator( - StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, - function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, - const char *Tags) - : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs, - OutputSpecsSize, Tags)) { +TFModelEvaluator::TFModelEvaluator( + StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, + function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, + const char *Tags) + : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs, + OutputSpecsSize, Tags)) { if (!Impl->isValid()) Impl.reset(); } -TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, - const std::vector<TensorSpec> &InputSpecs, - const std::vector<TensorSpec> &OutputSpecs, - const char *Tags) - : TFModelEvaluator( - SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; }, - OutputSpecs.size(), Tags) {} - +TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, + const std::vector<TensorSpec> &InputSpecs, + const std::vector<TensorSpec> &OutputSpecs, + const char *Tags) + : TFModelEvaluator( + SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; }, + OutputSpecs.size(), Tags) {} + TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { for (auto *T : Input) { TF_DeleteTensor(T); @@ -405,11 +405,11 @@ TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { errs() << "Could not delete TF session"; } -bool TFModelEvaluatorImpl::checkReportAndInvalidate( - const TF_Output &Output, const TensorSpec &OutputSpec) { +bool TFModelEvaluatorImpl::checkReportAndInvalidate( + const TF_Output &Output, const TensorSpec &OutputSpec) { if (Output.oper) return true; - errs() << "Could not find TF_Output named: " + OutputSpec.name(); + errs() << "Could not find TF_Output named: " + OutputSpec.name(); IsValid = false; return IsValid; } @@ -451,55 +451,55 @@ TFModelEvaluator::EvaluationResult::EvaluationResult( TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other) : Impl(std::move(Other.Impl)) {} -TFModelEvaluator::EvaluationResult & -TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) { - Impl = std::move(Other.Impl); - return *this; -} - +TFModelEvaluator::EvaluationResult & +TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) { + Impl = std::move(Other.Impl); + return *this; +} + void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { return TF_TensorData(Impl->getOutput()[Index]); } -const void * -TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const { - return TF_TensorData(Impl->getOutput()[Index]); +const void * +TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const { + return TF_TensorData(Impl->getOutput()[Index]); } -#define TFUTILS_GETDATATYPE_IMPL(T, E) \ - template <> int TensorSpec::getDataType<T>() { return E; } - -TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL) - -#undef TFUTILS_GETDATATYPE_IMPL - -TFModelEvaluator::EvaluationResult::~EvaluationResult() {} -TFModelEvaluator::~TFModelEvaluator() {} - -void Logger::print(raw_ostream &OS) { - if (RawLogData.empty()) - return; - if (RawLogData[0].empty()) - return; - size_t Tensor0Size = 
FeatureSpecs[0].Spec.getElementCount() * - FeatureSpecs[0].Spec.getElementByteSize(); - size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size; - if (NumberOfRecords == 0) - return; - size_t RewardSize = - RewardSpec.getElementCount() * RewardSpec.getElementByteSize(); - size_t NumberOfRewards = RawLogData.back().size() / RewardSize; - - OS << "feature_lists: {\n"; - for (size_t I = 0; I < FeatureSpecs.size(); ++I) - writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(), - NumberOfRecords); - - if (IncludeReward) - writeRawTensorsAsFeatureLists(OS, {RewardSpec, None}, - RawLogData.back().data(), NumberOfRecords, - NumberOfRewards == 1); - - OS << "}\n"; +#define TFUTILS_GETDATATYPE_IMPL(T, E) \ + template <> int TensorSpec::getDataType<T>() { return E; } + +TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL) + +#undef TFUTILS_GETDATATYPE_IMPL + +TFModelEvaluator::EvaluationResult::~EvaluationResult() {} +TFModelEvaluator::~TFModelEvaluator() {} + +void Logger::print(raw_ostream &OS) { + if (RawLogData.empty()) + return; + if (RawLogData[0].empty()) + return; + size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() * + FeatureSpecs[0].Spec.getElementByteSize(); + size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size; + if (NumberOfRecords == 0) + return; + size_t RewardSize = + RewardSpec.getElementCount() * RewardSpec.getElementByteSize(); + size_t NumberOfRewards = RawLogData.back().size() / RewardSize; + + OS << "feature_lists: {\n"; + for (size_t I = 0; I < FeatureSpecs.size(); ++I) + writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(), + NumberOfRecords); + + if (IncludeReward) + writeRawTensorsAsFeatureLists(OS, {RewardSpec, None}, + RawLogData.back().data(), NumberOfRecords, + NumberOfRewards == 1); + + OS << "}\n"; } -#endif // defined(LLVM_HAVE_TF_API) +#endif // defined(LLVM_HAVE_TF_API) diff --git a/contrib/libs/llvm12/lib/Analysis/TargetLibraryInfo.cpp b/contrib/libs/llvm12/lib/Analysis/TargetLibraryInfo.cpp index a4de21a254..948672e294 100644 --- a/contrib/libs/llvm12/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/libs/llvm12/lib/Analysis/TargetLibraryInfo.cpp @@ -24,8 +24,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( "No vector functions library"), clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), - clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86", - "GLIBC Vector Math library"), + clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86", + "GLIBC Vector Math library"), clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", "IBM MASS vector library"), clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", @@ -551,14 +551,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_nvvm_reflect); } - // These vec_malloc/free routines are only available on AIX. - if (!T.isOSAIX()) { - TLI.setUnavailable(LibFunc_vec_calloc); - TLI.setUnavailable(LibFunc_vec_malloc); - TLI.setUnavailable(LibFunc_vec_realloc); - TLI.setUnavailable(LibFunc_vec_free); - } - + // These vec_malloc/free routines are only available on AIX. 
+ if (!T.isOSAIX()) { + TLI.setUnavailable(LibFunc_vec_calloc); + TLI.setUnavailable(LibFunc_vec_malloc); + TLI.setUnavailable(LibFunc_vec_realloc); + TLI.setUnavailable(LibFunc_vec_free); + } + TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary); } @@ -646,7 +646,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, const DataLayout *DL) const { LLVMContext &Ctx = FTy.getContext(); Type *PCharTy = Type::getInt8PtrTy(Ctx); - Type *SizeTTy = DL ? DL->getIntPtrType(Ctx, /*AddressSpace=*/0) : nullptr; + Type *SizeTTy = DL ? DL->getIntPtrType(Ctx, /*AddressSpace=*/0) : nullptr; auto IsSizeTTy = [SizeTTy](Type *Ty) { return SizeTTy ? Ty == SizeTTy : Ty->isIntegerTy(); }; @@ -839,7 +839,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_system: return (NumParams == 1 && FTy.getParamType(0)->isPointerTy()); case LibFunc_malloc: - case LibFunc_vec_malloc: + case LibFunc_vec_malloc: return (NumParams == 1 && FTy.getReturnType()->isPointerTy()); case LibFunc_memcmp: return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) && @@ -858,7 +858,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy()); case LibFunc_memcpy_chk: - case LibFunc_mempcpy_chk: + case LibFunc_mempcpy_chk: case LibFunc_memmove_chk: --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) @@ -894,7 +894,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (FTy.getReturnType()->isPointerTy()); case LibFunc_realloc: case LibFunc_reallocf: - case LibFunc_vec_realloc: + case LibFunc_vec_realloc: return (NumParams == 2 && FTy.getReturnType() == PCharTy && FTy.getParamType(0) == FTy.getReturnType() && IsSizeTTy(FTy.getParamType(1))); @@ -922,7 +922,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_bzero: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy()); case LibFunc_calloc: - case LibFunc_vec_calloc: + case LibFunc_vec_calloc: return (NumParams == 2 && FTy.getReturnType()->isPointerTy()); case LibFunc_atof: @@ -973,7 +973,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_mkdir: case LibFunc_mktime: case LibFunc_times: - case LibFunc_vec_free: + case LibFunc_vec_free: return (NumParams != 0 && FTy.getParamType(0)->isPointerTy()); case LibFunc_fopen: @@ -1243,15 +1243,15 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_ZdaPvmSt11align_val_t: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy()); - // void __atomic_load(size_t, void *, void *, int) - case LibFunc_atomic_load: - // void __atomic_store(size_t, void *, void *, int) - case LibFunc_atomic_store: - return (NumParams == 4 && FTy.getParamType(0)->isIntegerTy() && - FTy.getParamType(1)->isPointerTy() && - FTy.getParamType(2)->isPointerTy() && - FTy.getParamType(3)->isIntegerTy()); - + // void __atomic_load(size_t, void *, void *, int) + case LibFunc_atomic_load: + // void __atomic_store(size_t, void *, void *, int) + case LibFunc_atomic_store: + return (NumParams == 4 && FTy.getParamType(0)->isIntegerTy() && + FTy.getParamType(1)->isPointerTy() && + FTy.getParamType(2)->isPointerTy() && + FTy.getParamType(3)->isIntegerTy()); + case LibFunc_memset_pattern16: return (!FTy.isVarArg() && NumParams == 3 && FTy.getParamType(0)->isPointerTy() && @@ -1555,10 +1555,10 @@ static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) { } 
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) { - llvm::append_range(VectorDescs, Fns); + llvm::append_range(VectorDescs, Fns); llvm::sort(VectorDescs, compareByScalarFnName); - llvm::append_range(ScalarDescs, Fns); + llvm::append_range(ScalarDescs, Fns); llvm::sort(ScalarDescs, compareByVectorFnName); } @@ -1573,14 +1573,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( addVectorizableFunctions(VecFuncs); break; } - case LIBMVEC_X86: { - const VecDesc VecFuncs[] = { - #define TLI_DEFINE_LIBMVEC_X86_VECFUNCS - #include "llvm/Analysis/VecFuncs.def" - }; - addVectorizableFunctions(VecFuncs); - break; - } + case LIBMVEC_X86: { + const VecDesc VecFuncs[] = { + #define TLI_DEFINE_LIBMVEC_X86_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } case MASSV: { const VecDesc VecFuncs[] = { #define TLI_DEFINE_MASSV_VECFUNCS diff --git a/contrib/libs/llvm12/lib/Analysis/TargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Analysis/TargetTransformInfo.cpp index e498401eb8..44674433ae 100644 --- a/contrib/libs/llvm12/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/libs/llvm12/lib/Analysis/TargetTransformInfo.cpp @@ -71,7 +71,7 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); - Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); + Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); @@ -79,10 +79,10 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - ElementCount Factor) - : RetTy(CI.getType()), IID(Id), VF(Factor) { + ElementCount Factor) + : RetTy(CI.getType()), IID(Id), VF(Factor) { - assert(!Factor.isScalable() && "Scalable vectors are not yet supported"); + assert(!Factor.isScalable() && "Scalable vectors are not yet supported"); if (auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); @@ -94,9 +94,9 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - ElementCount Factor, - unsigned ScalarCost) - : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) { + ElementCount Factor, + unsigned ScalarCost) + : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) { if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); @@ -217,11 +217,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // Note that this block may not be the loop latch block, even if the loop // has a latch block. 
ExitBlock = BB; - TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType())); - - if (!EC->getType()->isPointerTy() && EC->getType() != CountType) - TripCount = SE.getZeroExtendExpr(TripCount, CountType); - + TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType())); + + if (!EC->getType()->isPointerTy() && EC->getType() != CountType) + TripCount = SE.getZeroExtendExpr(TripCount, CountType); + break; } @@ -247,11 +247,11 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const { return TTIImpl->getInliningThresholdMultiplier(); } -unsigned -TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const { - return TTIImpl->adjustInliningThreshold(CB); -} - +unsigned +TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const { + return TTIImpl->adjustInliningThreshold(CB); +} + int TargetTransformInfo::getInlinerVectorBonusPercent() const { return TTIImpl->getInlinerVectorBonusPercent(); } @@ -307,10 +307,10 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS, return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS); } -unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const { - return TTIImpl->getAssumedAddrSpace(V); -} - +unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const { + return TTIImpl->getAssumedAddrSpace(V); +} + Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace( IntrinsicInst *II, Value *OldV, Value *NewV) const { return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); @@ -337,29 +337,29 @@ bool TargetTransformInfo::emitGetActiveLaneMask() const { return TTIImpl->emitGetActiveLaneMask(); } -Optional<Instruction *> -TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const { - return TTIImpl->instCombineIntrinsic(IC, II); -} - -Optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic( - InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, - bool &KnownBitsComputed) const { - return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, - KnownBitsComputed); -} - -Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic( - InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, - APInt &UndefElts2, APInt &UndefElts3, - std::function<void(Instruction *, unsigned, APInt, APInt &)> - SimplifyAndSetOp) const { - return TTIImpl->simplifyDemandedVectorEltsIntrinsic( - IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, - SimplifyAndSetOp); -} - +Optional<Instruction *> +TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const { + return TTIImpl->instCombineIntrinsic(IC, II); +} + +Optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) const { + return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, + KnownBitsComputed); +} + +Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) const { + return TTIImpl->simplifyDemandedVectorEltsIntrinsic( + IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + SimplifyAndSetOp); +} + void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, 
UP); @@ -391,10 +391,10 @@ bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { return TTIImpl->isLSRCostLess(C1, C2); } -bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const { - return TTIImpl->isNumRegsMajorCostOfLSR(); -} - +bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const { + return TTIImpl->isNumRegsMajorCostOfLSR(); +} + bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const { return TTIImpl->isProfitableLSRChainElement(I); } @@ -496,10 +496,10 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } -unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const { - return TTIImpl->getRegUsageForType(Ty); -} - +unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const { + return TTIImpl->getRegUsageForType(Ty); +} + bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } @@ -593,11 +593,11 @@ int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty, return Cost; } -int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind, - Instruction *Inst) const { - int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst); +int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) const { + int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -632,10 +632,10 @@ unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { return TTIImpl->getMinVectorRegisterBitWidth(); } -Optional<unsigned> TargetTransformInfo::getMaxVScale() const { - return TTIImpl->getMaxVScale(); -} - +Optional<unsigned> TargetTransformInfo::getMaxVScale() const { + return TTIImpl->getMaxVScale(); +} + bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const { return TTIImpl->shouldMaximizeVectorBandwidth(OptSize); } @@ -644,11 +644,11 @@ unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const { return TTIImpl->getMinimumVF(ElemWidth); } -unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth, - unsigned Opcode) const { - return TTIImpl->getMaximumVF(ElemWidth, Opcode); -} - +unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth, + unsigned Opcode) const { + return TTIImpl->getMaximumVF(ElemWidth, Opcode); +} + bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( @@ -762,57 +762,57 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty, return Cost; } -TTI::CastContextHint -TargetTransformInfo::getCastContextHint(const Instruction *I) { - if (!I) - return CastContextHint::None; - - auto getLoadStoreKind = [](const Value *V, unsigned LdStOp, unsigned MaskedOp, - unsigned GatScatOp) { - const Instruction *I = dyn_cast<Instruction>(V); - if (!I) - return CastContextHint::None; - - if (I->getOpcode() == LdStOp) - return CastContextHint::Normal; - - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - if (II->getIntrinsicID() == MaskedOp) - return TTI::CastContextHint::Masked; - if (II->getIntrinsicID() == GatScatOp) - return TTI::CastContextHint::GatherScatter; - } - - return TTI::CastContextHint::None; - }; - - switch (I->getOpcode()) { - case Instruction::ZExt: - 
case Instruction::SExt: - case Instruction::FPExt: - return getLoadStoreKind(I->getOperand(0), Instruction::Load, - Intrinsic::masked_load, Intrinsic::masked_gather); - case Instruction::Trunc: - case Instruction::FPTrunc: - if (I->hasOneUse()) - return getLoadStoreKind(*I->user_begin(), Instruction::Store, - Intrinsic::masked_store, - Intrinsic::masked_scatter); - break; - default: - return CastContextHint::None; - } - - return TTI::CastContextHint::None; -} - +TTI::CastContextHint +TargetTransformInfo::getCastContextHint(const Instruction *I) { + if (!I) + return CastContextHint::None; + + auto getLoadStoreKind = [](const Value *V, unsigned LdStOp, unsigned MaskedOp, + unsigned GatScatOp) { + const Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return CastContextHint::None; + + if (I->getOpcode() == LdStOp) + return CastContextHint::Normal; + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == MaskedOp) + return TTI::CastContextHint::Masked; + if (II->getIntrinsicID() == GatScatOp) + return TTI::CastContextHint::GatherScatter; + } + + return TTI::CastContextHint::None; + }; + + switch (I->getOpcode()) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPExt: + return getLoadStoreKind(I->getOperand(0), Instruction::Load, + Intrinsic::masked_load, Intrinsic::masked_gather); + case Instruction::Trunc: + case Instruction::FPTrunc: + if (I->hasOneUse()) + return getLoadStoreKind(*I->user_begin(), Instruction::Store, + Intrinsic::masked_store, + Intrinsic::masked_scatter); + break; + default: + return CastContextHint::None; + } + + return TTI::CastContextHint::None; +} + int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - CastContextHint CCH, + CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -834,13 +834,13 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode, int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = - TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + int Cost = + TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -948,13 +948,13 @@ int TargetTransformInfo::getMinMaxReductionCost( return Cost; } -InstructionCost TargetTransformInfo::getExtendedAddReductionCost( - bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, - TTI::TargetCostKind CostKind) const { - return TTIImpl->getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty, - CostKind); -} - +InstructionCost TargetTransformInfo::getExtendedAddReductionCost( + bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, + TTI::TargetCostKind CostKind) const { + return TTIImpl->getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty, + CostKind); +} + unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { return 
TTIImpl->getCostOfKeepingLiveOverCall(Tys); @@ -1054,16 +1054,16 @@ bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty, return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); } -bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode, Type *Ty, - ReductionFlags Flags) const { - return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags); -} - -bool TargetTransformInfo::preferPredicatedReductionSelect( - unsigned Opcode, Type *Ty, ReductionFlags Flags) const { - return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags); -} - +bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const { + return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags); +} + +bool TargetTransformInfo::preferPredicatedReductionSelect( + unsigned Opcode, Type *Ty, ReductionFlags Flags) const { + return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags); +} + bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } @@ -1072,10 +1072,10 @@ unsigned TargetTransformInfo::getGISelRematGlobalCost() const { return TTIImpl->getGISelRematGlobalCost(); } -bool TargetTransformInfo::supportsScalableVectors() const { - return TTIImpl->supportsScalableVectors(); -} - +bool TargetTransformInfo::supportsScalableVectors() const { + return TTIImpl->supportsScalableVectors(); +} + int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } @@ -1089,8 +1089,8 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, else if (!SI) return false; - SmallVector<int, 32> Mask( - cast<FixedVectorType>(SI->getType())->getNumElements(), -1); + SmallVector<int, 32> Mask( + cast<FixedVectorType>(SI->getType())->getNumElements(), -1); // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether // we look at the left or right side. 
@@ -1229,7 +1229,7 @@ TTI::ReductionKind TTI::matchPairwiseReduction( if (!RD) return TTI::RK_None; - auto *VecTy = cast<FixedVectorType>(RdxStart->getType()); + auto *VecTy = cast<FixedVectorType>(RdxStart->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) return TTI::RK_None; @@ -1294,7 +1294,7 @@ TTI::ReductionKind TTI::matchVectorSplittingReduction( if (!RD) return TTI::RK_None; - auto *VecTy = cast<FixedVectorType>(ReduxRoot->getOperand(0)->getType()); + auto *VecTy = cast<FixedVectorType>(ReduxRoot->getOperand(0)->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) return TTI::RK_None; @@ -1353,18 +1353,18 @@ TTI::ReductionKind TTI::matchVectorSplittingReduction( return RD->Kind; } -TTI::ReductionKind -TTI::matchVectorReduction(const ExtractElementInst *Root, unsigned &Opcode, - VectorType *&Ty, bool &IsPairwise) { - TTI::ReductionKind RdxKind = matchVectorSplittingReduction(Root, Opcode, Ty); - if (RdxKind != TTI::ReductionKind::RK_None) { - IsPairwise = false; - return RdxKind; - } - IsPairwise = true; - return matchPairwiseReduction(Root, Opcode, Ty); -} - +TTI::ReductionKind +TTI::matchVectorReduction(const ExtractElementInst *Root, unsigned &Opcode, + VectorType *&Ty, bool &IsPairwise) { + TTI::ReductionKind RdxKind = matchVectorSplittingReduction(Root, Opcode, Ty); + if (RdxKind != TTI::ReductionKind::RK_None) { + IsPairwise = false; + return RdxKind; + } + IsPairwise = true; + return matchPairwiseReduction(Root, Opcode, Ty); +} + int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; diff --git a/contrib/libs/llvm12/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/libs/llvm12/lib/Analysis/TypeBasedAliasAnalysis.cpp index 268acb682c..2b9a142cb1 100644 --- a/contrib/libs/llvm12/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/libs/llvm12/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -111,7 +111,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -737,84 +737,84 @@ bool TypeBasedAAWrapperPass::doFinalization(Module &M) { void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } - -MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) { - // Fast path if there's no offset - if (Offset == 0) - return MD; - // Fast path if there's no path tbaa node (and thus scalar) - if (!isStructPathTBAA(MD)) - return MD; - - TBAAStructTagNode Tag(MD); - SmallVector<Metadata *, 5> Sub; - Sub.push_back(MD->getOperand(0)); - Sub.push_back(MD->getOperand(1)); - ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2)); - - if (Tag.isNewFormat()) { - ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3)); - - if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) { - return nullptr; - } - - uint64_t NewSize = InnerSize->getZExtValue(); - uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; - if (InnerOffset->getZExtValue() < Offset) { - NewOffset = 0; - NewSize -= Offset - InnerOffset->getZExtValue(); - } - - Sub.push_back(ConstantAsMetadata::get( - ConstantInt::get(InnerOffset->getType(), NewOffset))); - - Sub.push_back(ConstantAsMetadata::get( - ConstantInt::get(InnerSize->getType(), NewSize))); - - // immutable 
type - if (MD->getNumOperands() >= 5) - Sub.push_back(MD->getOperand(4)); - } else { - if (InnerOffset->getZExtValue() < Offset) - return nullptr; - - Sub.push_back(ConstantAsMetadata::get(ConstantInt::get( - InnerOffset->getType(), InnerOffset->getZExtValue() - Offset))); - - // immutable type - if (MD->getNumOperands() >= 4) - Sub.push_back(MD->getOperand(3)); - } - return MDNode::get(MD->getContext(), Sub); -} - -MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) { - // Fast path if there's no offset - if (Offset == 0) - return MD; - SmallVector<Metadata *, 3> Sub; - for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) { - ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i)); - ConstantInt *InnerSize = - mdconst::extract<ConstantInt>(MD->getOperand(i + 1)); - // Don't include any triples that aren't in bounds - if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) - continue; - - uint64_t NewSize = InnerSize->getZExtValue(); - uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; - if (InnerOffset->getZExtValue() < Offset) { - NewOffset = 0; - NewSize -= Offset - InnerOffset->getZExtValue(); - } - - // Shift the offset of the triple - Sub.push_back(ConstantAsMetadata::get( - ConstantInt::get(InnerOffset->getType(), NewOffset))); - Sub.push_back(ConstantAsMetadata::get( - ConstantInt::get(InnerSize->getType(), NewSize))); - Sub.push_back(MD->getOperand(i + 2)); - } - return MDNode::get(MD->getContext(), Sub); -}
\ No newline at end of file + +MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) { + // Fast path if there's no offset + if (Offset == 0) + return MD; + // Fast path if there's no path tbaa node (and thus scalar) + if (!isStructPathTBAA(MD)) + return MD; + + TBAAStructTagNode Tag(MD); + SmallVector<Metadata *, 5> Sub; + Sub.push_back(MD->getOperand(0)); + Sub.push_back(MD->getOperand(1)); + ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2)); + + if (Tag.isNewFormat()) { + ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3)); + + if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) { + return nullptr; + } + + uint64_t NewSize = InnerSize->getZExtValue(); + uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; + if (InnerOffset->getZExtValue() < Offset) { + NewOffset = 0; + NewSize -= Offset - InnerOffset->getZExtValue(); + } + + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerOffset->getType(), NewOffset))); + + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerSize->getType(), NewSize))); + + // immutable type + if (MD->getNumOperands() >= 5) + Sub.push_back(MD->getOperand(4)); + } else { + if (InnerOffset->getZExtValue() < Offset) + return nullptr; + + Sub.push_back(ConstantAsMetadata::get(ConstantInt::get( + InnerOffset->getType(), InnerOffset->getZExtValue() - Offset))); + + // immutable type + if (MD->getNumOperands() >= 4) + Sub.push_back(MD->getOperand(3)); + } + return MDNode::get(MD->getContext(), Sub); +} + +MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) { + // Fast path if there's no offset + if (Offset == 0) + return MD; + SmallVector<Metadata *, 3> Sub; + for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) { + ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i)); + ConstantInt *InnerSize = + mdconst::extract<ConstantInt>(MD->getOperand(i + 1)); + // Don't include any triples that aren't in bounds + if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) + continue; + + uint64_t NewSize = InnerSize->getZExtValue(); + uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; + if (InnerOffset->getZExtValue() < Offset) { + NewOffset = 0; + NewSize -= Offset - InnerOffset->getZExtValue(); + } + + // Shift the offset of the triple + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerOffset->getType(), NewOffset))); + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerSize->getType(), NewSize))); + Sub.push_back(MD->getOperand(i + 2)); + } + return MDNode::get(MD->getContext(), Sub); +}
\ No newline at end of file diff --git a/contrib/libs/llvm12/lib/Analysis/VFABIDemangling.cpp b/contrib/libs/llvm12/lib/Analysis/VFABIDemangling.cpp index faa46537ad..6bda7fe220 100644 --- a/contrib/libs/llvm12/lib/Analysis/VFABIDemangling.cpp +++ b/contrib/libs/llvm12/lib/Analysis/VFABIDemangling.cpp @@ -290,9 +290,9 @@ bool verifyAllVectorsHaveSameWidth(FunctionType *Signature) { assert(VecTys.size() > 1 && "Invalid number of elements."); const ElementCount EC = VecTys[0]->getElementCount(); - return llvm::all_of(llvm::drop_begin(VecTys), [&EC](VectorType *VTy) { - return (EC == VTy->getElementCount()); - }); + return llvm::all_of(llvm::drop_begin(VecTys), [&EC](VectorType *VTy) { + return (EC == VTy->getElementCount()); + }); } #endif // NDEBUG @@ -310,7 +310,7 @@ ElementCount getECFromSignature(FunctionType *Signature) { if (auto *VTy = dyn_cast<VectorType>(Ty)) return VTy->getElementCount(); - return ElementCount::getFixed(/*Min=*/1); + return ElementCount::getFixed(/*Min=*/1); } } // namespace @@ -442,7 +442,7 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName, if (!F) return None; const ElementCount EC = getECFromSignature(F->getFunctionType()); - VF = EC.getKnownMinValue(); + VF = EC.getKnownMinValue(); } // Sanity checks. diff --git a/contrib/libs/llvm12/lib/Analysis/ValueTracking.cpp b/contrib/libs/llvm12/lib/Analysis/ValueTracking.cpp index 75486d3c80..edcedc35d3 100644 --- a/contrib/libs/llvm12/lib/Analysis/ValueTracking.cpp +++ b/contrib/libs/llvm12/lib/Analysis/ValueTracking.cpp @@ -115,7 +115,7 @@ struct Query { /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo /// (all of which can call computeKnownBits), and so on. - std::array<const Value *, MaxAnalysisRecursionDepth> Excluded; + std::array<const Value *, MaxAnalysisRecursionDepth> Excluded; /// If true, it is safe to use metadata during simplification. InstrInfoQuery IIQ; @@ -170,8 +170,8 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf, return false; int NumElts = - cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements(); - int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements(); + cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements(); + int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements(); DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts); if (DemandedElts.isNullValue()) return true; @@ -350,14 +350,14 @@ bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, return Known.isNegative(); } -static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, - const Query &Q); +static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, + const Query &Q); bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::isKnownNonEqual(V1, V2, 0, + return ::isKnownNonEqual(V1, V2, 0, Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT, UseInstrInfo, /*ORE=*/nullptr)); } @@ -433,18 +433,18 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, bool isKnownNegativeOp0 = Known2.isNegative(); // The product of two numbers with the same sign is non-negative. 
isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || - (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); + (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); // The product of a negative number and a non-negative number is either // negative or zero. if (!isKnownNonNegative) - isKnownNegative = - (isKnownNegativeOp1 && isKnownNonNegativeOp0 && - Known2.isNonZero()) || - (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero()); + isKnownNegative = + (isKnownNegativeOp1 && isKnownNonNegativeOp0 && + Known2.isNonZero()) || + (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero()); } } - Known = KnownBits::computeForMul(Known, Known2); + Known = KnownBits::computeForMul(Known, Known2); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -477,9 +477,9 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, unsigned CommonPrefixBits = (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); - APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth); - Known.One &= UnsignedMax & Mask; - Known.Zero &= ~UnsignedMax & Mask; + APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth); + Known.One &= UnsignedMax & Mask; + Known.Zero &= ~UnsignedMax & Mask; } } @@ -509,7 +509,7 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) { if (V == I || isSafeToSpeculativelyExecute(V)) { EphValues.insert(V); if (const User *U = dyn_cast<User>(V)) - append_range(WorkSet, U->operands()); + append_range(WorkSet, U->operands()); } } } @@ -526,7 +526,7 @@ bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { // FIXME: This list is repeated from NoTTI::getIntrinsicCost. case Intrinsic::assume: case Intrinsic::sideeffect: - case Intrinsic::pseudoprobe: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: @@ -534,7 +534,7 @@ bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { case Intrinsic::invariant_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: @@ -589,24 +589,24 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return false; } -static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) { - // v u> y implies v != 0. - if (Pred == ICmpInst::ICMP_UGT) - return true; - - // Special-case v != 0 to also handle v != null. - if (Pred == ICmpInst::ICMP_NE) - return match(RHS, m_Zero()); - - // All other predicates - rely on generic ConstantRange handling. - const APInt *C; - if (!match(RHS, m_APInt(C))) - return false; - - ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); - return !TrueValues.contains(APInt::getNullValue(C->getBitWidth())); -} - +static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) { + // v u> y implies v != 0. + if (Pred == ICmpInst::ICMP_UGT) + return true; + + // Special-case v != 0 to also handle v != null. + if (Pred == ICmpInst::ICMP_NE) + return match(RHS, m_Zero()); + + // All other predicates - rely on generic ConstantRange handling. 
+ const APInt *C; + if (!match(RHS, m_APInt(C))) + return false; + + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); + return !TrueValues.contains(APInt::getNullValue(C->getBitWidth())); +} + static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! @@ -639,13 +639,13 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); - Value *RHS; - CmpInst::Predicate Pred; - auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); - if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS)))) - return false; + Value *RHS; + CmpInst::Predicate Pred; + auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); + if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS)))) + return false; - if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) + if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) return true; } @@ -661,14 +661,14 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, unsigned BitWidth = Known.getBitWidth(); - // Refine Known set if the pointer alignment is set by assume bundles. - if (V->getType()->isPointerTy()) { - if (RetainedKnowledge RK = getKnowledgeValidInContext( - V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) { - Known.Zero.setLowBits(Log2_32(RK.ArgValue)); - } - } - + // Refine Known set if the pointer alignment is set by assume bundles. + if (V->getType()->isPointerTy()) { + if (RetainedKnowledge RK = getKnowledgeValidInContext( + V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) { + Known.Zero.setLowBits(Log2_32(RK.ArgValue)); + } + } + // Note that the patterns below need to be kept in sync with the code // in AssumptionCache::updateAffectedValues. @@ -703,7 +703,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth == MaxAnalysisRecursionDepth) + if (Depth == MaxAnalysisRecursionDepth) continue; ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); @@ -969,31 +969,31 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, /// Compute known bits from a shift operator, including those with a /// non-constant shift amount. Known is the output of this function. Known2 is a -/// pre-allocated temporary with the same bit width as Known and on return -/// contains the known bit of the shift value source. KF is an -/// operator-specific function that, given the known-bits and a shift amount, -/// compute the implied known-bits of the shift operator's result respectively -/// for that shift amount. The results from calling KF are conservatively -/// combined for all permitted shift amounts. +/// pre-allocated temporary with the same bit width as Known and on return +/// contains the known bit of the shift value source. KF is an +/// operator-specific function that, given the known-bits and a shift amount, +/// compute the implied known-bits of the shift operator's result respectively +/// for that shift amount. The results from calling KF are conservatively +/// combined for all permitted shift amounts. 
static void computeKnownBitsFromShiftOperator( const Operator *I, const APInt &DemandedElts, KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q, - function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) { + function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) { unsigned BitWidth = Known.getBitWidth(); - computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); - // Note: We cannot use Known.Zero.getLimitedValue() here, because if - // BitWidth > 64 and any upper bits are known, we'll end up returning the - // limit value (which implies all bits are known). - uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue(); - uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue(); - bool ShiftAmtIsConstant = Known.isConstant(); - bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth); - - if (ShiftAmtIsConstant) { - Known = KF(Known2, Known); - + // Note: We cannot use Known.Zero.getLimitedValue() here, because if + // BitWidth > 64 and any upper bits are known, we'll end up returning the + // limit value (which implies all bits are known). + uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue(); + bool ShiftAmtIsConstant = Known.isConstant(); + bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth); + + if (ShiftAmtIsConstant) { + Known = KF(Known2, Known); + // If the known bits conflict, this must be an overflowing left shift, so // the shift result is poison. We can return anything we want. Choose 0 for // the best folding opportunity. @@ -1007,7 +1007,7 @@ static void computeKnownBitsFromShiftOperator( // LHS, the value could be poison, but bail out because the check below is // expensive. // TODO: Should we just carry on? - if (MaxShiftAmtIsOutOfRange) { + if (MaxShiftAmtIsOutOfRange) { Known.resetAll(); return; } @@ -1050,8 +1050,8 @@ static void computeKnownBitsFromShiftOperator( continue; } - Known = KnownBits::commonBits( - Known, KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt)))); + Known = KnownBits::commonBits( + Known, KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt)))); } // If the known bits conflict, the result is poison. 
Return a 0 and hope the @@ -1115,9 +1115,9 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } case Instruction::UDiv: { - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - Known = KnownBits::udiv(Known, Known2); + Known = KnownBits::udiv(Known, Known2); break; } case Instruction::Select: { @@ -1126,38 +1126,38 @@ static void computeKnownBitsFromOperator(const Operator *I, if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, Known, Depth + 1, Q); computeKnownBits(LHS, Known2, Depth + 1, Q); - switch (SPF) { - default: - llvm_unreachable("Unhandled select pattern flavor!"); - case SPF_SMAX: - Known = KnownBits::smax(Known, Known2); - break; - case SPF_SMIN: - Known = KnownBits::smin(Known, Known2); - break; - case SPF_UMAX: - Known = KnownBits::umax(Known, Known2); - break; - case SPF_UMIN: - Known = KnownBits::umin(Known, Known2); - break; - } - break; + switch (SPF) { + default: + llvm_unreachable("Unhandled select pattern flavor!"); + case SPF_SMAX: + Known = KnownBits::smax(Known, Known2); + break; + case SPF_SMIN: + Known = KnownBits::smin(Known, Known2); + break; + case SPF_UMAX: + Known = KnownBits::umax(Known, Known2); + break; + case SPF_UMIN: + Known = KnownBits::umin(Known, Known2); + break; + } + break; } - computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - - // Only known if known in both the LHS and RHS. - Known = KnownBits::commonBits(Known, Known2); - - if (SPF == SPF_ABS) { + computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + + // Only known if known in both the LHS and RHS. + Known = KnownBits::commonBits(Known, Known2); + + if (SPF == SPF_ABS) { // RHS from matchSelectPattern returns the negation part of abs pattern. // If the negate has an NSW flag we can assume the sign bit of the result // will be 0 because that makes abs(INT_MIN) undefined. if (match(RHS, m_Neg(m_Specific(LHS))) && Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) - Known.Zero.setSignBit(); + Known.Zero.setSignBit(); } break; @@ -1215,36 +1215,36 @@ static void computeKnownBitsFromOperator(const Operator *I, } case Instruction::Shl: { bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); - auto KF = [NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) { - KnownBits Result = KnownBits::shl(KnownVal, KnownAmt); + auto KF = [NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) { + KnownBits Result = KnownBits::shl(KnownVal, KnownAmt); // If this shift has "nsw" keyword, then the result is either a poison // value or has the same sign bit as the first operand. 
- if (NSW) { - if (KnownVal.Zero.isSignBitSet()) - Result.Zero.setSignBit(); - if (KnownVal.One.isSignBitSet()) - Result.One.setSignBit(); - } - return Result; + if (NSW) { + if (KnownVal.Zero.isSignBitSet()) + Result.Zero.setSignBit(); + if (KnownVal.One.isSignBitSet()) + Result.One.setSignBit(); + } + return Result; }; computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, - KF); + KF); break; } case Instruction::LShr: { - auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) { - return KnownBits::lshr(KnownVal, KnownAmt); + auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) { + return KnownBits::lshr(KnownVal, KnownAmt); }; computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, - KF); + KF); break; } case Instruction::AShr: { - auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) { - return KnownBits::ashr(KnownVal, KnownAmt); + auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) { + return KnownBits::ashr(KnownVal, KnownAmt); }; computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, - KF); + KF); break; } case Instruction::Sub: { @@ -1260,15 +1260,15 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } case Instruction::SRem: - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - Known = KnownBits::srem(Known, Known2); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::srem(Known, Known2); break; - case Instruction::URem: + case Instruction::URem: computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - Known = KnownBits::urem(Known, Known2); + Known = KnownBits::urem(Known, Known2); break; case Instruction::Alloca: Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign())); @@ -1276,29 +1276,29 @@ static void computeKnownBitsFromOperator(const Operator *I, case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - // Accumulate the constant indices in a separate variable - // to minimize the number of calls to computeForAddSub. - APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + // Accumulate the constant indices in a separate variable + // to minimize the number of calls to computeForAddSub. + APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true); gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { // TrailZ can only become smaller, short-circuit if we hit zero. - if (Known.isUnknown()) + if (Known.isUnknown()) break; Value *Index = I->getOperand(i); - - // Handle case when index is zero. - Constant *CIndex = dyn_cast<Constant>(Index); - if (CIndex && CIndex->isZeroValue()) - continue; - + + // Handle case when index is zero. + Constant *CIndex = dyn_cast<Constant>(Index); + if (CIndex && CIndex->isZeroValue()) + continue; + if (StructType *STy = GTI.getStructTypeOrNull()) { // Handle struct member offset arithmetic. 
- assert(CIndex && - "Access to structure field must be known at compile time"); + assert(CIndex && + "Access to structure field must be known at compile time"); if (CIndex->getType()->isVectorTy()) Index = CIndex->getSplatValue(); @@ -1306,56 +1306,56 @@ static void computeKnownBitsFromOperator(const Operator *I, unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); const StructLayout *SL = Q.DL.getStructLayout(STy); uint64_t Offset = SL->getElementOffset(Idx); - AccConstIndices += Offset; - continue; - } - - // Handle array index arithmetic. - Type *IndexedTy = GTI.getIndexedType(); - if (!IndexedTy->isSized()) { - Known.resetAll(); - break; - } - - unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); - KnownBits IndexBits(IndexBitWidth); - computeKnownBits(Index, IndexBits, Depth + 1, Q); - TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy); - uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize(); - KnownBits ScalingFactor(IndexBitWidth); - // Multiply by current sizeof type. - // &A[i] == A + i * sizeof(*A[i]). - if (IndexTypeSize.isScalable()) { - // For scalable types the only thing we know about sizeof is - // that this is a multiple of the minimum size. - ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes)); - } else if (IndexBits.isConstant()) { - APInt IndexConst = IndexBits.getConstant(); - APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes); - IndexConst *= ScalingFactor; - AccConstIndices += IndexConst.sextOrTrunc(BitWidth); - continue; + AccConstIndices += Offset; + continue; + } + + // Handle array index arithmetic. + Type *IndexedTy = GTI.getIndexedType(); + if (!IndexedTy->isSized()) { + Known.resetAll(); + break; + } + + unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); + KnownBits IndexBits(IndexBitWidth); + computeKnownBits(Index, IndexBits, Depth + 1, Q); + TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy); + uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize(); + KnownBits ScalingFactor(IndexBitWidth); + // Multiply by current sizeof type. + // &A[i] == A + i * sizeof(*A[i]). + if (IndexTypeSize.isScalable()) { + // For scalable types the only thing we know about sizeof is + // that this is a multiple of the minimum size. + ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes)); + } else if (IndexBits.isConstant()) { + APInt IndexConst = IndexBits.getConstant(); + APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes); + IndexConst *= ScalingFactor; + AccConstIndices += IndexConst.sextOrTrunc(BitWidth); + continue; } else { - ScalingFactor = - KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes)); + ScalingFactor = + KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes)); } - IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor); - - // If the offsets have a different width from the pointer, according - // to the language reference we need to sign-extend or truncate them - // to the width of the pointer. - IndexBits = IndexBits.sextOrTrunc(BitWidth); - - // Note that inbounds does *not* guarantee nsw for the addition, as only - // the offset is signed, while the base address is unsigned. 
- Known = KnownBits::computeForAddSub( - /*Add=*/true, /*NSW=*/false, Known, IndexBits); - } - if (!Known.isUnknown() && !AccConstIndices.isNullValue()) { - KnownBits Index = KnownBits::makeConstant(AccConstIndices); - Known = KnownBits::computeForAddSub( - /*Add=*/true, /*NSW=*/false, Known, Index); + IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor); + + // If the offsets have a different width from the pointer, according + // to the language reference we need to sign-extend or truncate them + // to the width of the pointer. + IndexBits = IndexBits.sextOrTrunc(BitWidth); + + // Note that inbounds does *not* guarantee nsw for the addition, as only + // the offset is signed, while the base address is unsigned. + Known = KnownBits::computeForAddSub( + /*Add=*/true, /*NSW=*/false, Known, IndexBits); } + if (!Known.isUnknown() && !AccConstIndices.isNullValue()) { + KnownBits Index = KnownBits::makeConstant(AccConstIndices); + Known = KnownBits::computeForAddSub( + /*Add=*/true, /*NSW=*/false, Known, Index); + } break; } case Instruction::PHI: { @@ -1454,7 +1454,7 @@ static void computeKnownBitsFromOperator(const Operator *I, // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. - if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) { + if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) { // Skip if every incoming value references to ourself. if (dyn_cast_or_null<UndefValue>(P->hasConstantValue())) break; @@ -1476,11 +1476,11 @@ static void computeKnownBitsFromOperator(const Operator *I, Known2 = KnownBits(BitWidth); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ); - Known = KnownBits::commonBits(Known, Known2); + computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ); + Known = KnownBits::commonBits(Known, Known2); // If all bits have been ruled out, there's no need to check // more operands. - if (Known.isUnknown()) + if (Known.isUnknown()) break; } } @@ -1502,12 +1502,12 @@ static void computeKnownBitsFromOperator(const Operator *I, if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; - case Intrinsic::abs: { - computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); - bool IntMinIsPoison = match(II->getArgOperand(1), m_One()); - Known = Known2.abs(IntMinIsPoison); - break; - } + case Intrinsic::abs: { + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + bool IntMinIsPoison = match(II->getArgOperand(1), m_One()); + Known = Known2.abs(IntMinIsPoison); + break; + } case Intrinsic::bitreverse: computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); Known.Zero |= Known2.Zero.reverseBits(); @@ -1521,7 +1521,7 @@ static void computeKnownBitsFromOperator(const Operator *I, case Intrinsic::ctlz: { computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // If we have a known 1, its position is our upper bound. - unsigned PossibleLZ = Known2.countMaxLeadingZeros(); + unsigned PossibleLZ = Known2.countMaxLeadingZeros(); // If this call is undefined for 0, the result will be less than 2^n. 
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) PossibleLZ = std::min(PossibleLZ, BitWidth - 1); @@ -1532,7 +1532,7 @@ static void computeKnownBitsFromOperator(const Operator *I, case Intrinsic::cttz: { computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // If we have a known 1, its position is our upper bound. - unsigned PossibleTZ = Known2.countMaxTrailingZeros(); + unsigned PossibleTZ = Known2.countMaxTrailingZeros(); // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) PossibleTZ = std::min(PossibleTZ, BitWidth - 1); @@ -1603,26 +1603,26 @@ static void computeKnownBitsFromOperator(const Operator *I, } break; } - case Intrinsic::umin: - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - Known = KnownBits::umin(Known, Known2); - break; - case Intrinsic::umax: - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - Known = KnownBits::umax(Known, Known2); - break; - case Intrinsic::smin: - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - Known = KnownBits::smin(Known, Known2); - break; - case Intrinsic::smax: - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - Known = KnownBits::smax(Known, Known2); - break; + case Intrinsic::umin: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::umin(Known, Known2); + break; + case Intrinsic::umax: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::umax(Known, Known2); + break; + case Intrinsic::smin: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::smin(Known, Known2); + break; + case Intrinsic::smax: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::smax(Known, Known2); + break; case Intrinsic::x86_sse42_crc32_64_64: Known.Zero.setBitsFrom(32); break; @@ -1655,7 +1655,7 @@ static void computeKnownBitsFromOperator(const Operator *I, if (!!DemandedRHS) { const Value *RHS = Shuf->getOperand(1); computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); - Known = KnownBits::commonBits(Known, Known2); + Known = KnownBits::commonBits(Known, Known2); } break; } @@ -1684,7 +1684,7 @@ static void computeKnownBitsFromOperator(const Operator *I, DemandedVecElts.clearBit(EltIdx); if (!!DemandedVecElts) { computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); - Known = KnownBits::commonBits(Known, Known2); + Known = KnownBits::commonBits(Known, Known2); } break; } @@ -1734,11 +1734,11 @@ static void computeKnownBitsFromOperator(const Operator *I, } } break; - case Instruction::Freeze: - if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, - Depth + 1)) - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - break; + case Instruction::Freeze: + if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, + Depth + 1)) + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + break; } } @@ -1784,7 +1784,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, } assert(V && "No Value?"); - 
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); + assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); #ifndef NDEBUG Type *Ty = V->getType(); @@ -1815,7 +1815,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, const APInt *C; if (match(V, m_APInt(C))) { // We know all of the bits for a scalar constant or a splat vector constant! - Known = KnownBits::makeConstant(*C); + Known = KnownBits::makeConstant(*C); return; } // Null and aggregate-zero are all-zeros. @@ -1871,7 +1871,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, assert(!isa<ConstantData>(V) && "Unhandled constant data!"); // All recursive calls that increase depth must come after this. - if (Depth == MaxAnalysisRecursionDepth) + if (Depth == MaxAnalysisRecursionDepth) return; // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has @@ -1888,7 +1888,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, // Aligned pointers have trailing zeros - refine Known.Zero set if (isa<PointerType>(V->getType())) { Align Alignment = V->getPointerAlignment(Q.DL); - Known.Zero.setLowBits(Log2(Alignment)); + Known.Zero.setLowBits(Log2(Alignment)); } // computeKnownBitsFromAssume strictly refines Known. @@ -1906,7 +1906,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, /// types and vectors of integers. bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q) { - assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); + assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); // Attempt to match against constants. if (OrZero && match(V, m_Power2OrZero())) @@ -1925,7 +1925,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, return true; // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ == MaxAnalysisRecursionDepth) + if (Depth++ == MaxAnalysisRecursionDepth) return false; Value *X = nullptr, *Y = nullptr; @@ -2054,7 +2054,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, // to recurse 10k times just because we have 10k GEP operands. We don't // bail completely out because we want to handle constant GEPs regardless // of depth. - if (Depth++ >= MaxAnalysisRecursionDepth) + if (Depth++ >= MaxAnalysisRecursionDepth) continue; if (isKnownNonZero(GTI.getOperand(), Depth, Q)) @@ -2086,8 +2086,8 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, if (auto *CalledFunc = CB->getCalledFunction()) for (const Argument &Arg : CalledFunc->args()) if (CB->getArgOperand(Arg.getArgNo()) == V && - Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) && - DT->dominates(CB, CtxI)) + Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) && + DT->dominates(CB, CtxI)) return true; // If the value is used as a load/store, then the pointer must be non null. 
@@ -2100,19 +2100,19 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, } // Consider only compare instructions uniquely controlling a branch - Value *RHS; + Value *RHS; CmpInst::Predicate Pred; - if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS)))) - continue; - - bool NonNullIfTrue; - if (cmpExcludesZero(Pred, RHS)) - NonNullIfTrue = true; - else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS)) - NonNullIfTrue = false; - else + if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS)))) continue; + bool NonNullIfTrue; + if (cmpExcludesZero(Pred, RHS)) + NonNullIfTrue = true; + else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS)) + NonNullIfTrue = false; + else + continue; + SmallVector<const User *, 4> WorkList; SmallPtrSet<const User *, 4> Visited; for (auto *CmpU : U->users()) { @@ -2127,23 +2127,23 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, // propagate "pred != null" condition through AND because it is only // correct to assume that all conditions of AND are met in true branch. // TODO: Support similar logic of OR and EQ predicate? - if (NonNullIfTrue) - if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { - for (auto *CurrU : Curr->users()) - if (Visited.insert(CurrU).second) - WorkList.push_back(CurrU); - continue; - } + if (NonNullIfTrue) + if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { + for (auto *CurrU : Curr->users()) + if (Visited.insert(CurrU).second) + WorkList.push_back(CurrU); + continue; + } if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) { assert(BI->isConditional() && "uses a comparison!"); BasicBlock *NonNullSuccessor = - BI->getSuccessor(NonNullIfTrue ? 0 : 1); + BI->getSuccessor(NonNullIfTrue ? 0 : 1); BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) return true; - } else if (NonNullIfTrue && isGuard(Curr) && + } else if (NonNullIfTrue && isGuard(Curr) && DT->dominates(cast<Instruction>(Curr), CtxI)) { return true; } @@ -2196,9 +2196,9 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, // See the comment for IntToPtr/PtrToInt instructions below. if (CE->getOpcode() == Instruction::IntToPtr || CE->getOpcode() == Instruction::PtrToInt) - if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) - .getFixedSize() <= - Q.DL.getTypeSizeInBits(CE->getType()).getFixedSize()) + if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) + .getFixedSize() <= + Q.DL.getTypeSizeInBits(CE->getType()).getFixedSize()) return isKnownNonZero(CE->getOperand(0), Depth, Q); } @@ -2244,24 +2244,24 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, return true; // Some of the tests below are recursive, so bail out if we hit the limit. - if (Depth++ >= MaxAnalysisRecursionDepth) + if (Depth++ >= MaxAnalysisRecursionDepth) return false; // Check for pointer simplifications. - - if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { + + if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { // Alloca never returns null, malloc might. if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0) return true; - // A byval, inalloca may not be null in a non-default addres space. A - // nonnull argument is assumed never 0. 
- if (const Argument *A = dyn_cast<Argument>(V)) { - if (((A->hasPassPointeeByValueCopyAttr() && - !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || - A->hasNonNullAttr())) + // A byval, inalloca may not be null in a non-default addres space. A + // nonnull argument is assumed never 0. + if (const Argument *A = dyn_cast<Argument>(V)) { + if (((A->hasPassPointeeByValueCopyAttr() && + !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || + A->hasNonNullAttr())) return true; - } + } // A Load tagged with nonnull metadata is never null. if (const LoadInst *LI = dyn_cast<LoadInst>(V)) @@ -2289,22 +2289,22 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well // as casts that can alter the value, e.g., AddrSpaceCasts. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - return isGEPKnownNonNull(GEP, Depth, Q); + return isGEPKnownNonNull(GEP, Depth, Q); if (auto *BCO = dyn_cast<BitCastOperator>(V)) return isKnownNonZero(BCO->getOperand(0), Depth, Q); if (auto *I2P = dyn_cast<IntToPtrInst>(V)) - if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()).getFixedSize() <= - Q.DL.getTypeSizeInBits(I2P->getDestTy()).getFixedSize()) + if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()).getFixedSize() <= + Q.DL.getTypeSizeInBits(I2P->getDestTy()).getFixedSize()) return isKnownNonZero(I2P->getOperand(0), Depth, Q); } // Similar to int2ptr above, we can look through ptr2int here if the cast // is a no-op or an extend and not a truncate. if (auto *P2I = dyn_cast<PtrToIntInst>(V)) - if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()).getFixedSize() <= - Q.DL.getTypeSizeInBits(P2I->getDestTy()).getFixedSize()) + if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()).getFixedSize() <= + Q.DL.getTypeSizeInBits(P2I->getDestTy()).getFixedSize()) return isKnownNonZero(P2I->getOperand(0), Depth, Q); unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL); @@ -2431,14 +2431,14 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, } } } - // Check if all incoming values are non-zero using recursion. - Query RecQ = Q; - unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); - return llvm::all_of(PN->operands(), [&](const Use &U) { - if (U.get() == PN) - return true; - RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); - return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); + // Check if all incoming values are non-zero using recursion. 
+ Query RecQ = Q; + unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); + return llvm::all_of(PN->operands(), [&](const Use &U) { + if (U.get() == PN) + return true; + RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); + return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); }); } // ExtractElement @@ -2446,21 +2446,21 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, const Value *Vec = EEI->getVectorOperand(); const Value *Idx = EEI->getIndexOperand(); auto *CIdx = dyn_cast<ConstantInt>(Idx); - if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { - unsigned NumElts = VecTy->getNumElements(); - APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); - if (CIdx && CIdx->getValue().ult(NumElts)) - DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); - return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); - } - } - // Freeze - else if (const FreezeInst *FI = dyn_cast<FreezeInst>(V)) { - auto *Op = FI->getOperand(0); - if (isKnownNonZero(Op, Depth, Q) && - isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth)) - return true; - } + if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { + unsigned NumElts = VecTy->getNumElements(); + APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); + if (CIdx && CIdx->getValue().ult(NumElts)) + DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); + return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); + } + } + // Freeze + else if (const FreezeInst *FI = dyn_cast<FreezeInst>(V)) { + auto *Op = FI->getOperand(0); + if (isKnownNonZero(Op, Depth, Q) && + isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth)) + return true; + } KnownBits Known(BitWidth); computeKnownBits(V, DemandedElts, Known, Depth, Q); @@ -2480,8 +2480,8 @@ bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) { } /// Return true if V2 == V1 + X, where X is known non-zero. -static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth, - const Query &Q) { +static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth, + const Query &Q) { const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); if (!BO || BO->getOpcode() != Instruction::Add) return false; @@ -2492,75 +2492,75 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth, Op = BO->getOperand(0); else return false; - return isKnownNonZero(Op, Depth + 1, Q); + return isKnownNonZero(Op, Depth + 1, Q); } - + /// Return true if it is known that V1 != V2. -static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, - const Query &Q) { +static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, + const Query &Q) { if (V1 == V2) return false; if (V1->getType() != V2->getType()) // We can't look through casts yet. return false; - - if (Depth >= MaxAnalysisRecursionDepth) - return false; - - // See if we can recurse through (exactly one of) our operands. This - // requires our operation be 1-to-1 and map every input value to exactly - // one output value. Such an operation is invertible. 
- auto *O1 = dyn_cast<Operator>(V1); - auto *O2 = dyn_cast<Operator>(V2); - if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) { - switch (O1->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::Sub: - // Assume operand order has been canonicalized - if (O1->getOperand(0) == O2->getOperand(0)) - return isKnownNonEqual(O1->getOperand(1), O2->getOperand(1), - Depth + 1, Q); - if (O1->getOperand(1) == O2->getOperand(1)) - return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0), - Depth + 1, Q); - break; - case Instruction::Mul: { - // invertible if A * B == (A * B) mod 2^N where A, and B are integers - // and N is the bitwdith. The nsw case is non-obvious, but proven by - // alive2: https://alive2.llvm.org/ce/z/Z6D5qK - auto *OBO1 = cast<OverflowingBinaryOperator>(O1); - auto *OBO2 = cast<OverflowingBinaryOperator>(O2); - if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && - (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) - break; - - // Assume operand order has been canonicalized - if (O1->getOperand(1) == O2->getOperand(1) && - isa<ConstantInt>(O1->getOperand(1)) && - !cast<ConstantInt>(O1->getOperand(1))->isZero()) - return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0), - Depth + 1, Q); - break; - } - case Instruction::SExt: - case Instruction::ZExt: - if (O1->getOperand(0)->getType() == O2->getOperand(0)->getType()) - return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0), - Depth + 1, Q); - break; - }; - } - - if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q)) + + if (Depth >= MaxAnalysisRecursionDepth) + return false; + + // See if we can recurse through (exactly one of) our operands. This + // requires our operation be 1-to-1 and map every input value to exactly + // one output value. Such an operation is invertible. + auto *O1 = dyn_cast<Operator>(V1); + auto *O2 = dyn_cast<Operator>(V2); + if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) { + switch (O1->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + // Assume operand order has been canonicalized + if (O1->getOperand(0) == O2->getOperand(0)) + return isKnownNonEqual(O1->getOperand(1), O2->getOperand(1), + Depth + 1, Q); + if (O1->getOperand(1) == O2->getOperand(1)) + return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0), + Depth + 1, Q); + break; + case Instruction::Mul: { + // invertible if A * B == (A * B) mod 2^N where A, and B are integers + // and N is the bitwdith. The nsw case is non-obvious, but proven by + // alive2: https://alive2.llvm.org/ce/z/Z6D5qK + auto *OBO1 = cast<OverflowingBinaryOperator>(O1); + auto *OBO2 = cast<OverflowingBinaryOperator>(O2); + if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && + (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) + break; + + // Assume operand order has been canonicalized + if (O1->getOperand(1) == O2->getOperand(1) && + isa<ConstantInt>(O1->getOperand(1)) && + !cast<ConstantInt>(O1->getOperand(1))->isZero()) + return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0), + Depth + 1, Q); + break; + } + case Instruction::SExt: + case Instruction::ZExt: + if (O1->getOperand(0)->getType() == O2->getOperand(0)->getType()) + return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0), + Depth + 1, Q); + break; + }; + } + + if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q)) return true; if (V1->getType()->isIntOrIntVectorTy()) { // Are any known bits in V1 contradictory to known bits in V2? 
If V1 // has a known zero where V2 has a known one, they must not be equal. - KnownBits Known1 = computeKnownBits(V1, Depth, Q); - KnownBits Known2 = computeKnownBits(V2, Depth, Q); + KnownBits Known1 = computeKnownBits(V1, Depth, Q); + KnownBits Known2 = computeKnownBits(V2, Depth, Q); if (Known1.Zero.intersects(Known2.One) || Known2.Zero.intersects(Known1.One)) @@ -2672,7 +2672,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, return 1; #ifndef NDEBUG - assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); + assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { assert( @@ -2699,8 +2699,8 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, // Note that ConstantInt is handled by the general computeKnownBits case // below. - if (Depth == MaxAnalysisRecursionDepth) - return 1; + if (Depth == MaxAnalysisRecursionDepth) + return 1; if (auto *U = dyn_cast<Operator>(V)) { switch (Operator::getOpcode(V)) { @@ -2892,13 +2892,13 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. - Query RecQ = Q; - Tmp = TyBits; - for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { + Query RecQ = Q; + Tmp = TyBits; + for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { if (Tmp == 1) return Tmp; - RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator(); + RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator(); Tmp = std::min( - Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ)); + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ)); } return Tmp; } @@ -2946,23 +2946,23 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, // fall-back. if (Tmp == 1) break; - assert(Tmp <= TyBits && "Failed to determine minimum sign bits"); + assert(Tmp <= TyBits && "Failed to determine minimum sign bits"); return Tmp; } - case Instruction::Call: { - if (const auto *II = dyn_cast<IntrinsicInst>(U)) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::abs: - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - - // Absolute value reduces number of sign bits by at most 1. - return Tmp - 1; - } - } - } + case Instruction::Call: { + if (const auto *II = dyn_cast<IntrinsicInst>(U)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::abs: + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + + // Absolute value reduces number of sign bits by at most 1. 
+ return Tmp - 1; + } + } } + } } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -2989,7 +2989,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt, unsigned Depth) { assert(V && "No Value?"); - assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); + assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); Type *T = V->getType(); @@ -3017,7 +3017,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, return true; } - if (Depth == MaxAnalysisRecursionDepth) return false; + if (Depth == MaxAnalysisRecursionDepth) return false; Operator *I = dyn_cast<Operator>(V); if (!I) return false; @@ -3051,11 +3051,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { if (Constant *Op1C = dyn_cast<Constant>(Op1)) if (Constant *MulC = dyn_cast<Constant>(Mul0)) { - if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() < - MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) + if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() < + MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) Op1C = ConstantExpr::getZExt(Op1C, MulC->getType()); - if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() > - MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) + if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() > + MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) MulC = ConstantExpr::getZExt(MulC, Op1C->getType()); // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) @@ -3075,11 +3075,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { if (Constant *Op0C = dyn_cast<Constant>(Op0)) if (Constant *MulC = dyn_cast<Constant>(Mul1)) { - if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() < - MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) + if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() < + MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) Op0C = ConstantExpr::getZExt(Op0C, MulC->getType()); - if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() > - MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) + if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() > + MulC->getType()->getPrimitiveSizeInBits().getFixedSize()) MulC = ConstantExpr::getZExt(MulC, Op0C->getType()); // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) @@ -3219,7 +3219,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, if (auto *CFP = dyn_cast<ConstantFP>(V)) return !CFP->getValueAPF().isNegZero(); - if (Depth == MaxAnalysisRecursionDepth) + if (Depth == MaxAnalysisRecursionDepth) return false; auto *Op = dyn_cast<Operator>(V); @@ -3287,8 +3287,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, } } - if (Depth == MaxAnalysisRecursionDepth) - return false; + if (Depth == MaxAnalysisRecursionDepth) + return false; const Operator *I = dyn_cast<Operator>(V); if (!I) @@ -3440,7 +3440,7 @@ bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, if (auto *CFP = dyn_cast<ConstantFP>(V)) return !CFP->isInfinity(); - if (Depth == MaxAnalysisRecursionDepth) + if (Depth == MaxAnalysisRecursionDepth) return false; if (auto *Inst = dyn_cast<Instruction>(V)) { @@ -3449,30 
+3449,30 @@ bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) && isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1); } - case Instruction::SIToFP: - case Instruction::UIToFP: { - // Get width of largest magnitude integer (remove a bit if signed). - // This still works for a signed minimum value because the largest FP - // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx). - int IntSize = Inst->getOperand(0)->getType()->getScalarSizeInBits(); - if (Inst->getOpcode() == Instruction::SIToFP) - --IntSize; - - // If the exponent of the largest finite FP value can hold the largest - // integer, the result of the cast must be finite. - Type *FPTy = Inst->getType()->getScalarType(); - return ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize; - } + case Instruction::SIToFP: + case Instruction::UIToFP: { + // Get width of largest magnitude integer (remove a bit if signed). + // This still works for a signed minimum value because the largest FP + // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx). + int IntSize = Inst->getOperand(0)->getType()->getScalarSizeInBits(); + if (Inst->getOpcode() == Instruction::SIToFP) + --IntSize; + + // If the exponent of the largest finite FP value can hold the largest + // integer, the result of the cast must be finite. + Type *FPTy = Inst->getType()->getScalarType(); + return ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize; + } default: break; } } // try to handle fixed width vector constants - auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); - if (VFVTy && isa<Constant>(V)) { + auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); + if (VFVTy && isa<Constant>(V)) { // For vectors, verify that each element is not infinity. - unsigned NumElts = VFVTy->getNumElements(); + unsigned NumElts = VFVTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = cast<Constant>(V)->getAggregateElement(i); if (!Elt) @@ -3504,7 +3504,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, if (auto *CFP = dyn_cast<ConstantFP>(V)) return !CFP->isNaN(); - if (Depth == MaxAnalysisRecursionDepth) + if (Depth == MaxAnalysisRecursionDepth) return false; if (auto *Inst = dyn_cast<Instruction>(V)) { @@ -3574,10 +3574,10 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, } // Try to handle fixed width vector constants - auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); - if (VFVTy && isa<Constant>(V)) { + auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); + if (VFVTy && isa<Constant>(V)) { // For vectors, verify that each element is not NaN. 
- unsigned NumElts = VFVTy->getNumElements(); + unsigned NumElts = VFVTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = cast<Constant>(V)->getAggregateElement(i); if (!Elt) @@ -3655,13 +3655,13 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { if (auto *CE = dyn_cast<ConstantExpr>(C)) { if (CE->getOpcode() == Instruction::IntToPtr) { - if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { - unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); - return isBytewiseValue( - ConstantExpr::getIntegerCast(CE->getOperand(0), - Type::getIntNTy(Ctx, BitWidth), false), - DL); - } + if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { + unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); + return isBytewiseValue( + ConstantExpr::getIntegerCast(CE->getOperand(0), + Type::getIntNTy(Ctx, BitWidth), false), + DL); + } } } @@ -4130,7 +4130,7 @@ static bool isSameUnderlyingObjectInLoop(const PHINode *PN, return true; } -Value *llvm::getUnderlyingObject(Value *V, unsigned MaxLookup) { +Value *llvm::getUnderlyingObject(Value *V, unsigned MaxLookup) { if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -4175,15 +4175,15 @@ Value *llvm::getUnderlyingObject(Value *V, unsigned MaxLookup) { return V; } -void llvm::getUnderlyingObjects(const Value *V, +void llvm::getUnderlyingObjects(const Value *V, SmallVectorImpl<const Value *> &Objects, - LoopInfo *LI, unsigned MaxLookup) { + LoopInfo *LI, unsigned MaxLookup) { SmallPtrSet<const Value *, 4> Visited; SmallVector<const Value *, 4> Worklist; Worklist.push_back(V); do { const Value *P = Worklist.pop_back_val(); - P = getUnderlyingObject(P, MaxLookup); + P = getUnderlyingObject(P, MaxLookup); if (!Visited.insert(P).second) continue; @@ -4207,7 +4207,7 @@ void llvm::getUnderlyingObjects(const Value *V, // underlying objects. if (!LI || !LI->isLoopHeader(PN->getParent()) || isSameUnderlyingObjectInLoop(PN, LI)) - append_range(Worklist, PN->incoming_values()); + append_range(Worklist, PN->incoming_values()); continue; } @@ -4243,18 +4243,18 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { } while (true); } -/// This is a wrapper around getUnderlyingObjects and adds support for basic +/// This is a wrapper around getUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. -/// It returns false if unidentified object is found in getUnderlyingObjects. +/// It returns false if unidentified object is found in getUnderlyingObjects. bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, - SmallVectorImpl<Value *> &Objects) { + SmallVectorImpl<Value *> &Objects) { SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 4> Working(1, V); do { V = Working.pop_back_val(); SmallVector<const Value *, 4> Objs; - getUnderlyingObjects(V, Objs); + getUnderlyingObjects(V, Objs); for (const Value *V : Objs) { if (!Visited.insert(V).second) @@ -4267,7 +4267,7 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, continue; } } - // If getUnderlyingObjects fails to find an identifiable object, + // If getUnderlyingObjects fails to find an identifiable object, // getUnderlyingObjectsForCodeGen also fails for safety. 
if (!isIdentifiedObject(V)) { Objects.clear(); @@ -4279,72 +4279,72 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, return true; } -AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) { - AllocaInst *Result = nullptr; - SmallPtrSet<Value *, 4> Visited; - SmallVector<Value *, 4> Worklist; - - auto AddWork = [&](Value *V) { - if (Visited.insert(V).second) - Worklist.push_back(V); - }; - - AddWork(V); - do { - V = Worklist.pop_back_val(); - assert(Visited.count(V)); - - if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - if (Result && Result != AI) - return nullptr; - Result = AI; - } else if (CastInst *CI = dyn_cast<CastInst>(V)) { - AddWork(CI->getOperand(0)); - } else if (PHINode *PN = dyn_cast<PHINode>(V)) { - for (Value *IncValue : PN->incoming_values()) - AddWork(IncValue); - } else if (auto *SI = dyn_cast<SelectInst>(V)) { - AddWork(SI->getTrueValue()); - AddWork(SI->getFalseValue()); - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { - if (OffsetZero && !GEP->hasAllZeroIndices()) - return nullptr; - AddWork(GEP->getPointerOperand()); - } else { - return nullptr; - } - } while (!Worklist.empty()); - - return Result; -} - -static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper( - const Value *V, bool AllowLifetime, bool AllowDroppable) { +AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) { + AllocaInst *Result = nullptr; + SmallPtrSet<Value *, 4> Visited; + SmallVector<Value *, 4> Worklist; + + auto AddWork = [&](Value *V) { + if (Visited.insert(V).second) + Worklist.push_back(V); + }; + + AddWork(V); + do { + V = Worklist.pop_back_val(); + assert(Visited.count(V)); + + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + if (Result && Result != AI) + return nullptr; + Result = AI; + } else if (CastInst *CI = dyn_cast<CastInst>(V)) { + AddWork(CI->getOperand(0)); + } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + for (Value *IncValue : PN->incoming_values()) + AddWork(IncValue); + } else if (auto *SI = dyn_cast<SelectInst>(V)) { + AddWork(SI->getTrueValue()); + AddWork(SI->getFalseValue()); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { + if (OffsetZero && !GEP->hasAllZeroIndices()) + return nullptr; + AddWork(GEP->getPointerOperand()); + } else { + return nullptr; + } + } while (!Worklist.empty()); + + return Result; +} + +static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper( + const Value *V, bool AllowLifetime, bool AllowDroppable) { for (const User *U : V->users()) { const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); - if (!II) - return false; - - if (AllowLifetime && II->isLifetimeStartOrEnd()) - continue; + if (!II) + return false; - if (AllowDroppable && II->isDroppable()) - continue; - - return false; + if (AllowLifetime && II->isLifetimeStartOrEnd()) + continue; + + if (AllowDroppable && II->isDroppable()) + continue; + + return false; } return true; } -bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { - return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( - V, /* AllowLifetime */ true, /* AllowDroppable */ false); -} -bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) { - return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( - V, /* AllowLifetime */ true, /* AllowDroppable */ true); -} - +bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { + return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( + V, /* AllowLifetime */ true, /* AllowDroppable */ false); +} +bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) { + return 
onlyUsedByLifetimeMarkersOrDroppableInstsHelper( + V, /* AllowLifetime */ true, /* AllowDroppable */ true); +} + bool llvm::mustSuppressSpeculation(const LoadInst &LI) { if (!LI.isUnordered()) return true; @@ -4390,7 +4390,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, if (*Denominator == 0) return false; // It's safe to hoist if the denominator is not 0 or -1. - if (!Denominator->isAllOnesValue()) + if (!Denominator->isAllOnesValue()) return true; // At this point we know that the denominator is -1. It is safe to hoist as // long we know that the numerator is not INT_MIN. @@ -4698,30 +4698,30 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } -static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { +static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { // See whether I has flags that may create poison - if (const auto *OvOp = dyn_cast<OverflowingBinaryOperator>(Op)) { - if (OvOp->hasNoSignedWrap() || OvOp->hasNoUnsignedWrap()) - return true; - } - if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(Op)) - if (ExactOp->isExact()) - return true; - if (const auto *FP = dyn_cast<FPMathOperator>(Op)) { + if (const auto *OvOp = dyn_cast<OverflowingBinaryOperator>(Op)) { + if (OvOp->hasNoSignedWrap() || OvOp->hasNoUnsignedWrap()) + return true; + } + if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(Op)) + if (ExactOp->isExact()) + return true; + if (const auto *FP = dyn_cast<FPMathOperator>(Op)) { auto FMF = FP->getFastMathFlags(); if (FMF.noNaNs() || FMF.noInfs()) return true; } - unsigned Opcode = Op->getOpcode(); + unsigned Opcode = Op->getOpcode(); - // Check whether opcode is a poison/undef-generating operation + // Check whether opcode is a poison/undef-generating operation switch (Opcode) { case Instruction::Shl: case Instruction::AShr: case Instruction::LShr: { // Shifts return poison if shiftwidth is larger than the bitwidth. - if (auto *C = dyn_cast<Constant>(Op->getOperand(1))) { + if (auto *C = dyn_cast<Constant>(Op->getOperand(1))) { SmallVector<Constant *, 4> ShiftAmounts; if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) { unsigned NumElts = FVTy->getNumElements(); @@ -4733,8 +4733,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { ShiftAmounts.push_back(C); bool Safe = llvm::all_of(ShiftAmounts, [](Constant *C) { - auto *CI = dyn_cast_or_null<ConstantInt>(C); - return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); + auto *CI = dyn_cast_or_null<ConstantInt>(C); + return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); }); return !Safe; } @@ -4747,29 +4747,29 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { return true; case Instruction::Call: case Instruction::CallBr: - case Instruction::Invoke: { - const auto *CB = cast<CallBase>(Op); - return !CB->hasRetAttr(Attribute::NoUndef); - } + case Instruction::Invoke: { + const auto *CB = cast<CallBase>(Op); + return !CB->hasRetAttr(Attribute::NoUndef); + } case Instruction::InsertElement: case Instruction::ExtractElement: { // If index exceeds the length of the vector, it returns poison - auto *VTy = cast<VectorType>(Op->getOperand(0)->getType()); - unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 
2 : 1; - auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp)); - if (!Idx || Idx->getValue().uge(VTy->getElementCount().getKnownMinValue())) + auto *VTy = cast<VectorType>(Op->getOperand(0)->getType()); + unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; + auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp)); + if (!Idx || Idx->getValue().uge(VTy->getElementCount().getKnownMinValue())) return true; return false; } - case Instruction::ShuffleVector: { - // shufflevector may return undef. - if (PoisonOnly) - return false; - ArrayRef<int> Mask = isa<ConstantExpr>(Op) - ? cast<ConstantExpr>(Op)->getShuffleMask() - : cast<ShuffleVectorInst>(Op)->getShuffleMask(); - return is_contained(Mask, UndefMaskElem); - } + case Instruction::ShuffleVector: { + // shufflevector may return undef. + if (PoisonOnly) + return false; + ArrayRef<int> Mask = isa<ConstantExpr>(Op) + ? cast<ConstantExpr>(Op)->getShuffleMask() + : cast<ShuffleVectorInst>(Op)->getShuffleMask(); + return is_contained(Mask, UndefMaskElem); + } case Instruction::FNeg: case Instruction::PHI: case Instruction::Select: @@ -4781,104 +4781,104 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { case Instruction::ICmp: case Instruction::FCmp: return false; - case Instruction::GetElementPtr: { - const auto *GEP = cast<GEPOperator>(Op); - return GEP->isInBounds(); - } - default: { - const auto *CE = dyn_cast<ConstantExpr>(Op); - if (isa<CastInst>(Op) || (CE && CE->isCast())) + case Instruction::GetElementPtr: { + const auto *GEP = cast<GEPOperator>(Op); + return GEP->isInBounds(); + } + default: { + const auto *CE = dyn_cast<ConstantExpr>(Op); + if (isa<CastInst>(Op) || (CE && CE->isCast())) return false; - else if (Instruction::isBinaryOp(Opcode)) + else if (Instruction::isBinaryOp(Opcode)) return false; // Be conservative and return true. 
return true; } - } -} - -bool llvm::canCreateUndefOrPoison(const Operator *Op) { - return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false); -} - -bool llvm::canCreatePoison(const Operator *Op) { - return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true); -} - -static bool directlyImpliesPoison(const Value *ValAssumedPoison, - const Value *V, unsigned Depth) { - if (ValAssumedPoison == V) - return true; - - const unsigned MaxDepth = 2; + } +} + +bool llvm::canCreateUndefOrPoison(const Operator *Op) { + return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false); +} + +bool llvm::canCreatePoison(const Operator *Op) { + return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true); +} + +static bool directlyImpliesPoison(const Value *ValAssumedPoison, + const Value *V, unsigned Depth) { + if (ValAssumedPoison == V) + return true; + + const unsigned MaxDepth = 2; if (Depth >= MaxDepth) return false; - const auto *I = dyn_cast<Instruction>(V); - if (I && propagatesPoison(cast<Operator>(I))) { - return any_of(I->operands(), [=](const Value *Op) { - return directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1); - }); - } - return false; -} - -static bool impliesPoison(const Value *ValAssumedPoison, const Value *V, - unsigned Depth) { - if (isGuaranteedNotToBeUndefOrPoison(ValAssumedPoison)) + const auto *I = dyn_cast<Instruction>(V); + if (I && propagatesPoison(cast<Operator>(I))) { + return any_of(I->operands(), [=](const Value *Op) { + return directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1); + }); + } + return false; +} + +static bool impliesPoison(const Value *ValAssumedPoison, const Value *V, + unsigned Depth) { + if (isGuaranteedNotToBeUndefOrPoison(ValAssumedPoison)) return true; - if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0)) - return true; - - const unsigned MaxDepth = 2; - if (Depth >= MaxDepth) - return false; - - const auto *I = dyn_cast<Instruction>(ValAssumedPoison); - if (I && !canCreatePoison(cast<Operator>(I))) { - return all_of(I->operands(), [=](const Value *Op) { - return impliesPoison(Op, V, Depth + 1); - }); - } - return false; -} - -bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) { - return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0); -} - -static bool programUndefinedIfUndefOrPoison(const Value *V, - bool PoisonOnly); - -static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, - AssumptionCache *AC, - const Instruction *CtxI, - const DominatorTree *DT, - unsigned Depth, bool PoisonOnly) { - if (Depth >= MaxAnalysisRecursionDepth) - return false; - - if (isa<MetadataAsValue>(V)) - return false; - - if (const auto *A = dyn_cast<Argument>(V)) { - if (A->hasAttribute(Attribute::NoUndef)) - return true; - } - + if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0)) + return true; + + const unsigned MaxDepth = 2; + if (Depth >= MaxDepth) + return false; + + const auto *I = dyn_cast<Instruction>(ValAssumedPoison); + if (I && !canCreatePoison(cast<Operator>(I))) { + return all_of(I->operands(), [=](const Value *Op) { + return impliesPoison(Op, V, Depth + 1); + }); + } + return false; +} + +bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) { + return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0); +} + +static bool programUndefinedIfUndefOrPoison(const Value *V, + bool PoisonOnly); + +static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, + AssumptionCache *AC, + const Instruction *CtxI, + const DominatorTree *DT, + unsigned Depth, bool PoisonOnly) { + if (Depth >= 
MaxAnalysisRecursionDepth) + return false; + + if (isa<MetadataAsValue>(V)) + return false; + + if (const auto *A = dyn_cast<Argument>(V)) { + if (A->hasAttribute(Attribute::NoUndef)) + return true; + } + if (auto *C = dyn_cast<Constant>(V)) { - if (isa<UndefValue>(C)) - return PoisonOnly && !isa<PoisonValue>(C); + if (isa<UndefValue>(C)) + return PoisonOnly && !isa<PoisonValue>(C); if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) || isa<ConstantPointerNull>(C) || isa<Function>(C)) return true; - if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) - return (PoisonOnly ? !C->containsPoisonElement() - : !C->containsUndefOrPoisonElement()) && - !C->containsConstantExpression(); + if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) + return (PoisonOnly ? !C->containsPoisonElement() + : !C->containsUndefOrPoisonElement()) && + !C->containsConstantExpression(); } // Strip cast operations from a pointer value. @@ -4895,45 +4895,45 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, return true; auto OpCheck = [&](const Value *V) { - return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, - PoisonOnly); + return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, + PoisonOnly); }; - if (auto *Opr = dyn_cast<Operator>(V)) { - // If the value is a freeze instruction, then it can never - // be undef or poison. - if (isa<FreezeInst>(V)) - return true; - - if (const auto *CB = dyn_cast<CallBase>(V)) { - if (CB->hasRetAttr(Attribute::NoUndef)) + if (auto *Opr = dyn_cast<Operator>(V)) { + // If the value is a freeze instruction, then it can never + // be undef or poison. + if (isa<FreezeInst>(V)) + return true; + + if (const auto *CB = dyn_cast<CallBase>(V)) { + if (CB->hasRetAttr(Attribute::NoUndef)) return true; } - - if (const auto *PN = dyn_cast<PHINode>(V)) { - unsigned Num = PN->getNumIncomingValues(); - bool IsWellDefined = true; - for (unsigned i = 0; i < Num; ++i) { - auto *TI = PN->getIncomingBlock(i)->getTerminator(); - if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI, - DT, Depth + 1, PoisonOnly)) { - IsWellDefined = false; - break; - } - } - if (IsWellDefined) + + if (const auto *PN = dyn_cast<PHINode>(V)) { + unsigned Num = PN->getNumIncomingValues(); + bool IsWellDefined = true; + for (unsigned i = 0; i < Num; ++i) { + auto *TI = PN->getIncomingBlock(i)->getTerminator(); + if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI, + DT, Depth + 1, PoisonOnly)) { + IsWellDefined = false; + break; + } + } + if (IsWellDefined) return true; - } else if (!canCreateUndefOrPoison(Opr) && all_of(Opr->operands(), OpCheck)) + } else if (!canCreateUndefOrPoison(Opr) && all_of(Opr->operands(), OpCheck)) return true; } - if (auto *I = dyn_cast<LoadInst>(V)) - if (I->getMetadata(LLVMContext::MD_noundef)) - return true; - - if (programUndefinedIfUndefOrPoison(V, PoisonOnly)) - return true; - + if (auto *I = dyn_cast<LoadInst>(V)) + if (I->getMetadata(LLVMContext::MD_noundef)) + return true; + + if (programUndefinedIfUndefOrPoison(V, PoisonOnly)) + return true; + // CxtI may be null or a cloned instruction. 
if (!CtxI || !CtxI->getParent() || !DT) return false; @@ -4952,48 +4952,48 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, while (Dominator) { auto *TI = Dominator->getBlock()->getTerminator(); - Value *Cond = nullptr; + Value *Cond = nullptr; if (auto BI = dyn_cast<BranchInst>(TI)) { - if (BI->isConditional()) - Cond = BI->getCondition(); + if (BI->isConditional()) + Cond = BI->getCondition(); } else if (auto SI = dyn_cast<SwitchInst>(TI)) { - Cond = SI->getCondition(); - } - - if (Cond) { - if (Cond == V) + Cond = SI->getCondition(); + } + + if (Cond) { + if (Cond == V) return true; - else if (PoisonOnly && isa<Operator>(Cond)) { - // For poison, we can analyze further - auto *Opr = cast<Operator>(Cond); - if (propagatesPoison(Opr) && is_contained(Opr->operand_values(), V)) - return true; - } + else if (PoisonOnly && isa<Operator>(Cond)) { + // For poison, we can analyze further + auto *Opr = cast<Operator>(Cond); + if (propagatesPoison(Opr) && is_contained(Opr->operand_values(), V)) + return true; + } } Dominator = Dominator->getIDom(); } - SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NoUndef}; - if (getKnowledgeValidInContext(V, AttrKinds, CtxI, DT, AC)) - return true; - + SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NoUndef}; + if (getKnowledgeValidInContext(V, AttrKinds, CtxI, DT, AC)) + return true; + return false; } -bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, - const Instruction *CtxI, - const DominatorTree *DT, - unsigned Depth) { - return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false); -} - -bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, - const Instruction *CtxI, - const DominatorTree *DT, unsigned Depth) { - return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true); -} - +bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, + const Instruction *CtxI, + const DominatorTree *DT, + unsigned Depth) { + return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false); +} + +bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, + const Instruction *CtxI, + const DominatorTree *DT, unsigned Depth) { + return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true); +} + OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, @@ -5023,9 +5023,9 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { if (isa<UnreachableInst>(I)) return false; - // An instruction that returns without throwing must transfer control flow - // to a successor. - return !I->mayThrow() && I->willReturn(); + // An instruction that returns without throwing must transfer control flow + // to a successor. 
+ return !I->mayThrow() && I->willReturn(); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { @@ -5052,7 +5052,7 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, llvm_unreachable("Instruction not contained in its own parent basic block."); } -bool llvm::propagatesPoison(const Operator *I) { +bool llvm::propagatesPoison(const Operator *I) { switch (I->getOpcode()) { case Instruction::Freeze: case Instruction::Select: @@ -5073,141 +5073,141 @@ bool llvm::propagatesPoison(const Operator *I) { } } -void llvm::getGuaranteedNonPoisonOps(const Instruction *I, - SmallPtrSetImpl<const Value *> &Operands) { +void llvm::getGuaranteedNonPoisonOps(const Instruction *I, + SmallPtrSetImpl<const Value *> &Operands) { switch (I->getOpcode()) { case Instruction::Store: - Operands.insert(cast<StoreInst>(I)->getPointerOperand()); - break; + Operands.insert(cast<StoreInst>(I)->getPointerOperand()); + break; case Instruction::Load: - Operands.insert(cast<LoadInst>(I)->getPointerOperand()); - break; + Operands.insert(cast<LoadInst>(I)->getPointerOperand()); + break; case Instruction::AtomicCmpXchg: - Operands.insert(cast<AtomicCmpXchgInst>(I)->getPointerOperand()); - break; + Operands.insert(cast<AtomicCmpXchgInst>(I)->getPointerOperand()); + break; case Instruction::AtomicRMW: - Operands.insert(cast<AtomicRMWInst>(I)->getPointerOperand()); - break; + Operands.insert(cast<AtomicRMWInst>(I)->getPointerOperand()); + break; case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: - Operands.insert(I->getOperand(1)); - break; + Operands.insert(I->getOperand(1)); + break; case Instruction::Call: - case Instruction::Invoke: { - const CallBase *CB = cast<CallBase>(I); - if (CB->isIndirectCall()) - Operands.insert(CB->getCalledOperand()); - for (unsigned i = 0; i < CB->arg_size(); ++i) { - if (CB->paramHasAttr(i, Attribute::NoUndef)) - Operands.insert(CB->getArgOperand(i)); + case Instruction::Invoke: { + const CallBase *CB = cast<CallBase>(I); + if (CB->isIndirectCall()) + Operands.insert(CB->getCalledOperand()); + for (unsigned i = 0; i < CB->arg_size(); ++i) { + if (CB->paramHasAttr(i, Attribute::NoUndef)) + Operands.insert(CB->getArgOperand(i)); } - break; - } + break; + } default: - break; + break; } } bool llvm::mustTriggerUB(const Instruction *I, const SmallSet<const Value *, 16>& KnownPoison) { - SmallPtrSet<const Value *, 4> NonPoisonOps; - getGuaranteedNonPoisonOps(I, NonPoisonOps); - - for (const auto *V : NonPoisonOps) - if (KnownPoison.count(V)) - return true; - - return false; -} - -static bool programUndefinedIfUndefOrPoison(const Value *V, - bool PoisonOnly) { - // We currently only look for uses of values within the same basic + SmallPtrSet<const Value *, 4> NonPoisonOps; + getGuaranteedNonPoisonOps(I, NonPoisonOps); + + for (const auto *V : NonPoisonOps) + if (KnownPoison.count(V)) + return true; + + return false; +} + +static bool programUndefinedIfUndefOrPoison(const Value *V, + bool PoisonOnly) { + // We currently only look for uses of values within the same basic // block, as that makes it easier to guarantee that the uses will be - // executed given that Inst is executed. + // executed given that Inst is executed. // // FIXME: Expand this to consider uses beyond the same basic block. To do // this, look out for the distinction between post-dominance and strong // post-dominance. 
- const BasicBlock *BB = nullptr; - BasicBlock::const_iterator Begin; - if (const auto *Inst = dyn_cast<Instruction>(V)) { - BB = Inst->getParent(); - Begin = Inst->getIterator(); - Begin++; - } else if (const auto *Arg = dyn_cast<Argument>(V)) { - BB = &Arg->getParent()->getEntryBlock(); - Begin = BB->begin(); - } else { - return false; - } - - // Limit number of instructions we look at, to avoid scanning through large - // blocks. The current limit is chosen arbitrarily. - unsigned ScanLimit = 32; - BasicBlock::const_iterator End = BB->end(); - - if (!PoisonOnly) { - // Be conservative & just check whether a value is passed to a noundef - // argument. - // Instructions that raise UB with a poison operand are well-defined - // or have unclear semantics when the input is partially undef. - // For example, 'udiv x, (undef | 1)' isn't UB. - - for (auto &I : make_range(Begin, End)) { - if (isa<DbgInfoIntrinsic>(I)) - continue; - if (--ScanLimit == 0) - break; - - if (const auto *CB = dyn_cast<CallBase>(&I)) { - for (unsigned i = 0; i < CB->arg_size(); ++i) { - if (CB->paramHasAttr(i, Attribute::NoUndef) && - CB->getArgOperand(i) == V) - return true; - } - } - if (!isGuaranteedToTransferExecutionToSuccessor(&I)) - break; - } - return false; - } - - // Set of instructions that we have proved will yield poison if Inst + const BasicBlock *BB = nullptr; + BasicBlock::const_iterator Begin; + if (const auto *Inst = dyn_cast<Instruction>(V)) { + BB = Inst->getParent(); + Begin = Inst->getIterator(); + Begin++; + } else if (const auto *Arg = dyn_cast<Argument>(V)) { + BB = &Arg->getParent()->getEntryBlock(); + Begin = BB->begin(); + } else { + return false; + } + + // Limit number of instructions we look at, to avoid scanning through large + // blocks. The current limit is chosen arbitrarily. + unsigned ScanLimit = 32; + BasicBlock::const_iterator End = BB->end(); + + if (!PoisonOnly) { + // Be conservative & just check whether a value is passed to a noundef + // argument. + // Instructions that raise UB with a poison operand are well-defined + // or have unclear semantics when the input is partially undef. + // For example, 'udiv x, (undef | 1)' isn't UB. + + for (auto &I : make_range(Begin, End)) { + if (isa<DbgInfoIntrinsic>(I)) + continue; + if (--ScanLimit == 0) + break; + + if (const auto *CB = dyn_cast<CallBase>(&I)) { + for (unsigned i = 0; i < CB->arg_size(); ++i) { + if (CB->paramHasAttr(i, Attribute::NoUndef) && + CB->getArgOperand(i) == V) + return true; + } + } + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + break; + } + return false; + } + + // Set of instructions that we have proved will yield poison if Inst // does. 
SmallSet<const Value *, 16> YieldsPoison; SmallSet<const BasicBlock *, 4> Visited; - YieldsPoison.insert(V); - auto Propagate = [&](const User *User) { - if (propagatesPoison(cast<Operator>(User))) - YieldsPoison.insert(User); - }; - for_each(V->users(), Propagate); - Visited.insert(BB); + YieldsPoison.insert(V); + auto Propagate = [&](const User *User) { + if (propagatesPoison(cast<Operator>(User))) + YieldsPoison.insert(User); + }; + for_each(V->users(), Propagate); + Visited.insert(BB); - while (true) { + while (true) { for (auto &I : make_range(Begin, End)) { - if (isa<DbgInfoIntrinsic>(I)) - continue; - if (--ScanLimit == 0) - return false; - if (mustTriggerUB(&I, YieldsPoison)) - return true; - if (!isGuaranteedToTransferExecutionToSuccessor(&I)) - return false; + if (isa<DbgInfoIntrinsic>(I)) + continue; + if (--ScanLimit == 0) + return false; + if (mustTriggerUB(&I, YieldsPoison)) + return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + return false; // Mark poison that propagates from I through uses of I. - if (YieldsPoison.count(&I)) - for_each(I.users(), Propagate); + if (YieldsPoison.count(&I)) + for_each(I.users(), Propagate); } if (auto *NextBB = BB->getSingleSuccessor()) { @@ -5224,14 +5224,14 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, return false; } -bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) { - return ::programUndefinedIfUndefOrPoison(Inst, false); -} - -bool llvm::programUndefinedIfPoison(const Instruction *Inst) { - return ::programUndefinedIfUndefOrPoison(Inst, true); -} - +bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) { + return ::programUndefinedIfUndefOrPoison(Inst, false); +} + +bool llvm::programUndefinedIfPoison(const Instruction *Inst) { + return ::programUndefinedIfUndefOrPoison(Inst, true); +} + static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) { if (FMF.noNaNs()) return true; @@ -5594,10 +5594,10 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, // elements because those can not be back-propagated for analysis. Value *OutputZeroVal = nullptr; if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) && - !cast<Constant>(TrueVal)->containsUndefOrPoisonElement()) + !cast<Constant>(TrueVal)->containsUndefOrPoisonElement()) OutputZeroVal = TrueVal; else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) && - !cast<Constant>(FalseVal)->containsUndefOrPoisonElement()) + !cast<Constant>(FalseVal)->containsUndefOrPoisonElement()) OutputZeroVal = FalseVal; if (OutputZeroVal) { @@ -5874,7 +5874,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp, unsigned Depth) { - if (Depth >= MaxAnalysisRecursionDepth) + if (Depth >= MaxAnalysisRecursionDepth) return {SPF_UNKNOWN, SPNB_NA, false}; SelectInst *SI = dyn_cast<SelectInst>(V); @@ -5953,46 +5953,46 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) { return getMinMaxPred(getInverseMinMaxFlavor(SPF)); } -std::pair<Intrinsic::ID, bool> -llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) { - // Check if VL contains select instructions that can be folded into a min/max - // vector intrinsic and return the intrinsic if it is possible. - // TODO: Support floating point min/max. 
- bool AllCmpSingleUse = true; - SelectPatternResult SelectPattern; - SelectPattern.Flavor = SPF_UNKNOWN; - if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) { - Value *LHS, *RHS; - auto CurrentPattern = matchSelectPattern(I, LHS, RHS); - if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) || - CurrentPattern.Flavor == SPF_FMINNUM || - CurrentPattern.Flavor == SPF_FMAXNUM || - !I->getType()->isIntOrIntVectorTy()) - return false; - if (SelectPattern.Flavor != SPF_UNKNOWN && - SelectPattern.Flavor != CurrentPattern.Flavor) - return false; - SelectPattern = CurrentPattern; - AllCmpSingleUse &= - match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value())); - return true; - })) { - switch (SelectPattern.Flavor) { - case SPF_SMIN: - return {Intrinsic::smin, AllCmpSingleUse}; - case SPF_UMIN: - return {Intrinsic::umin, AllCmpSingleUse}; - case SPF_SMAX: - return {Intrinsic::smax, AllCmpSingleUse}; - case SPF_UMAX: - return {Intrinsic::umax, AllCmpSingleUse}; - default: - llvm_unreachable("unexpected select pattern flavor"); - } - } - return {Intrinsic::not_intrinsic, false}; -} - +std::pair<Intrinsic::ID, bool> +llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) { + // Check if VL contains select instructions that can be folded into a min/max + // vector intrinsic and return the intrinsic if it is possible. + // TODO: Support floating point min/max. + bool AllCmpSingleUse = true; + SelectPatternResult SelectPattern; + SelectPattern.Flavor = SPF_UNKNOWN; + if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) { + Value *LHS, *RHS; + auto CurrentPattern = matchSelectPattern(I, LHS, RHS); + if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) || + CurrentPattern.Flavor == SPF_FMINNUM || + CurrentPattern.Flavor == SPF_FMAXNUM || + !I->getType()->isIntOrIntVectorTy()) + return false; + if (SelectPattern.Flavor != SPF_UNKNOWN && + SelectPattern.Flavor != CurrentPattern.Flavor) + return false; + SelectPattern = CurrentPattern; + AllCmpSingleUse &= + match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value())); + return true; + })) { + switch (SelectPattern.Flavor) { + case SPF_SMIN: + return {Intrinsic::smin, AllCmpSingleUse}; + case SPF_UMIN: + return {Intrinsic::umin, AllCmpSingleUse}; + case SPF_SMAX: + return {Intrinsic::smax, AllCmpSingleUse}; + case SPF_UMAX: + return {Intrinsic::umax, AllCmpSingleUse}; + default: + llvm_unreachable("unexpected select pattern flavor"); + } + } + return {Intrinsic::not_intrinsic, false}; +} + /// Return true if "icmp Pred LHS RHS" is always true. static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const DataLayout &DL, @@ -6172,25 +6172,25 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS, /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is /// false. Otherwise, return None if we can't infer anything. We expect the -/// RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select' instruction. +/// RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select' instruction. static Optional<bool> -isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred, +isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred, const Value *RHSOp0, const Value *RHSOp1, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { - // The LHS must be an 'or', 'and', or a 'select' instruction. + // The LHS must be an 'or', 'and', or a 'select' instruction. 
assert((LHS->getOpcode() == Instruction::And || - LHS->getOpcode() == Instruction::Or || - LHS->getOpcode() == Instruction::Select) && - "Expected LHS to be 'and', 'or', or 'select'."); + LHS->getOpcode() == Instruction::Or || + LHS->getOpcode() == Instruction::Select) && + "Expected LHS to be 'and', 'or', or 'select'."); - assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit"); + assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit"); // If the result of an 'or' is false, then we know both legs of the 'or' are // false. Similarly, if the result of an 'and' is true, then we know both // legs of the 'and' are true. - const Value *ALHS, *ARHS; - if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) || - (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) { + const Value *ALHS, *ARHS; + if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) || + (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) { // FIXME: Make this non-recursion. if (Optional<bool> Implication = isImpliedCondition( ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) @@ -6208,7 +6208,7 @@ llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, const Value *RHSOp0, const Value *RHSOp1, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // Bail out when we hit the limit. - if (Depth == MaxAnalysisRecursionDepth) + if (Depth == MaxAnalysisRecursionDepth) return None; // A mismatch occurs when we compare a scalar cmp to a vector cmp, for @@ -6231,14 +6231,14 @@ llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth); - /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect - /// the RHS to be an icmp. - /// FIXME: Add support for and/or/select on the RHS. - if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) { - if ((LHSI->getOpcode() == Instruction::And || - LHSI->getOpcode() == Instruction::Or || - LHSI->getOpcode() == Instruction::Select)) - return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, + /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect + /// the RHS to be an icmp. + /// FIXME: Add support for and/or/select on the RHS. + if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) { + if ((LHSI->getOpcode() == Instruction::And || + LHSI->getOpcode() == Instruction::Or || + LHSI->getOpcode() == Instruction::Select)) + return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth); } return None; @@ -6471,13 +6471,13 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower, unsigned Width = Lower.getBitWidth(); const APInt *C; switch (II.getIntrinsicID()) { - case Intrinsic::ctpop: - case Intrinsic::ctlz: - case Intrinsic::cttz: - // Maximum of set/clear bits is the bit width. - assert(Lower == 0 && "Expected lower bound to be zero"); - Upper = Width + 1; - break; + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + // Maximum of set/clear bits is the bit width. + assert(Lower == 0 && "Expected lower bound to be zero"); + Upper = Width + 1; + break; case Intrinsic::uadd_sat: // uadd.sat(x, C) produces [C, UINT_MAX]. 
if (match(II.getOperand(0), m_APInt(C)) || @@ -6529,41 +6529,41 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower, } } break; - case Intrinsic::umin: - case Intrinsic::umax: - case Intrinsic::smin: - case Intrinsic::smax: - if (!match(II.getOperand(0), m_APInt(C)) && - !match(II.getOperand(1), m_APInt(C))) - break; - - switch (II.getIntrinsicID()) { - case Intrinsic::umin: - Upper = *C + 1; - break; - case Intrinsic::umax: - Lower = *C; - break; - case Intrinsic::smin: - Lower = APInt::getSignedMinValue(Width); - Upper = *C + 1; - break; - case Intrinsic::smax: - Lower = *C; - Upper = APInt::getSignedMaxValue(Width) + 1; - break; - default: - llvm_unreachable("Must be min/max intrinsic"); - } - break; - case Intrinsic::abs: - // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX], - // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. - if (match(II.getOperand(1), m_One())) - Upper = APInt::getSignedMaxValue(Width) + 1; - else - Upper = APInt::getSignedMinValue(Width) + 1; - break; + case Intrinsic::umin: + case Intrinsic::umax: + case Intrinsic::smin: + case Intrinsic::smax: + if (!match(II.getOperand(0), m_APInt(C)) && + !match(II.getOperand(1), m_APInt(C))) + break; + + switch (II.getIntrinsicID()) { + case Intrinsic::umin: + Upper = *C + 1; + break; + case Intrinsic::umax: + Lower = *C; + break; + case Intrinsic::smin: + Lower = APInt::getSignedMinValue(Width); + Upper = *C + 1; + break; + case Intrinsic::smax: + Lower = *C; + Upper = APInt::getSignedMaxValue(Width) + 1; + break; + default: + llvm_unreachable("Must be min/max intrinsic"); + } + break; + case Intrinsic::abs: + // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX], + // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. + if (match(II.getOperand(1), m_One())) + Upper = APInt::getSignedMaxValue(Width) + 1; + else + Upper = APInt::getSignedMinValue(Width) + 1; + break; default: break; } @@ -6628,7 +6628,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo, unsigned Depth) { assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction"); - if (Depth == MaxAnalysisRecursionDepth) + if (Depth == MaxAnalysisRecursionDepth) return ConstantRange::getFull(V->getType()->getScalarSizeInBits()); const APInt *C; diff --git a/contrib/libs/llvm12/lib/Analysis/VectorUtils.cpp b/contrib/libs/llvm12/lib/Analysis/VectorUtils.cpp index 9a4c96b6f7..1c7a473bcf 100644 --- a/contrib/libs/llvm12/lib/Analysis/VectorUtils.cpp +++ b/contrib/libs/llvm12/lib/Analysis/VectorUtils.cpp @@ -43,18 +43,18 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor( /// hasVectorInstrinsicScalarOpd). bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::abs: // Begin integer bit-manipulation. - case Intrinsic::bswap: + case Intrinsic::abs: // Begin integer bit-manipulation. 
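Worked out for i8 and a constant operand of 42, the clamps installed above correspond to the following ConstantRanges (illustrative values only; ConstantRange is half-open and wraps to express "up to the unsigned/signed maximum"):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

static void minMaxRangeExamples() {
  APInt C(8, 42);
  ConstantRange UMin(APInt(8, 0), C + 1);                  // umin(x, 42) -> [0, 42]
  ConstantRange UMax(C, APInt(8, 0));                      // umax(x, 42) -> [42, 255]
  ConstantRange SMin(APInt::getSignedMinValue(8), C + 1);  // smin(x, 42) -> [-128, 42]
  ConstantRange SMax(C, APInt::getSignedMaxValue(8) + 1);  // smax(x, 42) -> [42, 127]
  // abs(x, /*int_min_is_poison=*/true)  -> [0, 127]
  // abs(x, /*int_min_is_poison=*/false) -> [0, 127] plus -128, because
  // -INT8_MIN is still INT8_MIN.
  (void)UMin; (void)UMax; (void)SMin; (void)SMax;
}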
+ case Intrinsic::bswap: case Intrinsic::bitreverse: case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::fshl: case Intrinsic::fshr: - case Intrinsic::smax: - case Intrinsic::smin: - case Intrinsic::umax: - case Intrinsic::umin: + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::umax: + case Intrinsic::umin: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: case Intrinsic::uadd_sat: @@ -99,7 +99,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx) { switch (ID) { - case Intrinsic::abs: + case Intrinsic::abs: case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::powi: @@ -125,8 +125,8 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::experimental_noalias_scope_decl || - ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) + ID == Intrinsic::experimental_noalias_scope_decl || + ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) return ID; return Intrinsic::not_intrinsic; } @@ -137,7 +137,7 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { const DataLayout &DL = Gep->getModule()->getDataLayout(); unsigned LastOperand = Gep->getNumOperands() - 1; - TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType()); + TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType()); // Walk backwards and try to peel off zeros. while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { @@ -209,7 +209,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { if (Ptr != OrigPtr) // Strip off casts. - while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) + while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) V = C->getOperand(); const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); @@ -242,7 +242,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { // Strip off casts. Type *StripedOffRecurrenceCast = nullptr; - if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) { + if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) { StripedOffRecurrenceCast = C->getType(); V = C->getOperand(); } @@ -291,10 +291,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { if (EltNo == IIElt) return III->getOperand(1); - // Guard against infinite loop on malformed, unreachable IR. - if (III == III->getOperand(0)) - return nullptr; - + // Guard against infinite loop on malformed, unreachable IR. + if (III == III->getOperand(0)) + return nullptr; + // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return findScalarElement(III->getOperand(0), EltNo); @@ -347,7 +347,7 @@ int llvm::getSplatIndex(ArrayRef<int> Mask) { /// This function is not fully general. It checks only 2 cases: /// the input value is (1) a splat constant vector or (2) a sequence /// of instructions that broadcasts a scalar at element 0. 
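A sketch of the usual query sequence a vectorizer performs with the helpers above; CI and TLI are assumed to be supplied by the caller:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative only: decide whether a call can be widened lane-wise.
static bool canWidenCall(const CallInst *CI, const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
  if (ID == Intrinsic::not_intrinsic || !isTriviallyVectorizable(ID))
    return false;
  // Operands flagged by hasVectorInstrinsicScalarOpd (e.g. the second operand
  // of abs/ctlz/cttz/powi) must remain scalar when the call is widened.
  return true;
}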
-Value *llvm::getSplatValue(const Value *V) { +Value *llvm::getSplatValue(const Value *V) { if (isa<VectorType>(V->getType())) if (auto *C = dyn_cast<Constant>(V)) return C->getSplatValue(); @@ -363,7 +363,7 @@ Value *llvm::getSplatValue(const Value *V) { } bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) { - assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); + assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); if (isa<VectorType>(V->getType())) { if (isa<UndefValue>(V)) @@ -390,7 +390,7 @@ bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) { } // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ == MaxAnalysisRecursionDepth) + if (Depth++ == MaxAnalysisRecursionDepth) return false; // If both operands of a binop are splats, the result is a splat. @@ -421,7 +421,7 @@ void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, ScaledMask.clear(); for (int MaskElt : Mask) { if (MaskElt >= 0) { - assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX && + assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX && "Overflowed 32-bits"); } for (int SliceElt = 0; SliceElt != Scale; ++SliceElt) @@ -823,14 +823,14 @@ static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, VecTy1->getScalarType() == VecTy2->getScalarType() && "Expect two vectors with the same element type"); - unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements(); - unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements(); + unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements(); + unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements(); assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); if (NumElts1 > NumElts2) { // Extend with UNDEFs. 
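A short sketch of the difference between the two splat queries above; V is assumed to be a vector-typed value supplied by the caller:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Illustrative only: getSplatValue names the broadcast scalar when the IR
// matches the canonical splat pattern; isSplatValue merely proves that all
// lanes are identical.
static void splatQueries(const Value *V) {
  if (Value *Scalar = getSplatValue(V)) {
    // V is a splat constant or the insertelement+shufflevector broadcast of
    // Scalar from lane 0, so Scalar can be reused directly.
    (void)Scalar;
  } else if (isSplatValue(V)) {
    // All lanes are provably equal (e.g. binops or shuffles of splats), but
    // no single scalar value is available to hand back.
  }
}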
V2 = Builder.CreateShuffleVector( - V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2)); + V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2)); } return Builder.CreateShuffleVector( @@ -866,22 +866,22 @@ Value *llvm::concatenateVectors(IRBuilderBase &Builder, } bool llvm::maskIsAllZeroOrUndef(Value *Mask) { - assert(isa<VectorType>(Mask->getType()) && - isa<IntegerType>(Mask->getType()->getScalarType()) && - cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == - 1 && - "Mask must be a vector of i1"); - + assert(isa<VectorType>(Mask->getType()) && + isa<IntegerType>(Mask->getType()->getScalarType()) && + cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == + 1 && + "Mask must be a vector of i1"); + auto *ConstMask = dyn_cast<Constant>(Mask); if (!ConstMask) return false; if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask)) return true; - if (isa<ScalableVectorType>(ConstMask->getType())) - return false; - for (unsigned - I = 0, - E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); + if (isa<ScalableVectorType>(ConstMask->getType())) + return false; + for (unsigned + I = 0, + E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); I != E; ++I) { if (auto *MaskElt = ConstMask->getAggregateElement(I)) if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt)) @@ -893,22 +893,22 @@ bool llvm::maskIsAllZeroOrUndef(Value *Mask) { bool llvm::maskIsAllOneOrUndef(Value *Mask) { - assert(isa<VectorType>(Mask->getType()) && - isa<IntegerType>(Mask->getType()->getScalarType()) && - cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == - 1 && - "Mask must be a vector of i1"); - + assert(isa<VectorType>(Mask->getType()) && + isa<IntegerType>(Mask->getType()->getScalarType()) && + cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == + 1 && + "Mask must be a vector of i1"); + auto *ConstMask = dyn_cast<Constant>(Mask); if (!ConstMask) return false; if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) return true; - if (isa<ScalableVectorType>(ConstMask->getType())) - return false; - for (unsigned - I = 0, - E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); + if (isa<ScalableVectorType>(ConstMask->getType())) + return false; + for (unsigned + I = 0, + E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); I != E; ++I) { if (auto *MaskElt = ConstMask->getAggregateElement(I)) if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) @@ -921,14 +921,14 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) { /// TODO: This is a lot like known bits, but for /// vectors. Is there something we can common this with? 
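A sketch of the typical client of the two mask predicates above, e.g. when simplifying a masked load or store; the enum and function are illustrative names only:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Illustrative only: classify the i1 vector mask of a masked memory op.
enum class MaskKind { AllInactive, AllActive, Mixed };

static MaskKind classifyMask(Value *Mask) {
  if (maskIsAllZeroOrUndef(Mask))
    return MaskKind::AllInactive; // load -> passthru value, store -> no-op
  if (maskIsAllOneOrUndef(Mask))
    return MaskKind::AllActive;   // lower to a plain vector load/store
  return MaskKind::Mixed;
}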
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { - assert(isa<FixedVectorType>(Mask->getType()) && - isa<IntegerType>(Mask->getType()->getScalarType()) && - cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == - 1 && - "Mask must be a fixed width vector of i1"); - - const unsigned VWidth = - cast<FixedVectorType>(Mask->getType())->getNumElements(); + assert(isa<FixedVectorType>(Mask->getType()) && + isa<IntegerType>(Mask->getType()->getScalarType()) && + cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == + 1 && + "Mask must be a fixed width vector of i1"); + + const unsigned VWidth = + cast<FixedVectorType>(Mask->getType())->getNumElements(); APInt DemandedElts = APInt::getAllOnesValue(VWidth); if (auto *CV = dyn_cast<ConstantVector>(Mask)) for (unsigned i = 0; i < VWidth; i++) diff --git a/contrib/libs/llvm12/lib/Analysis/ya.make b/contrib/libs/llvm12/lib/Analysis/ya.make index 6e09c89fe4..4a6521c1fd 100644 --- a/contrib/libs/llvm12/lib/Analysis/ya.make +++ b/contrib/libs/llvm12/lib/Analysis/ya.make @@ -15,13 +15,13 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/BinaryFormat - contrib/libs/llvm12/lib/IR - contrib/libs/llvm12/lib/Object - contrib/libs/llvm12/lib/ProfileData - contrib/libs/llvm12/lib/Support + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/BinaryFormat + contrib/libs/llvm12/lib/IR + contrib/libs/llvm12/lib/Object + contrib/libs/llvm12/lib/ProfileData + contrib/libs/llvm12/lib/Support ) ADDINCL( @@ -56,28 +56,28 @@ SRCS( CmpInstAnalysis.cpp CodeMetrics.cpp ConstantFolding.cpp - ConstraintSystem.cpp + ConstraintSystem.cpp CostModel.cpp DDG.cpp - DDGPrinter.cpp + DDGPrinter.cpp Delinearization.cpp DemandedBits.cpp DependenceAnalysis.cpp DependenceGraphBuilder.cpp - DevelopmentModeInlineAdvisor.cpp + DevelopmentModeInlineAdvisor.cpp DivergenceAnalysis.cpp DomPrinter.cpp DomTreeUpdater.cpp DominanceFrontier.cpp EHPersonalities.cpp - FunctionPropertiesAnalysis.cpp + FunctionPropertiesAnalysis.cpp GlobalsModRef.cpp GuardUtils.cpp HeatUtils.cpp - IRSimilarityIdentifier.cpp + IRSimilarityIdentifier.cpp IVDescriptors.cpp IVUsers.cpp - ImportedFunctionsInliningStatistics.cpp + ImportedFunctionsInliningStatistics.cpp IndirectCallPromotionAnalysis.cpp InlineAdvisor.cpp InlineCost.cpp @@ -101,7 +101,7 @@ SRCS( LoopNestAnalysis.cpp LoopPass.cpp LoopUnrollAnalyzer.cpp - MLInlineAdvisor.cpp + MLInlineAdvisor.cpp MemDepPrinter.cpp MemDerefPrinter.cpp MemoryBuiltins.cpp @@ -124,8 +124,8 @@ SRCS( RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp - ReleaseModeModelRunner.cpp - ReplayInlineAdvisor.cpp + ReleaseModeModelRunner.cpp + ReplayInlineAdvisor.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp @@ -135,7 +135,7 @@ SRCS( StackSafetyAnalysis.cpp SyncDependenceAnalysis.cpp SyntheticCountsUtils.cpp - TFUtils.cpp + TFUtils.cpp TargetLibraryInfo.cpp TargetTransformInfo.cpp Trace.cpp |
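Returning to possiblyDemandedEltsInMask at the top of this hunk, a minimal sketch of how its result is typically consumed; the helper name and the lane check are illustrative only:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Illustrative only: a lane whose mask bit is known zero can never be read or
// written, so a masked operation does not demand that element.
static bool laneMayBeAccessed(Value *Mask, unsigned Lane) {
  APInt Possible = possiblyDemandedEltsInMask(Mask);
  return Lane < Possible.getBitWidth() && Possible[Lane];
}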